xref: /illumos-gate/usr/src/uts/common/os/exit.c (revision f73e1ebf60792a8bdb2d559097c3131b68c09318)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
25  * Copyright 2020 Oxide Computer Company
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/user.h>
36 #include <sys/errno.h>
37 #include <sys/proc.h>
38 #include <sys/ucontext.h>
39 #include <sys/procfs.h>
40 #include <sys/vnode.h>
41 #include <sys/acct.h>
42 #include <sys/var.h>
43 #include <sys/cmn_err.h>
44 #include <sys/debug.h>
45 #include <sys/wait.h>
46 #include <sys/siginfo.h>
47 #include <sys/procset.h>
48 #include <sys/class.h>
49 #include <sys/file.h>
50 #include <sys/session.h>
51 #include <sys/kmem.h>
52 #include <sys/vtrace.h>
53 #include <sys/prsystm.h>
54 #include <sys/ipc.h>
55 #include <sys/sem_impl.h>
56 #include <c2/audit.h>
57 #include <sys/aio_impl.h>
58 #include <vm/as.h>
59 #include <sys/poll.h>
60 #include <sys/door.h>
61 #include <sys/lwpchan_impl.h>
62 #include <sys/utrap.h>
63 #include <sys/task.h>
64 #include <sys/exacct.h>
65 #include <sys/cyclic.h>
66 #include <sys/schedctl.h>
67 #include <sys/rctl.h>
68 #include <sys/contract_impl.h>
69 #include <sys/contract/process_impl.h>
70 #include <sys/list.h>
71 #include <sys/dtrace.h>
72 #include <sys/pool.h>
73 #include <sys/sdt.h>
74 #include <sys/corectl.h>
75 #include <sys/core.h>
76 #include <sys/brand.h>
77 #include <sys/libc_kernel.h>
78 
79 /*
80  * convert code/data pair into old style wait status
81  */
82 int
83 wstat(int code, int data)
84 {
85 	int stat = (data & 0377);
86 
87 	switch (code) {
88 	case CLD_EXITED:
89 		stat <<= 8;
90 		break;
91 	case CLD_DUMPED:
92 		stat |= WCOREFLG;
93 		break;
94 	case CLD_KILLED:
95 		break;
96 	case CLD_TRAPPED:
97 	case CLD_STOPPED:
98 		stat <<= 8;
99 		stat |= WSTOPFLG;
100 		break;
101 	case CLD_CONTINUED:
102 		stat = WCONTFLG;
103 		break;
104 	default:
105 		cmn_err(CE_PANIC, "wstat: bad code");
106 		/* NOTREACHED */
107 	}
108 	return (stat);
109 }
110 
111 static char *
112 exit_reason(char *buf, size_t bufsz, int what, int why)
113 {
114 	switch (why) {
115 	case CLD_EXITED:
116 		(void) snprintf(buf, bufsz, "exited with status %d", what);
117 		break;
118 	case CLD_KILLED:
119 		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
120 		break;
121 	case CLD_DUMPED:
122 		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
123 		break;
124 	default:
125 		(void) snprintf(buf, bufsz, "encountered unknown error "
126 		    "(%d, %d)", why, what);
127 		break;
128 	}
129 
130 	return (buf);
131 }
132 
133 /*
134  * exit system call: pass back caller's arg.
135  */
136 void
137 rexit(int rval)
138 {
139 	exit(CLD_EXITED, rval);
140 }
141 
142 /*
143  * Called by proc_exit() when a zone's init exits, presumably because
144  * it failed.  As long as the given zone is still in the "running"
145  * state, we will re-exec() init, but first we need to reset things
146  * which are usually inherited across exec() but will break init's
147  * assumption that it is being exec()'d from a virgin process.  Most
148  * importantly this includes closing all file descriptors (exec only
149  * closes those marked close-on-exec) and resetting signals (exec only
150  * resets handled signals, and we need to clear any signals which
151  * killed init).  Anything else that exec(2) says would be inherited,
152  * but would affect the execution of init, needs to be reset.
153  */
154 static int
155 restart_init(int what, int why)
156 {
157 	kthread_t *t = curthread;
158 	klwp_t *lwp = ttolwp(t);
159 	proc_t *p = ttoproc(t);
160 	user_t *up = PTOU(p);
161 
162 	vnode_t *oldcd, *oldrd;
163 	int i, err;
164 	char reason_buf[64];
165 
166 	/*
167 	 * Let zone admin (and global zone admin if this is for a non-global
168 	 * zone) know that init has failed and will be restarted.
169 	 */
170 	zcmn_err(p->p_zone->zone_id, CE_WARN,
171 	    "init(1M) %s: restarting automatically",
172 	    exit_reason(reason_buf, sizeof (reason_buf), what, why));
173 
174 	if (!INGLOBALZONE(p)) {
175 		cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
176 		    "restarting automatically",
177 		    p->p_zone->zone_name, p->p_pid, reason_buf);
178 	}
179 
180 	/*
181 	 * Remove any fpollinfo_t's for this (last) thread from our file
182 	 * descriptors so closeall() can ASSERT() that they're all gone.
183 	 * Then close all open file descriptors in the process.
184 	 */
185 	pollcleanup();
186 	closeall(P_FINFO(p));
187 
188 	/*
189 	 * Grab p_lock and begin clearing miscellaneous global process
190 	 * state that needs to be reset before we exec the new init(1M).
191 	 */
192 
193 	mutex_enter(&p->p_lock);
194 	prbarrier(p);
195 
196 	p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
197 	up->u_cmask = CMASK;
198 
199 	sigemptyset(&t->t_hold);
200 	sigemptyset(&t->t_sig);
201 	sigemptyset(&t->t_extsig);
202 
203 	sigemptyset(&p->p_sig);
204 	sigemptyset(&p->p_extsig);
205 
206 	sigdelq(p, t, 0);
207 	sigdelq(p, NULL, 0);
208 
209 	if (p->p_killsqp) {
210 		siginfofree(p->p_killsqp);
211 		p->p_killsqp = NULL;
212 	}
213 
214 	/*
215 	 * Reset any signals that are ignored back to the default disposition.
216 	 * Other u_signal members will be cleared when exec calls sigdefault().
217 	 */
218 	for (i = 1; i < NSIG; i++) {
219 		if (up->u_signal[i - 1] == SIG_IGN) {
220 			up->u_signal[i - 1] = SIG_DFL;
221 			sigemptyset(&up->u_sigmask[i - 1]);
222 		}
223 	}
224 
225 	/*
226 	 * Clear the current signal, any signal info associated with it, and
227 	 * any signal information from contracts and/or contract templates.
228 	 */
229 	lwp->lwp_cursig = 0;
230 	lwp->lwp_extsig = 0;
231 	if (lwp->lwp_curinfo != NULL) {
232 		siginfofree(lwp->lwp_curinfo);
233 		lwp->lwp_curinfo = NULL;
234 	}
235 	lwp_ctmpl_clear(lwp);
236 
237 	/*
238 	 * Reset both the process root directory and the current working
239 	 * directory to the root of the zone just as we do during boot.
240 	 */
241 	VN_HOLD(p->p_zone->zone_rootvp);
242 	oldrd = up->u_rdir;
243 	up->u_rdir = p->p_zone->zone_rootvp;
244 
245 	VN_HOLD(p->p_zone->zone_rootvp);
246 	oldcd = up->u_cdir;
247 	up->u_cdir = p->p_zone->zone_rootvp;
248 
249 	if (up->u_cwd != NULL) {
250 		refstr_rele(up->u_cwd);
251 		up->u_cwd = NULL;
252 	}
253 
254 	mutex_exit(&p->p_lock);
255 
256 	if (oldrd != NULL)
257 		VN_RELE(oldrd);
258 	if (oldcd != NULL)
259 		VN_RELE(oldcd);
260 
261 	/* Free the controlling tty.  (freectty() always assumes curproc.) */
262 	ASSERT(p == curproc);
263 	(void) freectty(B_TRUE);
264 
265 	/*
266 	 * Now exec() the new init(1M) on top of the current process.  If we
267 	 * succeed, the caller will treat this like a successful system call.
268 	 * If we fail, we issue messages and the caller will proceed with exit.
269 	 */
270 	err = exec_init(p->p_zone->zone_initname, NULL);
271 
272 	if (err == 0)
273 		return (0);
274 
275 	zcmn_err(p->p_zone->zone_id, CE_WARN,
276 	    "failed to restart init(1M) (err=%d): system reboot required", err);
277 
278 	if (!INGLOBALZONE(p)) {
279 		cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
280 		    "(pid %d, err=%d): zoneadm(1M) boot required",
281 		    p->p_zone->zone_name, p->p_pid, err);
282 	}
283 
284 	return (-1);
285 }
286 
287 /*
288  * Release resources.
289  * Enter zombie state.
290  * Wake up parent and init processes,
291  * and dispose of children.
292  */
293 void
294 exit(int why, int what)
295 {
296 	/*
297 	 * If proc_exit() fails, then some other lwp in the process
298 	 * got there first.  We just have to call lwp_exit() to allow
299 	 * the other lwp to finish exiting the process.  Otherwise we're
300 	 * restarting init, and should return.
301 	 */
302 	if (proc_exit(why, what) != 0) {
303 		mutex_enter(&curproc->p_lock);
304 		ASSERT(curproc->p_flag & SEXITLWPS);
305 		lwp_exit();
306 		/* NOTREACHED */
307 	}
308 }
309 
310 /*
311  * Set the SEXITING flag on the process, after making sure /proc does
312  * not have it locked.  This is done in more places than proc_exit(),
313  * so it is a separate function.
314  */
315 void
316 proc_is_exiting(proc_t *p)
317 {
318 	mutex_enter(&p->p_lock);
319 	prbarrier(p);
320 	p->p_flag |= SEXITING;
321 	mutex_exit(&p->p_lock);
322 }
323 
324 /*
 * Do the process-wide work of exiting, on behalf of the calling lwp.
 *
 * 'why' is the CLD_* code and 'what' is the value that goes with it.
 * They are stored in p_wcode and p_wdata once the process becomes a
 * zombie, and wstat(why, what) supplies the status handed to acct(),
 * exacct_commit_proc() and contract_process_exit().
 *
 * Once exitlwps() has succeeded and init is not being restarted, this
 * function does not return: it finishes in thread_exit().
 *
325  * Return value:
326  *   1 - exitlwps() failed, call (or continue) lwp_exit()
327  *   0 - restarting init.  Return through system call path
328  */
329 int
330 proc_exit(int why, int what)
331 {
332 	kthread_t *t = curthread;
333 	klwp_t *lwp = ttolwp(t);
334 	proc_t *p = ttoproc(t);
335 	zone_t *z = p->p_zone;
336 	timeout_id_t tmp_id;
337 	int rv;
338 	proc_t *q;
339 	task_t *tk;
340 	vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
341 	sigqueue_t *sqp;
342 	lwpdir_t *lwpdir;
343 	uint_t lwpdir_sz;
344 	tidhash_t *tidhash;
345 	uint_t tidhash_sz;
346 	ret_tidhash_t *ret_tidhash;
347 	refstr_t *cwd;
348 	hrtime_t hrutime, hrstime;
349 	int evaporate;
350 
351 	/*
352 	 * Stop and discard the process's lwps except for the current one,
353 	 * unless some other lwp beat us to it.  If exitlwps() fails then
354 	 * return and the calling lwp will call (or continue in) lwp_exit().
355 	 */
356 	proc_is_exiting(p);
357 	if (exitlwps(0) != 0)
358 		return (1);
359 
360 	mutex_enter(&p->p_lock);
361 	if (p->p_ttime > 0) {
362 		/*
363 		 * Account any remaining ticks charged to this process
364 		 * on its way out.
365 		 */
366 		(void) task_cpu_time_incr(p->p_task, p->p_ttime);
367 		p->p_ttime = 0;
368 	}
369 	mutex_exit(&p->p_lock);
370 
371 	DTRACE_PROC(lwp__exit);
372 	DTRACE_PROC1(exit, int, why);
373 
374 	/*
375 	 * Will perform any brand specific proc exit processing, since this
376 	 * is always the last lwp, will also perform lwp_exit and free brand
377 	 * data
378 	 */
379 	if (PROC_IS_BRANDED(p)) {
380 		lwp_detach_brand_hdlrs(lwp);
381 		brand_clearbrand(p, B_FALSE);
382 	}
383 
384 	/*
385 	 * Don't let init exit unless zone_start_init() failed its exec, or
386 	 * we are shutting down the zone or the machine.
387 	 *
388 	 * Since we are single threaded, we don't need to lock the
389 	 * following accesses to zone_proc_initpid.
390 	 */
391 	if (p->p_pid == z->zone_proc_initpid) {
392 		if (z->zone_boot_err == 0 &&
393 		    zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
394 		    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
395 			if (z->zone_restart_init == B_TRUE) {
396 				if (restart_init(what, why) == 0)
397 					return (0);
398 			} else {
399 				(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
400 				    CRED());
401 			}
402 		}
403 
404 		/*
405 		 * Since we didn't or couldn't restart init, we clear
406 		 * the zone's init state and proceed with exit
407 		 * processing.
408 		 */
409 		z->zone_proc_initpid = -1;
410 	}
411 
412 	lwp_pcb_exit();
413 
414 	/*
415 	 * Allocate a sigqueue now, before we grab locks.
416 	 * It will be given to sigcld(), below.
417 	 * Special case:  If we will be making the process disappear
418 	 * without a trace because it is either:
419 	 *	* an exiting SSYS process, or
420 	 *	* a posix_spawn() vfork child who requests it,
421 	 * we don't bother to allocate a useless sigqueue.
	 * In that case sqp stays unset; it is only read on the
	 * !evaporate path at the end of this function.
422 	 */
423 	evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
424 	    why == CLD_EXITED && what == _EVAPORATE);
425 	if (!evaporate)
426 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
427 
428 	/*
429 	 * revoke any doors created by the process.
430 	 */
431 	if (p->p_door_list)
432 		door_exit();
433 
434 	/*
435 	 * Release schedctl data structures.
436 	 */
437 	if (p->p_pagep)
438 		schedctl_proc_cleanup();
439 
440 	/*
441 	 * make sure all pending kaio has completed.
442 	 */
443 	if (p->p_aio)
444 		aio_cleanup_exit();
445 
446 	/*
447 	 * discard the lwpchan cache.
448 	 */
449 	if (p->p_lcp != NULL)
450 		lwpchan_destroy_cache(0);
451 
452 	/*
453 	 * Clean up any DTrace helper actions or probes for the process.
454 	 */
455 	if (p->p_dtrace_helpers != NULL) {
456 		ASSERT(dtrace_helpers_cleanup != NULL);
457 		(*dtrace_helpers_cleanup)(p);
458 	}
459 
460 	/*
461 	 * Clean up any signalfd state for the process.
462 	 */
463 	if (p->p_sigfd != NULL) {
464 		VERIFY(sigfd_exit_helper != NULL);
465 		(*sigfd_exit_helper)();
466 	}
467 
468 	/* untimeout the realtime timers */
469 	if (p->p_itimer != NULL)
470 		timer_exit();
471 
472 	if ((tmp_id = p->p_alarmid) != 0) {
473 		p->p_alarmid = 0;
474 		(void) untimeout(tmp_id);
475 	}
476 
477 	/*
478 	 * If we had generated any upanic(2) state, free that now.
479 	 */
480 	if (p->p_upanic != NULL) {
481 		kmem_free(p->p_upanic, PRUPANIC_BUFLEN);
482 		p->p_upanic = NULL;
483 	}
484 
485 	/*
486 	 * Remove any fpollinfo_t's for this (last) thread from our file
487 	 * descriptors so closeall() can ASSERT() that they're all gone.
488 	 */
489 	pollcleanup();
490 
491 	if (p->p_rprof_cyclic != CYCLIC_NONE) {
492 		mutex_enter(&cpu_lock);
493 		cyclic_remove(p->p_rprof_cyclic);
494 		mutex_exit(&cpu_lock);
495 	}
496 
497 	mutex_enter(&p->p_lock);
498 
499 	/*
500 	 * Clean up any DTrace probes associated with this process.
501 	 */
502 	if (p->p_dtrace_probes) {
503 		ASSERT(dtrace_fasttrap_exit_ptr != NULL);
504 		dtrace_fasttrap_exit_ptr(p);
505 	}
506 
	/*
	 * p_lock is dropped around each untimeout() call, so p_itimerid
	 * is re-read on every pass until it is found to be zero.
	 */
507 	while ((tmp_id = p->p_itimerid) != 0) {
508 		p->p_itimerid = 0;
509 		mutex_exit(&p->p_lock);
510 		(void) untimeout(tmp_id);
511 		mutex_enter(&p->p_lock);
512 	}
513 
514 	lwp_cleanup();
515 
516 	/*
517 	 * We are about to exit; prevent our resource associations from
518 	 * being changed.
519 	 */
520 	pool_barrier_enter();
521 
522 	/*
523 	 * Block the process against /proc now that we have really
524 	 * acquired p->p_lock (to manipulate p_tlist at least).
525 	 */
526 	prbarrier(p);
527 
	/* Ignore every signal from now on and discard anything pending. */
528 	sigfillset(&p->p_ignore);
529 	sigemptyset(&p->p_siginfo);
530 	sigemptyset(&p->p_sig);
531 	sigemptyset(&p->p_extsig);
532 	sigemptyset(&t->t_sig);
533 	sigemptyset(&t->t_extsig);
534 	sigemptyset(&p->p_sigmask);
535 	sigdelq(p, t, 0);
536 	lwp->lwp_cursig = 0;
537 	lwp->lwp_extsig = 0;
538 	p->p_flag &= ~(SKILLED | SEXTKILLED);
539 	if (lwp->lwp_curinfo) {
540 		siginfofree(lwp->lwp_curinfo);
541 		lwp->lwp_curinfo = NULL;
542 	}
543 
544 	t->t_proc_flag |= TP_LWPEXIT;
545 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
546 	prlwpexit(t);		/* notify /proc */
547 	lwp_hash_out(p, t->t_tid);
548 	prexit(p);
549 
550 	p->p_lwpcnt = 0;
551 	p->p_tlist = NULL;
552 	sigqfree(p);
553 	term_mstate(t);
554 	p->p_mterm = gethrtime();
555 
	/*
	 * Detach the executable vnodes from the process while p_lock is
	 * held; they are closed and released further down.
	 */
556 	exec_vp = p->p_exec;
557 	execdir_vp = p->p_execdir;
558 	p->p_exec = NULLVP;
559 	p->p_execdir = NULLVP;
560 	mutex_exit(&p->p_lock);
561 
562 	pr_free_watched_pages(p);
563 
564 	closeall(P_FINFO(p));
565 
566 	/* Free the controlling tty.  (freectty() always assumes curproc.) */
567 	ASSERT(p == curproc);
568 	(void) freectty(B_TRUE);
569 
570 #if defined(__sparc)
571 	if (p->p_utraps != NULL)
572 		utrap_free(p);
573 #endif
574 	if (p->p_semacct)			/* IPC semaphore exit */
575 		semexit(p);
	/* Old-style wait status, used for accounting and contract exit. */
576 	rv = wstat(why, what);
577 
578 	acct(rv & 0xff);
579 	exacct_commit_proc(p, rv);
580 
581 	/*
582 	 * Release any resources associated with C2 auditing
583 	 */
584 	if (AU_AUDITING()) {
585 		/*
586 		 * audit exit system call
587 		 */
588 		audit_exit(why, what);
589 	}
590 
591 	/*
592 	 * Free address space.
593 	 */
594 	relvm();
595 
596 	if (exec_vp) {
597 		/*
598 		 * Close this executable which has been opened when the process
599 		 * was created by getproc().
600 		 */
601 		(void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
602 		VN_RELE(exec_vp);
603 	}
604 	if (execdir_vp)
605 		VN_RELE(execdir_vp);
606 
607 	/*
608 	 * Release held contracts.
609 	 */
610 	contract_exit(p);
611 
612 	/*
613 	 * Depart our encapsulating process contract.
614 	 */
615 	if ((p->p_flag & SSYS) == 0) {
616 		ASSERT(p->p_ct_process);
617 		contract_process_exit(p->p_ct_process, p, rv);
618 	}
619 
620 	/*
621 	 * Remove pool association, and block if requested by pool_do_bind.
622 	 */
623 	mutex_enter(&p->p_lock);
624 	ASSERT(p->p_pool->pool_ref > 0);
625 	atomic_dec_32(&p->p_pool->pool_ref);
626 	p->p_pool = pool_default;
627 	/*
628 	 * Now that our address space has been freed and all other threads
629 	 * in this process have exited, set the PEXITED pool flag.  This
630 	 * tells the pools subsystems to ignore this process if it was
631 	 * requested to rebind this process to a new pool.
632 	 */
633 	p->p_poolflag |= PEXITED;
634 	pool_barrier_exit();
635 	mutex_exit(&p->p_lock);
636 
	/*
	 * pidlock is held from here until after sigcld() or freeproc()
	 * has been called near the end of this function.
	 */
637 	mutex_enter(&pidlock);
638 
639 	/*
640 	 * Delete this process from the newstate list of its parent. We
641 	 * will put it in the right place in the sigcld in the end.
642 	 */
643 	delete_ns(p->p_parent, p);
644 
645 	/*
646 	 * Reassign the orphans to the next of kin.
647 	 * Don't rearrange init's orphanage.
648 	 */
649 	if ((q = p->p_orphan) != NULL && p != proc_init) {
650 
651 		proc_t *nokp = p->p_nextofkin;
652 
		/*
		 * Point every orphan at the new next of kin; the loop
		 * leaves q at the last orphan so the whole list can be
		 * spliced onto the front of nokp's orphan list.
		 */
653 		for (;;) {
654 			q->p_nextofkin = nokp;
655 			if (q->p_nextorph == NULL)
656 				break;
657 			q = q->p_nextorph;
658 		}
659 		q->p_nextorph = nokp->p_orphan;
660 		nokp->p_orphan = p->p_orphan;
661 		p->p_orphan = NULL;
662 	}
663 
664 	/*
665 	 * Reassign the children to init.
666 	 * Don't try to assign init's children to init.
667 	 */
668 	if ((q = p->p_child) != NULL && p != proc_init) {
669 		struct proc	*np;
670 		struct proc	*initp = proc_init;
671 		boolean_t	setzonetop = B_FALSE;
672 
673 		if (!INGLOBALZONE(curproc))
674 			setzonetop = B_TRUE;
675 
676 		pgdetach(p);
677 
678 		do {
679 			np = q->p_sibling;
680 			/*
681 			 * Delete it from its current parent new state
682 			 * list and add it to init new state list
683 			 */
684 			delete_ns(q->p_parent, q);
685 
686 			q->p_ppid = 1;
687 			q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
688 			if (setzonetop) {
689 				mutex_enter(&q->p_lock);
690 				q->p_flag |= SZONETOP;
691 				mutex_exit(&q->p_lock);
692 			}
693 			q->p_parent = initp;
694 
695 			/*
696 			 * Since q will be the first child,
697 			 * it will not have a previous sibling.
698 			 */
699 			q->p_psibling = NULL;
700 			if (initp->p_child) {
701 				initp->p_child->p_psibling = q;
702 			}
703 			q->p_sibling = initp->p_child;
704 			initp->p_child = q;
			/* A ptrace'd child is sent SIGKILL. */
705 			if (q->p_proc_flag & P_PR_PTRACE) {
706 				mutex_enter(&q->p_lock);
707 				sigtoproc(q, NULL, SIGKILL);
708 				mutex_exit(&q->p_lock);
709 			}
710 			/*
711 			 * sigcld() will add the child to parents
712 			 * newstate list.
713 			 */
714 			if (q->p_stat == SZOMB)
715 				sigcld(q, NULL);
716 		} while ((q = np) != NULL);
717 
718 		p->p_child = NULL;
719 		ASSERT(p->p_child_ns == NULL);
720 	}
721 
722 	TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
723 
724 	mutex_enter(&p->p_lock);
725 	CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
726 
727 	/*
728 	 * Have our task accumulate our resource usage data before they
729 	 * become contaminated by p_cacct etc., and before we renounce
730 	 * membership of the task.
731 	 *
732 	 * We do this regardless of whether or not task accounting is active.
733 	 * This is to avoid having nonsense data reported for this task if
734 	 * task accounting is subsequently enabled. The overhead is minimal;
735 	 * by this point, this process has accounted for the usage of all its
736 	 * LWPs. We nonetheless do the work here, and under the protection of
737 	 * pidlock, so that the movement of the process's usage to the task
738 	 * happens at the same time as the removal of the process from the
739 	 * task, from the point of view of exacct_snapshot_task_usage().
740 	 */
741 	exacct_update_task_mstate(p);
742 
	/*
	 * Fold the children's accumulated times and usage counters
	 * (p_cutime, p_cstime, p_cacct, p_cru) into this process's own.
	 */
743 	hrutime = mstate_aggr_state(p, LMS_USER);
744 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
745 	p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
746 	p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
747 
748 	p->p_acct[LMS_USER]	+= p->p_cacct[LMS_USER];
749 	p->p_acct[LMS_SYSTEM]	+= p->p_cacct[LMS_SYSTEM];
750 	p->p_acct[LMS_TRAP]	+= p->p_cacct[LMS_TRAP];
751 	p->p_acct[LMS_TFAULT]	+= p->p_cacct[LMS_TFAULT];
752 	p->p_acct[LMS_DFAULT]	+= p->p_cacct[LMS_DFAULT];
753 	p->p_acct[LMS_KFAULT]	+= p->p_cacct[LMS_KFAULT];
754 	p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
755 	p->p_acct[LMS_SLEEP]	+= p->p_cacct[LMS_SLEEP];
756 	p->p_acct[LMS_WAIT_CPU]	+= p->p_cacct[LMS_WAIT_CPU];
757 	p->p_acct[LMS_STOPPED]	+= p->p_cacct[LMS_STOPPED];
758 
759 	p->p_ru.minflt	+= p->p_cru.minflt;
760 	p->p_ru.majflt	+= p->p_cru.majflt;
761 	p->p_ru.nswap	+= p->p_cru.nswap;
762 	p->p_ru.inblock	+= p->p_cru.inblock;
763 	p->p_ru.oublock	+= p->p_cru.oublock;
764 	p->p_ru.msgsnd	+= p->p_cru.msgsnd;
765 	p->p_ru.msgrcv	+= p->p_cru.msgrcv;
766 	p->p_ru.nsignals += p->p_cru.nsignals;
767 	p->p_ru.nvcsw	+= p->p_cru.nvcsw;
768 	p->p_ru.nivcsw	+= p->p_cru.nivcsw;
769 	p->p_ru.sysc	+= p->p_cru.sysc;
770 	p->p_ru.ioch	+= p->p_cru.ioch;
771 
	/*
	 * The process is a zombie from here on.  Record how it died;
	 * winfo() reports p_wcode and p_wdata to a waiting parent.
	 */
772 	p->p_stat = SZOMB;
773 	p->p_proc_flag &= ~P_PR_PTRACE;
774 	p->p_wdata = what;
775 	p->p_wcode = (char)why;
776 
	/*
	 * Remember the directory vnodes and cwd string; they are released
	 * only after pidlock has been dropped (see below).
	 */
777 	cdir = PTOU(p)->u_cdir;
778 	rdir = PTOU(p)->u_rdir;
779 	cwd = PTOU(p)->u_cwd;
780 
781 	ASSERT(cdir != NULL || p->p_parent == &p0);
782 
783 	/*
784 	 * Release resource controls, as they are no longer enforceable.
785 	 */
786 	rctl_set_free(p->p_rctls);
787 
788 	/*
789 	 * Decrement tk_nlwps counter for our task.max-lwps resource control.
790 	 * An extended accounting record, if that facility is active, is
791 	 * scheduled to be written.  We cannot give up task and project
792 	 * membership at this point because that would allow zombies to escape
793 	 * from the max-processes resource controls.  Zombies stay in their
794 	 * current task and project until the process table slot is released
795 	 * in freeproc().
796 	 */
797 	tk = p->p_task;
798 
799 	mutex_enter(&p->p_zone->zone_nlwps_lock);
800 	tk->tk_nlwps--;
801 	tk->tk_proj->kpj_nlwps--;
802 	p->p_zone->zone_nlwps--;
803 	mutex_exit(&p->p_zone->zone_nlwps_lock);
804 
805 	/*
806 	 * Clear the lwp directory and the lwpid hash table
807 	 * now that /proc can't bother us any more.
808 	 * We free the memory below, after dropping p->p_lock.
809 	 */
810 	lwpdir = p->p_lwpdir;
811 	lwpdir_sz = p->p_lwpdir_sz;
812 	tidhash = p->p_tidhash;
813 	tidhash_sz = p->p_tidhash_sz;
814 	ret_tidhash = p->p_ret_tidhash;
815 	p->p_lwpdir = NULL;
816 	p->p_lwpfree = NULL;
817 	p->p_lwpdir_sz = 0;
818 	p->p_tidhash = NULL;
819 	p->p_tidhash_sz = 0;
820 	p->p_ret_tidhash = NULL;
821 
822 	/*
823 	 * If the process has context ops installed, call the exit routine
824 	 * on behalf of this last remaining thread. Normally exitpctx() is
825 	 * called during thread_exit() or lwp_exit(), but because this is the
826 	 * last thread in the process, we must call it here. By the time
827 	 * thread_exit() is called (below), the association with the relevant
828 	 * process has been lost.
829 	 *
830 	 * We also free the context here.
831 	 */
832 	if (p->p_pctx) {
833 		kpreempt_disable();
834 		exitpctx(p);
835 		kpreempt_enable();
836 
837 		freepctx(p, 0);
838 	}
839 
840 	/*
841 	 * curthread's proc pointer is changed to point to the 'sched'
842 	 * process for the corresponding zone, except in the case when
843 	 * the exiting process is in fact a zsched instance, in which
844 	 * case the proc pointer is set to p0.  We do so, so that the
845 	 * process still points at the right zone when we call the VN_RELE()
846 	 * below.
847 	 *
848 	 * This is because curthread's original proc pointer can be freed as
849 	 * soon as the child sends a SIGCLD to its parent.  We use zsched so
850 	 * that for user processes, even in the final moments of death, the
851 	 * process is still associated with its zone.
852 	 */
853 	if (p != t->t_procp->p_zone->zone_zsched)
854 		t->t_procp = t->t_procp->p_zone->zone_zsched;
855 	else
856 		t->t_procp = &p0;
857 
858 	mutex_exit(&p->p_lock);
859 	if (!evaporate) {
860 		p->p_pidflag &= ~CLDPEND;
861 		sigcld(p, sqp);
862 	} else {
863 		/*
864 		 * Do what sigcld() would do if the disposition
865 		 * of the SIGCHLD signal were set to be ignored.
866 		 */
867 		cv_broadcast(&p->p_srwchan_cv);
868 		freeproc(p);
869 	}
870 	mutex_exit(&pidlock);
871 
872 	/*
873 	 * We don't release u_cdir and u_rdir until SZOMB is set.
874 	 * This protects us against dofusers().
875 	 */
876 	if (cdir)
877 		VN_RELE(cdir);
878 	if (rdir)
879 		VN_RELE(rdir);
880 	if (cwd)
881 		refstr_rele(cwd);
882 
883 	/*
884 	 * task_rele() may ultimately cause the zone to go away (or
885 	 * may cause the last user process in a zone to go away, which
886 	 * signals zsched to go away).  So prior to this call, we must
887 	 * no longer point at zsched.
888 	 */
889 	t->t_procp = &p0;
890 
	/*
	 * Free the lwp directory and tid hash tables that were detached
	 * from the process above, including the chain of retired tables.
	 */
891 	kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
892 	kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
893 	while (ret_tidhash != NULL) {
894 		ret_tidhash_t *next = ret_tidhash->rth_next;
895 		kmem_free(ret_tidhash->rth_tidhash,
896 		    ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
897 		kmem_free(ret_tidhash, sizeof (*ret_tidhash));
898 		ret_tidhash = next;
899 	}
900 
901 	thread_exit();
902 	/* NOTREACHED */
903 }
904 
905 /*
906  * Format siginfo structure for wait system calls.
907  */
908 void
909 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
910 {
911 	ASSERT(MUTEX_HELD(&pidlock));
912 
913 	bzero(ip, sizeof (k_siginfo_t));
914 	ip->si_signo = SIGCLD;
915 	ip->si_code = pp->p_wcode;
916 	ip->si_pid = pp->p_pid;
917 	ip->si_ctid = PRCTID(pp);
918 	ip->si_zoneid = pp->p_zone->zone_id;
919 	ip->si_status = pp->p_wdata;
920 	ip->si_stime = pp->p_stime;
921 	ip->si_utime = pp->p_utime;
922 
923 	if (waitflag) {
924 		pp->p_wcode = 0;
925 		pp->p_wdata = 0;
926 		pp->p_pidflag &= ~CLDPEND;
927 	}
928 }
929 
930 /*
931  * Wait system call.
932  * Search for a terminated (zombie) child,
933  * finally lay it to rest, and collect its status.
934  * Look also for stopped children,
935  * and pass back status from them.
 *
 * idtype/id select which children are of interest: P_ALL (any child),
 * P_PID (the child whose pid is id) or P_PGID (children whose process
 * group is id).  options is a mask of W* flags; it selects the state
 * changes to report and must not be empty.  *ip is filled in by winfo()
 * for the child that is reported.
 *
 * Return value:
 *   0      - a child was reported in *ip, or WNOHANG was given and
 *            nothing was ready, in which case *ip is zeroed
 *   EINVAL - bad options, idtype or id
 *   ECHILD - there is no child that could satisfy the request
 *   EINTR  - the wait was interrupted (cv_wait_sig_swap() returned 0)
936  */
937 int
938 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
939 {
940 	int found;
941 	proc_t *cp, *pp;
942 	int proc_gone;
	/* Non-zero unless WNOWAIT asks us to leave the child's status alone. */
943 	int waitflag = !(options & WNOWAIT);
944 
945 	/*
946 	 * Obsolete flag, defined here only for binary compatibility
947 	 * with old statically linked executables.  Delete this when
948 	 * we no longer care about these old and broken applications.
949 	 */
950 #define	_WNOCHLD	0400
951 	options &= ~_WNOCHLD;
952 
953 	if (options == 0 || (options & ~WOPTMASK))
954 		return (EINVAL);
955 
956 	switch (idtype) {
957 	case P_PID:
958 	case P_PGID:
959 		if (id < 0 || id >= maxpid)
960 			return (EINVAL);
961 		/* FALLTHROUGH */
962 	case P_ALL:
963 		break;
964 	default:
965 		return (EINVAL);
966 	}
967 
968 	pp = ttoproc(curthread);
969 
970 	/*
971 	 * Take pidlock so that the child and sibling chains can be searched.
972 	 */
973 	mutex_enter(&pidlock);
974 
975 	/*
976 	 * if we are only looking for exited processes and child_ns list
977 	 * is empty no reason to look at all children.
978 	 */
979 	if (idtype == P_ALL &&
980 	    (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
981 	    pp->p_child_ns == NULL) {
982 		if (pp->p_child) {
983 			mutex_exit(&pidlock);
984 			bzero(ip, sizeof (k_siginfo_t));
985 			return (0);
986 		}
987 		mutex_exit(&pidlock);
988 		return (ECHILD);
989 	}
990 
	/*
	 * Each pass first scans the newstate list (p_child_ns) and then
	 * the full child list.  If nothing can be reported yet, we sleep
	 * on p_cv and go round again.
	 */
991 	while (pp->p_child != NULL) {
992 
993 		proc_gone = 0;
994 
995 		for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
996 			if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
997 				continue;
998 			if (idtype == P_PID && id != cp->p_pid)
999 				continue;
1000 			if (idtype == P_PGID && id != cp->p_pgrp)
1001 				continue;
1002 
1003 			switch (cp->p_wcode) {
1004 
1005 			case CLD_TRAPPED:
1006 			case CLD_STOPPED:
1007 			case CLD_CONTINUED:
1008 				cmn_err(CE_PANIC,
1009 				    "waitid: wrong state %d on the p_newstate"
1010 				    " list", cp->p_wcode);
1011 				break;
1012 
1013 			case CLD_EXITED:
1014 			case CLD_DUMPED:
1015 			case CLD_KILLED:
1016 				if (!(options & WEXITED)) {
1017 					/*
1018 					 * Count how many are already gone
1019 					 * for good.
1020 					 */
1021 					proc_gone++;
1022 					break;
1023 				}
				/*
				 * Report this child.  Unless WNOWAIT was
				 * given, its status is consumed and the
				 * process is freed.
				 */
1024 				if (!waitflag) {
1025 					winfo(cp, ip, 0);
1026 				} else {
1027 					winfo(cp, ip, 1);
1028 					freeproc(cp);
1029 				}
1030 				mutex_exit(&pidlock);
1031 				if (waitflag) {		/* accept SIGCLD */
1032 					sigcld_delete(ip);
1033 					sigcld_repost();
1034 				}
1035 				return (0);
1036 			}
1037 
			/* Only one child can match a P_PID request. */
1038 			if (idtype == P_PID)
1039 				break;
1040 		}
1041 
1042 		/*
1043 		 * Wow! None of the children on the newstate (p_child_ns)
1044 		 * list were interesting. Check all the kids!
1045 		 */
1046 		found = 0;
1047 		for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1048 			if (idtype == P_PID && id != cp->p_pid)
1049 				continue;
1050 			if (idtype == P_PGID && id != cp->p_pgrp)
1051 				continue;
1052 
			/*
			 * A "break" out of this switch still counts the
			 * child in "found" below; a "continue" skips it.
			 */
1053 			switch (cp->p_wcode) {
1054 			case CLD_TRAPPED:
1055 				if (!(options & WTRAPPED))
1056 					break;
1057 				winfo(cp, ip, waitflag);
1058 				mutex_exit(&pidlock);
1059 				if (waitflag) {		/* accept SIGCLD */
1060 					sigcld_delete(ip);
1061 					sigcld_repost();
1062 				}
1063 				return (0);
1064 
1065 			case CLD_STOPPED:
1066 				if (!(options & WSTOPPED))
1067 					break;
1068 				/* Is it still stopped? */
1069 				mutex_enter(&cp->p_lock);
1070 				if (!jobstopped(cp)) {
1071 					mutex_exit(&cp->p_lock);
1072 					break;
1073 				}
1074 				mutex_exit(&cp->p_lock);
1075 				winfo(cp, ip, waitflag);
1076 				mutex_exit(&pidlock);
1077 				if (waitflag) {		/* accept SIGCLD */
1078 					sigcld_delete(ip);
1079 					sigcld_repost();
1080 				}
1081 				return (0);
1082 
1083 			case CLD_CONTINUED:
1084 				if (!(options & WCONTINUED))
1085 					break;
1086 				winfo(cp, ip, waitflag);
1087 				mutex_exit(&pidlock);
1088 				if (waitflag) {		/* accept SIGCLD */
1089 					sigcld_delete(ip);
1090 					sigcld_repost();
1091 				}
1092 				return (0);
1093 
1094 			case CLD_EXITED:
1095 			case CLD_DUMPED:
1096 			case CLD_KILLED:
1097 				if (idtype != P_PID &&
1098 				    (cp->p_pidflag & CLDWAITPID))
1099 					continue;
1100 				/*
1101 				 * Don't complain if a process was found in
1102 				 * the first loop but we broke out of the loop
1103 				 * because of the arguments passed to us.
1104 				 */
1105 				if (proc_gone == 0) {
1106 					cmn_err(CE_PANIC,
1107 					    "waitid: wrong state on the"
1108 					    " p_child list");
1109 				} else {
1110 					break;
1111 				}
1112 			}
1113 
1114 			found++;
1115 
1116 			if (idtype == P_PID)
1117 				break;
1118 		}
1119 
1120 		/*
1121 		 * If we found no interesting processes at all,
1122 		 * break out and return ECHILD.
1123 		 */
1124 		if (found + proc_gone == 0)
1125 			break;
1126 
1127 		if (options & WNOHANG) {
1128 			mutex_exit(&pidlock);
1129 			bzero(ip, sizeof (k_siginfo_t));
1130 			/*
1131 			 * We should set ip->si_signo = SIGCLD,
1132 			 * but there is an SVVS test that expects
1133 			 * ip->si_signo to be zero in this case.
1134 			 */
1135 			return (0);
1136 		}
1137 
1138 		/*
1139 		 * If we found no processes of interest that could
1140 		 * change state while we wait, we don't wait at all.
1141 		 * Get out with ECHILD according to SVID.
1142 		 */
1143 		if (found == proc_gone)
1144 			break;
1145 
		/* Sleep until woken on p_cv; a zero return means EINTR. */
1146 		if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1147 			mutex_exit(&pidlock);
1148 			return (EINTR);
1149 		}
1150 	}
1151 	mutex_exit(&pidlock);
1152 	return (ECHILD);
1153 }
1154 
1155 int
1156 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1157 {
1158 	int error;
1159 	k_siginfo_t info;
1160 
1161 	if (error = waitid(idtype, id, &info, options))
1162 		return (set_errno(error));
1163 	if (copyout(&info, infop, sizeof (k_siginfo_t)))
1164 		return (set_errno(EFAULT));
1165 	return (0);
1166 }
1167 
1168 #ifdef _SYSCALL32_IMPL
1169 
1170 int
1171 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1172 {
1173 	int error;
1174 	k_siginfo_t info;
1175 	siginfo32_t info32;
1176 
1177 	if (error = waitid(idtype, id, &info, options))
1178 		return (set_errno(error));
1179 	siginfo_kto32(&info, &info32);
1180 	if (copyout(&info32, infop, sizeof (info32)))
1181 		return (set_errno(EFAULT));
1182 	return (0);
1183 }
1184 
1185 #endif	/* _SYSCALL32_IMPL */
1186 
1187 void
1188 proc_detach(proc_t *p)
1189 {
1190 	proc_t *q;
1191 
1192 	ASSERT(MUTEX_HELD(&pidlock));
1193 
1194 	q = p->p_parent;
1195 	ASSERT(q != NULL);
1196 
1197 	/*
1198 	 * Take it off the newstate list of its parent
1199 	 */
1200 	delete_ns(q, p);
1201 
1202 	if (q->p_child == p) {
1203 		q->p_child = p->p_sibling;
1204 		/*
1205 		 * If the parent has no children, it better not
1206 		 * have any with new states either!
1207 		 */
1208 		ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1209 	}
1210 
1211 	if (p->p_sibling) {
1212 		p->p_sibling->p_psibling = p->p_psibling;
1213 	}
1214 
1215 	if (p->p_psibling) {
1216 		p->p_psibling->p_sibling = p->p_sibling;
1217 	}
1218 }
1219 
1220 /*
1221  * Remove zombie children from the process table.
1222  */
1223 void
1224 freeproc(proc_t *p)
1225 {
1226 	proc_t *q;
1227 	task_t *tk;
1228 
1229 	ASSERT(p->p_stat == SZOMB);
1230 	ASSERT(p->p_tlist == NULL);
1231 	ASSERT(MUTEX_HELD(&pidlock));
1232 
1233 	sigdelq(p, NULL, 0);
1234 	if (p->p_killsqp) {
1235 		siginfofree(p->p_killsqp);
1236 		p->p_killsqp = NULL;
1237 	}
1238 
1239 	prfree(p);	/* inform /proc */
1240 
1241 	/*
1242 	 * Don't free the init processes.
1243 	 * Other dying processes will access it.
1244 	 */
1245 	if (p == proc_init)
1246 		return;
1247 
1248 
1249 	/*
1250 	 * We wait until now to free the cred structure because a
1251 	 * zombie process's credentials may be examined by /proc.
1252 	 * No cred locking needed because there are no threads at this point.
1253 	 */
1254 	upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1255 	crfree(p->p_cred);
1256 	if (p->p_corefile != NULL) {
1257 		corectl_path_rele(p->p_corefile);
1258 		p->p_corefile = NULL;
1259 	}
1260 	if (p->p_content != NULL) {
1261 		corectl_content_rele(p->p_content);
1262 		p->p_content = NULL;
1263 	}
1264 
1265 	if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1266 	    (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1267 		/*
1268 		 * This should still do the right thing since p_utime/stime
1269 		 * get set to the correct value on process exit, so it
1270 		 * should get properly updated
1271 		 */
1272 		p->p_nextofkin->p_cutime += p->p_utime;
1273 		p->p_nextofkin->p_cstime += p->p_stime;
1274 
1275 		p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1276 		p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1277 		p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1278 		p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1279 		p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1280 		p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1281 		p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1282 		    += p->p_acct[LMS_USER_LOCK];
1283 		p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1284 		p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1285 		    += p->p_acct[LMS_WAIT_CPU];
1286 		p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1287 
1288 		p->p_nextofkin->p_cru.minflt	+= p->p_ru.minflt;
1289 		p->p_nextofkin->p_cru.majflt	+= p->p_ru.majflt;
1290 		p->p_nextofkin->p_cru.nswap	+= p->p_ru.nswap;
1291 		p->p_nextofkin->p_cru.inblock	+= p->p_ru.inblock;
1292 		p->p_nextofkin->p_cru.oublock	+= p->p_ru.oublock;
1293 		p->p_nextofkin->p_cru.msgsnd	+= p->p_ru.msgsnd;
1294 		p->p_nextofkin->p_cru.msgrcv	+= p->p_ru.msgrcv;
1295 		p->p_nextofkin->p_cru.nsignals	+= p->p_ru.nsignals;
1296 		p->p_nextofkin->p_cru.nvcsw	+= p->p_ru.nvcsw;
1297 		p->p_nextofkin->p_cru.nivcsw	+= p->p_ru.nivcsw;
1298 		p->p_nextofkin->p_cru.sysc	+= p->p_ru.sysc;
1299 		p->p_nextofkin->p_cru.ioch	+= p->p_ru.ioch;
1300 
1301 	}
1302 
1303 	q = p->p_nextofkin;
1304 	if (q && q->p_orphan == p)
1305 		q->p_orphan = p->p_nextorph;
1306 	else if (q) {
1307 		for (q = q->p_orphan; q; q = q->p_nextorph)
1308 			if (q->p_nextorph == p)
1309 				break;
1310 		ASSERT(q && q->p_nextorph == p);
1311 		q->p_nextorph = p->p_nextorph;
1312 	}
1313 
1314 	/*
1315 	 * The process table slot is being freed, so it is now safe to give up
1316 	 * task and project membership.
1317 	 */
1318 	mutex_enter(&p->p_lock);
1319 	tk = p->p_task;
1320 	task_detach(p);
1321 	mutex_exit(&p->p_lock);
1322 
1323 	proc_detach(p);
1324 	pid_exit(p, tk);	/* frees pid and proc structure */
1325 
1326 	task_rele(tk);
1327 }
1328 
1329 /*
1330  * Delete process "child" from the newstate list of process "parent"
1331  */
1332 void
1333 delete_ns(proc_t *parent, proc_t *child)
1334 {
1335 	proc_t **ns;
1336 
1337 	ASSERT(MUTEX_HELD(&pidlock));
1338 	ASSERT(child->p_parent == parent);
1339 	for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1340 		if (*ns == child) {
1341 
1342 			ASSERT((*ns)->p_parent == parent);
1343 
1344 			*ns = child->p_sibling_ns;
1345 			child->p_sibling_ns = NULL;
1346 			return;
1347 		}
1348 	}
1349 }
1350 
1351 /*
1352  * Add process "child" to the new state list of process "parent"
1353  */
1354 void
1355 add_ns(proc_t *parent, proc_t *child)
1356 {
1357 	ASSERT(child->p_sibling_ns == NULL);
1358 	child->p_sibling_ns = parent->p_child_ns;
1359 	parent->p_child_ns = child;
1360 }
1361