/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/user.h>
35 #include <sys/errno.h>
36 #include <sys/proc.h>
37 #include <sys/ucontext.h>
38 #include <sys/procfs.h>
39 #include <sys/vnode.h>
40 #include <sys/acct.h>
41 #include <sys/var.h>
42 #include <sys/cmn_err.h>
43 #include <sys/debug.h>
44 #include <sys/wait.h>
45 #include <sys/siginfo.h>
46 #include <sys/procset.h>
47 #include <sys/class.h>
48 #include <sys/file.h>
49 #include <sys/session.h>
50 #include <sys/kmem.h>
51 #include <sys/vtrace.h>
52 #include <sys/prsystm.h>
53 #include <sys/ipc.h>
54 #include <sys/sem_impl.h>
55 #include <c2/audit.h>
56 #include <sys/aio_impl.h>
57 #include <vm/as.h>
58 #include <sys/poll.h>
59 #include <sys/door.h>
60 #include <sys/lwpchan_impl.h>
61 #include <sys/utrap.h>
62 #include <sys/task.h>
63 #include <sys/exacct.h>
64 #include <sys/cyclic.h>
65 #include <sys/schedctl.h>
66 #include <sys/rctl.h>
67 #include <sys/contract_impl.h>
68 #include <sys/contract/process_impl.h>
69 #include <sys/list.h>
70 #include <sys/dtrace.h>
71 #include <sys/pool.h>
72 #include <sys/sdt.h>
73 #include <sys/corectl.h>
74 #include <sys/brand.h>
75 #include <sys/libc_kernel.h>
76
77 /*
78 * convert code/data pair into old style wait status
79 */
80 int
wstat(int code,int data)81 wstat(int code, int data)
82 {
83 int stat = (data & 0377);
84
85 switch (code) {
86 case CLD_EXITED:
87 stat <<= 8;
88 break;
89 case CLD_DUMPED:
90 stat |= WCOREFLG;
91 break;
92 case CLD_KILLED:
93 break;
94 case CLD_TRAPPED:
95 case CLD_STOPPED:
96 stat <<= 8;
97 stat |= WSTOPFLG;
98 break;
99 case CLD_CONTINUED:
100 stat = WCONTFLG;
101 break;
102 default:
103 cmn_err(CE_PANIC, "wstat: bad code");
104 /* NOTREACHED */
105 }
106 return (stat);
107 }
108
/*
 * Format a human-readable description of how a process terminated.
 *
 * The text is written into buf (bounded by bufsz via snprintf()) and buf
 * itself is returned so the call can be used directly as a printf-style
 * argument.  why is a CLD_* code; what is the exit status or signal number
 * that goes with it.  Codes other than CLD_EXITED, CLD_KILLED and
 * CLD_DUMPED produce a generic "unknown error" message that includes both
 * values.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	switch (why) {
	case CLD_EXITED:
		(void) snprintf(buf, bufsz, "exited with status %d", what);
		break;
	case CLD_KILLED:
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
		break;
	case CLD_DUMPED:
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
		break;
	default:
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
		break;
	}

	return (buf);
}
130
131 /*
132 * exit system call: pass back caller's arg.
133 */
134 void
rexit(int rval)135 rexit(int rval)
136 {
137 exit(CLD_EXITED, rval);
138 }
139
140 /*
141 * Called by proc_exit() when a zone's init exits, presumably because
142 * it failed. As long as the given zone is still in the "running"
143 * state, we will re-exec() init, but first we need to reset things
144 * which are usually inherited across exec() but will break init's
145 * assumption that it is being exec()'d from a virgin process. Most
146 * importantly this includes closing all file descriptors (exec only
147 * closes those marked close-on-exec) and resetting signals (exec only
148 * resets handled signals, and we need to clear any signals which
149 * killed init). Anything else that exec(2) says would be inherited,
150 * but would affect the execution of init, needs to be reset.
151 */
152 static int
restart_init(int what,int why)153 restart_init(int what, int why)
154 {
155 kthread_t *t = curthread;
156 klwp_t *lwp = ttolwp(t);
157 proc_t *p = ttoproc(t);
158 user_t *up = PTOU(p);
159
160 vnode_t *oldcd, *oldrd;
161 int i, err;
162 char reason_buf[64];
163
164 /*
165 * Let zone admin (and global zone admin if this is for a non-global
166 * zone) know that init has failed and will be restarted.
167 */
168 zcmn_err(p->p_zone->zone_id, CE_WARN,
169 "init(1M) %s: restarting automatically",
170 exit_reason(reason_buf, sizeof (reason_buf), what, why));
171
172 if (!INGLOBALZONE(p)) {
173 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
174 "restarting automatically",
175 p->p_zone->zone_name, p->p_pid, reason_buf);
176 }
177
178 /*
179 * Remove any fpollinfo_t's for this (last) thread from our file
180 * descriptors so closeall() can ASSERT() that they're all gone.
181 * Then close all open file descriptors in the process.
182 */
183 pollcleanup();
184 closeall(P_FINFO(p));
185
186 /*
187 * Grab p_lock and begin clearing miscellaneous global process
188 * state that needs to be reset before we exec the new init(1M).
189 */
190
191 mutex_enter(&p->p_lock);
192 prbarrier(p);
193
194 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
195 up->u_cmask = CMASK;
196
197 sigemptyset(&t->t_hold);
198 sigemptyset(&t->t_sig);
199 sigemptyset(&t->t_extsig);
200
201 sigemptyset(&p->p_sig);
202 sigemptyset(&p->p_extsig);
203
204 sigdelq(p, t, 0);
205 sigdelq(p, NULL, 0);
206
207 if (p->p_killsqp) {
208 siginfofree(p->p_killsqp);
209 p->p_killsqp = NULL;
210 }
211
212 /*
213 * Reset any signals that are ignored back to the default disposition.
214 * Other u_signal members will be cleared when exec calls sigdefault().
215 */
216 for (i = 1; i < NSIG; i++) {
217 if (up->u_signal[i - 1] == SIG_IGN) {
218 up->u_signal[i - 1] = SIG_DFL;
219 sigemptyset(&up->u_sigmask[i - 1]);
220 }
221 }
222
223 /*
224 * Clear the current signal, any signal info associated with it, and
225 * any signal information from contracts and/or contract templates.
226 */
227 lwp->lwp_cursig = 0;
228 lwp->lwp_extsig = 0;
229 if (lwp->lwp_curinfo != NULL) {
230 siginfofree(lwp->lwp_curinfo);
231 lwp->lwp_curinfo = NULL;
232 }
233 lwp_ctmpl_clear(lwp);
234
235 /*
236 * Reset both the process root directory and the current working
237 * directory to the root of the zone just as we do during boot.
238 */
239 VN_HOLD(p->p_zone->zone_rootvp);
240 oldrd = up->u_rdir;
241 up->u_rdir = p->p_zone->zone_rootvp;
242
243 VN_HOLD(p->p_zone->zone_rootvp);
244 oldcd = up->u_cdir;
245 up->u_cdir = p->p_zone->zone_rootvp;
246
247 if (up->u_cwd != NULL) {
248 refstr_rele(up->u_cwd);
249 up->u_cwd = NULL;
250 }
251
252 mutex_exit(&p->p_lock);
253
254 if (oldrd != NULL)
255 VN_RELE(oldrd);
256 if (oldcd != NULL)
257 VN_RELE(oldcd);
258
259 /* Free the controlling tty. (freectty() always assumes curproc.) */
260 ASSERT(p == curproc);
261 (void) freectty(B_TRUE);
262
263 /*
264 * Now exec() the new init(1M) on top of the current process. If we
265 * succeed, the caller will treat this like a successful system call.
266 * If we fail, we issue messages and the caller will proceed with exit.
267 */
268 err = exec_init(p->p_zone->zone_initname, NULL);
269
270 if (err == 0)
271 return (0);
272
273 zcmn_err(p->p_zone->zone_id, CE_WARN,
274 "failed to restart init(1M) (err=%d): system reboot required", err);
275
276 if (!INGLOBALZONE(p)) {
277 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
278 "(pid %d, err=%d): zoneadm(1M) boot required",
279 p->p_zone->zone_name, p->p_pid, err);
280 }
281
282 return (-1);
283 }
284
285 /*
286 * Release resources.
287 * Enter zombie state.
288 * Wake up parent and init processes,
289 * and dispose of children.
290 */
291 void
exit(int why,int what)292 exit(int why, int what)
293 {
294 /*
295 * If proc_exit() fails, then some other lwp in the process
296 * got there first. We just have to call lwp_exit() to allow
297 * the other lwp to finish exiting the process. Otherwise we're
298 * restarting init, and should return.
299 */
300 if (proc_exit(why, what) != 0) {
301 mutex_enter(&curproc->p_lock);
302 ASSERT(curproc->p_flag & SEXITLWPS);
303 lwp_exit();
304 /* NOTREACHED */
305 }
306 }
307
308 /*
309 * Set the SEXITING flag on the process, after making sure /proc does
310 * not have it locked. This is done in more places than proc_exit(),
311 * so it is a separate function.
312 */
313 void
proc_is_exiting(proc_t * p)314 proc_is_exiting(proc_t *p)
315 {
316 mutex_enter(&p->p_lock);
317 prbarrier(p);
318 p->p_flag |= SEXITING;
319 mutex_exit(&p->p_lock);
320 }
321
322 /*
323 * Return value:
324 * 1 - exitlwps() failed, call (or continue) lwp_exit()
325 * 0 - restarting init. Return through system call path
326 */
327 int
proc_exit(int why,int what)328 proc_exit(int why, int what)
329 {
330 kthread_t *t = curthread;
331 klwp_t *lwp = ttolwp(t);
332 proc_t *p = ttoproc(t);
333 zone_t *z = p->p_zone;
334 timeout_id_t tmp_id;
335 int rv;
336 proc_t *q;
337 task_t *tk;
338 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
339 sigqueue_t *sqp;
340 lwpdir_t *lwpdir;
341 uint_t lwpdir_sz;
342 tidhash_t *tidhash;
343 uint_t tidhash_sz;
344 ret_tidhash_t *ret_tidhash;
345 refstr_t *cwd;
346 hrtime_t hrutime, hrstime;
347 int evaporate;
348
349 /*
350 * Stop and discard the process's lwps except for the current one,
351 * unless some other lwp beat us to it. If exitlwps() fails then
352 * return and the calling lwp will call (or continue in) lwp_exit().
353 */
354 proc_is_exiting(p);
355 if (exitlwps(0) != 0)
356 return (1);
357
358 mutex_enter(&p->p_lock);
359 if (p->p_ttime > 0) {
360 /*
361 * Account any remaining ticks charged to this process
362 * on its way out.
363 */
364 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
365 p->p_ttime = 0;
366 }
367 mutex_exit(&p->p_lock);
368
369 DTRACE_PROC(lwp__exit);
370 DTRACE_PROC1(exit, int, why);
371
372 /*
373 * Will perform any brand specific proc exit processing, since this
374 * is always the last lwp, will also perform lwp_exit and free brand
375 * data
376 */
377 if (PROC_IS_BRANDED(p)) {
378 lwp_detach_brand_hdlrs(lwp);
379 brand_clearbrand(p, B_FALSE);
380 }
381
382 /*
383 * Don't let init exit unless zone_start_init() failed its exec, or
384 * we are shutting down the zone or the machine.
385 *
386 * Since we are single threaded, we don't need to lock the
387 * following accesses to zone_proc_initpid.
388 */
389 if (p->p_pid == z->zone_proc_initpid) {
390 if (z->zone_boot_err == 0 &&
391 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
392 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
393 if (z->zone_restart_init == B_TRUE) {
394 if (restart_init(what, why) == 0)
395 return (0);
396 } else {
397 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
398 CRED());
399 }
400 }
401
402 /*
403 * Since we didn't or couldn't restart init, we clear
404 * the zone's init state and proceed with exit
405 * processing.
406 */
407 z->zone_proc_initpid = -1;
408 }
409
410 lwp_pcb_exit();
411
412 /*
413 * Allocate a sigqueue now, before we grab locks.
414 * It will be given to sigcld(), below.
415 * Special case: If we will be making the process disappear
416 * without a trace because it is either:
417 * * an exiting SSYS process, or
418 * * a posix_spawn() vfork child who requests it,
419 * we don't bother to allocate a useless sigqueue.
420 */
421 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
422 why == CLD_EXITED && what == _EVAPORATE);
423 if (!evaporate)
424 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
425
426 /*
427 * revoke any doors created by the process.
428 */
429 if (p->p_door_list)
430 door_exit();
431
432 /*
433 * Release schedctl data structures.
434 */
435 if (p->p_pagep)
436 schedctl_proc_cleanup();
437
438 /*
439 * make sure all pending kaio has completed.
440 */
441 if (p->p_aio)
442 aio_cleanup_exit();
443
444 /*
445 * discard the lwpchan cache.
446 */
447 if (p->p_lcp != NULL)
448 lwpchan_destroy_cache(0);
449
450 /*
451 * Clean up any DTrace helper actions or probes for the process.
452 */
453 if (p->p_dtrace_helpers != NULL) {
454 ASSERT(dtrace_helpers_cleanup != NULL);
455 (*dtrace_helpers_cleanup)();
456 }
457
458 /* untimeout the realtime timers */
459 if (p->p_itimer != NULL)
460 timer_exit();
461
462 if ((tmp_id = p->p_alarmid) != 0) {
463 p->p_alarmid = 0;
464 (void) untimeout(tmp_id);
465 }
466
467 /*
468 * Remove any fpollinfo_t's for this (last) thread from our file
469 * descriptors so closeall() can ASSERT() that they're all gone.
470 */
471 pollcleanup();
472
473 if (p->p_rprof_cyclic != CYCLIC_NONE) {
474 mutex_enter(&cpu_lock);
475 cyclic_remove(p->p_rprof_cyclic);
476 mutex_exit(&cpu_lock);
477 }
478
479 mutex_enter(&p->p_lock);
480
481 /*
482 * Clean up any DTrace probes associated with this process.
483 */
484 if (p->p_dtrace_probes) {
485 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
486 dtrace_fasttrap_exit_ptr(p);
487 }
488
489 while ((tmp_id = p->p_itimerid) != 0) {
490 p->p_itimerid = 0;
491 mutex_exit(&p->p_lock);
492 (void) untimeout(tmp_id);
493 mutex_enter(&p->p_lock);
494 }
495
496 lwp_cleanup();
497
498 /*
499 * We are about to exit; prevent our resource associations from
500 * being changed.
501 */
502 pool_barrier_enter();
503
504 /*
505 * Block the process against /proc now that we have really
506 * acquired p->p_lock (to manipulate p_tlist at least).
507 */
508 prbarrier(p);
509
510 sigfillset(&p->p_ignore);
511 sigemptyset(&p->p_siginfo);
512 sigemptyset(&p->p_sig);
513 sigemptyset(&p->p_extsig);
514 sigemptyset(&t->t_sig);
515 sigemptyset(&t->t_extsig);
516 sigemptyset(&p->p_sigmask);
517 sigdelq(p, t, 0);
518 lwp->lwp_cursig = 0;
519 lwp->lwp_extsig = 0;
520 p->p_flag &= ~(SKILLED | SEXTKILLED);
521 if (lwp->lwp_curinfo) {
522 siginfofree(lwp->lwp_curinfo);
523 lwp->lwp_curinfo = NULL;
524 }
525
526 t->t_proc_flag |= TP_LWPEXIT;
527 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
528 prlwpexit(t); /* notify /proc */
529 lwp_hash_out(p, t->t_tid);
530 prexit(p);
531
532 p->p_lwpcnt = 0;
533 p->p_tlist = NULL;
534 sigqfree(p);
535 term_mstate(t);
536 p->p_mterm = gethrtime();
537
538 exec_vp = p->p_exec;
539 execdir_vp = p->p_execdir;
540 p->p_exec = NULLVP;
541 p->p_execdir = NULLVP;
542 mutex_exit(&p->p_lock);
543
544 pr_free_watched_pages(p);
545
546 closeall(P_FINFO(p));
547
548 /* Free the controlling tty. (freectty() always assumes curproc.) */
549 ASSERT(p == curproc);
550 (void) freectty(B_TRUE);
551
552 #if defined(__sparc)
553 if (p->p_utraps != NULL)
554 utrap_free(p);
555 #endif
556 if (p->p_semacct) /* IPC semaphore exit */
557 semexit(p);
558 rv = wstat(why, what);
559
560 acct(rv & 0xff);
561 exacct_commit_proc(p, rv);
562
563 /*
564 * Release any resources associated with C2 auditing
565 */
566 if (AU_AUDITING()) {
567 /*
568 * audit exit system call
569 */
570 audit_exit(why, what);
571 }
572
573 /*
574 * Free address space.
575 */
576 relvm();
577
578 if (exec_vp) {
579 /*
580 * Close this executable which has been opened when the process
581 * was created by getproc().
582 */
583 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
584 VN_RELE(exec_vp);
585 }
586 if (execdir_vp)
587 VN_RELE(execdir_vp);
588
589 /*
590 * Release held contracts.
591 */
592 contract_exit(p);
593
594 /*
595 * Depart our encapsulating process contract.
596 */
597 if ((p->p_flag & SSYS) == 0) {
598 ASSERT(p->p_ct_process);
599 contract_process_exit(p->p_ct_process, p, rv);
600 }
601
602 /*
603 * Remove pool association, and block if requested by pool_do_bind.
604 */
605 mutex_enter(&p->p_lock);
606 ASSERT(p->p_pool->pool_ref > 0);
607 atomic_dec_32(&p->p_pool->pool_ref);
608 p->p_pool = pool_default;
609 /*
610 * Now that our address space has been freed and all other threads
611 * in this process have exited, set the PEXITED pool flag. This
612 * tells the pools subsystems to ignore this process if it was
613 * requested to rebind this process to a new pool.
614 */
615 p->p_poolflag |= PEXITED;
616 pool_barrier_exit();
617 mutex_exit(&p->p_lock);
618
619 mutex_enter(&pidlock);
620
621 /*
622 * Delete this process from the newstate list of its parent. We
623 * will put it in the right place in the sigcld in the end.
624 */
625 delete_ns(p->p_parent, p);
626
627 /*
628 * Reassign the orphans to the next of kin.
629 * Don't rearrange init's orphanage.
630 */
631 if ((q = p->p_orphan) != NULL && p != proc_init) {
632
633 proc_t *nokp = p->p_nextofkin;
634
635 for (;;) {
636 q->p_nextofkin = nokp;
637 if (q->p_nextorph == NULL)
638 break;
639 q = q->p_nextorph;
640 }
641 q->p_nextorph = nokp->p_orphan;
642 nokp->p_orphan = p->p_orphan;
643 p->p_orphan = NULL;
644 }
645
646 /*
647 * Reassign the children to init.
648 * Don't try to assign init's children to init.
649 */
650 if ((q = p->p_child) != NULL && p != proc_init) {
651 struct proc *np;
652 struct proc *initp = proc_init;
653 boolean_t setzonetop = B_FALSE;
654
655 if (!INGLOBALZONE(curproc))
656 setzonetop = B_TRUE;
657
658 pgdetach(p);
659
660 do {
661 np = q->p_sibling;
662 /*
663 * Delete it from its current parent new state
664 * list and add it to init new state list
665 */
666 delete_ns(q->p_parent, q);
667
668 q->p_ppid = 1;
669 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
670 if (setzonetop) {
671 mutex_enter(&q->p_lock);
672 q->p_flag |= SZONETOP;
673 mutex_exit(&q->p_lock);
674 }
675 q->p_parent = initp;
676
677 /*
678 * Since q will be the first child,
679 * it will not have a previous sibling.
680 */
681 q->p_psibling = NULL;
682 if (initp->p_child) {
683 initp->p_child->p_psibling = q;
684 }
685 q->p_sibling = initp->p_child;
686 initp->p_child = q;
687 if (q->p_proc_flag & P_PR_PTRACE) {
688 mutex_enter(&q->p_lock);
689 sigtoproc(q, NULL, SIGKILL);
690 mutex_exit(&q->p_lock);
691 }
692 /*
693 * sigcld() will add the child to parents
694 * newstate list.
695 */
696 if (q->p_stat == SZOMB)
697 sigcld(q, NULL);
698 } while ((q = np) != NULL);
699
700 p->p_child = NULL;
701 ASSERT(p->p_child_ns == NULL);
702 }
703
704 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
705
706 mutex_enter(&p->p_lock);
707 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
708
709 /*
710 * Have our task accummulate our resource usage data before they
711 * become contaminated by p_cacct etc., and before we renounce
712 * membership of the task.
713 *
714 * We do this regardless of whether or not task accounting is active.
715 * This is to avoid having nonsense data reported for this task if
716 * task accounting is subsequently enabled. The overhead is minimal;
717 * by this point, this process has accounted for the usage of all its
718 * LWPs. We nonetheless do the work here, and under the protection of
719 * pidlock, so that the movement of the process's usage to the task
720 * happens at the same time as the removal of the process from the
721 * task, from the point of view of exacct_snapshot_task_usage().
722 */
723 exacct_update_task_mstate(p);
724
725 hrutime = mstate_aggr_state(p, LMS_USER);
726 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
727 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
728 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
729
730 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
731 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
732 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
733 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
734 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
735 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
736 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
737 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
738 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
739 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];
740
741 p->p_ru.minflt += p->p_cru.minflt;
742 p->p_ru.majflt += p->p_cru.majflt;
743 p->p_ru.nswap += p->p_cru.nswap;
744 p->p_ru.inblock += p->p_cru.inblock;
745 p->p_ru.oublock += p->p_cru.oublock;
746 p->p_ru.msgsnd += p->p_cru.msgsnd;
747 p->p_ru.msgrcv += p->p_cru.msgrcv;
748 p->p_ru.nsignals += p->p_cru.nsignals;
749 p->p_ru.nvcsw += p->p_cru.nvcsw;
750 p->p_ru.nivcsw += p->p_cru.nivcsw;
751 p->p_ru.sysc += p->p_cru.sysc;
752 p->p_ru.ioch += p->p_cru.ioch;
753
754 p->p_stat = SZOMB;
755 p->p_proc_flag &= ~P_PR_PTRACE;
756 p->p_wdata = what;
757 p->p_wcode = (char)why;
758
759 cdir = PTOU(p)->u_cdir;
760 rdir = PTOU(p)->u_rdir;
761 cwd = PTOU(p)->u_cwd;
762
763 ASSERT(cdir != NULL || p->p_parent == &p0);
764
765 /*
766 * Release resource controls, as they are no longer enforceable.
767 */
768 rctl_set_free(p->p_rctls);
769
770 /*
771 * Decrement tk_nlwps counter for our task.max-lwps resource control.
772 * An extended accounting record, if that facility is active, is
773 * scheduled to be written. We cannot give up task and project
774 * membership at this point because that would allow zombies to escape
775 * from the max-processes resource controls. Zombies stay in their
776 * current task and project until the process table slot is released
777 * in freeproc().
778 */
779 tk = p->p_task;
780
781 mutex_enter(&p->p_zone->zone_nlwps_lock);
782 tk->tk_nlwps--;
783 tk->tk_proj->kpj_nlwps--;
784 p->p_zone->zone_nlwps--;
785 mutex_exit(&p->p_zone->zone_nlwps_lock);
786
787 /*
788 * Clear the lwp directory and the lwpid hash table
789 * now that /proc can't bother us any more.
790 * We free the memory below, after dropping p->p_lock.
791 */
792 lwpdir = p->p_lwpdir;
793 lwpdir_sz = p->p_lwpdir_sz;
794 tidhash = p->p_tidhash;
795 tidhash_sz = p->p_tidhash_sz;
796 ret_tidhash = p->p_ret_tidhash;
797 p->p_lwpdir = NULL;
798 p->p_lwpfree = NULL;
799 p->p_lwpdir_sz = 0;
800 p->p_tidhash = NULL;
801 p->p_tidhash_sz = 0;
802 p->p_ret_tidhash = NULL;
803
804 /*
805 * If the process has context ops installed, call the exit routine
806 * on behalf of this last remaining thread. Normally exitpctx() is
807 * called during thread_exit() or lwp_exit(), but because this is the
808 * last thread in the process, we must call it here. By the time
809 * thread_exit() is called (below), the association with the relevant
810 * process has been lost.
811 *
812 * We also free the context here.
813 */
814 if (p->p_pctx) {
815 kpreempt_disable();
816 exitpctx(p);
817 kpreempt_enable();
818
819 freepctx(p, 0);
820 }
821
822 /*
823 * curthread's proc pointer is changed to point to the 'sched'
824 * process for the corresponding zone, except in the case when
825 * the exiting process is in fact a zsched instance, in which
826 * case the proc pointer is set to p0. We do so, so that the
827 * process still points at the right zone when we call the VN_RELE()
828 * below.
829 *
830 * This is because curthread's original proc pointer can be freed as
831 * soon as the child sends a SIGCLD to its parent. We use zsched so
832 * that for user processes, even in the final moments of death, the
833 * process is still associated with its zone.
834 */
835 if (p != t->t_procp->p_zone->zone_zsched)
836 t->t_procp = t->t_procp->p_zone->zone_zsched;
837 else
838 t->t_procp = &p0;
839
840 mutex_exit(&p->p_lock);
841 if (!evaporate) {
842 p->p_pidflag &= ~CLDPEND;
843 sigcld(p, sqp);
844 } else {
845 /*
846 * Do what sigcld() would do if the disposition
847 * of the SIGCHLD signal were set to be ignored.
848 */
849 cv_broadcast(&p->p_srwchan_cv);
850 freeproc(p);
851 }
852 mutex_exit(&pidlock);
853
854 /*
855 * We don't release u_cdir and u_rdir until SZOMB is set.
856 * This protects us against dofusers().
857 */
858 if (cdir)
859 VN_RELE(cdir);
860 if (rdir)
861 VN_RELE(rdir);
862 if (cwd)
863 refstr_rele(cwd);
864
865 /*
866 * task_rele() may ultimately cause the zone to go away (or
867 * may cause the last user process in a zone to go away, which
868 * signals zsched to go away). So prior to this call, we must
869 * no longer point at zsched.
870 */
871 t->t_procp = &p0;
872
873 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
874 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
875 while (ret_tidhash != NULL) {
876 ret_tidhash_t *next = ret_tidhash->rth_next;
877 kmem_free(ret_tidhash->rth_tidhash,
878 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
879 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
880 ret_tidhash = next;
881 }
882
883 thread_exit();
884 /* NOTREACHED */
885 }
886
887 /*
888 * Format siginfo structure for wait system calls.
889 */
890 void
winfo(proc_t * pp,k_siginfo_t * ip,int waitflag)891 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
892 {
893 ASSERT(MUTEX_HELD(&pidlock));
894
895 bzero(ip, sizeof (k_siginfo_t));
896 ip->si_signo = SIGCLD;
897 ip->si_code = pp->p_wcode;
898 ip->si_pid = pp->p_pid;
899 ip->si_ctid = PRCTID(pp);
900 ip->si_zoneid = pp->p_zone->zone_id;
901 ip->si_status = pp->p_wdata;
902 ip->si_stime = pp->p_stime;
903 ip->si_utime = pp->p_utime;
904
905 if (waitflag) {
906 pp->p_wcode = 0;
907 pp->p_wdata = 0;
908 pp->p_pidflag &= ~CLDPEND;
909 }
910 }
911
912 /*
913 * Wait system call.
914 * Search for a terminated (zombie) child,
915 * finally lay it to rest, and collect its status.
916 * Look also for stopped children,
917 * and pass back status from them.
918 */
919 int
waitid(idtype_t idtype,id_t id,k_siginfo_t * ip,int options)920 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
921 {
922 int found;
923 proc_t *cp, *pp;
924 int proc_gone;
925 int waitflag = !(options & WNOWAIT);
926
927 /*
928 * Obsolete flag, defined here only for binary compatibility
929 * with old statically linked executables. Delete this when
930 * we no longer care about these old and broken applications.
931 */
932 #define _WNOCHLD 0400
933 options &= ~_WNOCHLD;
934
935 if (options == 0 || (options & ~WOPTMASK))
936 return (EINVAL);
937
938 switch (idtype) {
939 case P_PID:
940 case P_PGID:
941 if (id < 0 || id >= maxpid)
942 return (EINVAL);
943 /* FALLTHROUGH */
944 case P_ALL:
945 break;
946 default:
947 return (EINVAL);
948 }
949
950 pp = ttoproc(curthread);
951
952 /*
953 * lock parent mutex so that sibling chain can be searched.
954 */
955 mutex_enter(&pidlock);
956
957 /*
958 * if we are only looking for exited processes and child_ns list
959 * is empty no reason to look at all children.
960 */
961 if (idtype == P_ALL &&
962 (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
963 pp->p_child_ns == NULL) {
964 if (pp->p_child) {
965 mutex_exit(&pidlock);
966 bzero(ip, sizeof (k_siginfo_t));
967 return (0);
968 }
969 mutex_exit(&pidlock);
970 return (ECHILD);
971 }
972
973 while (pp->p_child != NULL) {
974
975 proc_gone = 0;
976
977 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
978 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
979 continue;
980 if (idtype == P_PID && id != cp->p_pid)
981 continue;
982 if (idtype == P_PGID && id != cp->p_pgrp)
983 continue;
984
985 switch (cp->p_wcode) {
986
987 case CLD_TRAPPED:
988 case CLD_STOPPED:
989 case CLD_CONTINUED:
990 cmn_err(CE_PANIC,
991 "waitid: wrong state %d on the p_newstate"
992 " list", cp->p_wcode);
993 break;
994
995 case CLD_EXITED:
996 case CLD_DUMPED:
997 case CLD_KILLED:
998 if (!(options & WEXITED)) {
999 /*
1000 * Count how many are already gone
1001 * for good.
1002 */
1003 proc_gone++;
1004 break;
1005 }
1006 if (!waitflag) {
1007 winfo(cp, ip, 0);
1008 } else {
1009 winfo(cp, ip, 1);
1010 freeproc(cp);
1011 }
1012 mutex_exit(&pidlock);
1013 if (waitflag) { /* accept SIGCLD */
1014 sigcld_delete(ip);
1015 sigcld_repost();
1016 }
1017 return (0);
1018 }
1019
1020 if (idtype == P_PID)
1021 break;
1022 }
1023
1024 /*
1025 * Wow! None of the threads on the p_sibling_ns list were
1026 * interesting threads. Check all the kids!
1027 */
1028 found = 0;
1029 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1030 if (idtype == P_PID && id != cp->p_pid)
1031 continue;
1032 if (idtype == P_PGID && id != cp->p_pgrp)
1033 continue;
1034
1035 switch (cp->p_wcode) {
1036 case CLD_TRAPPED:
1037 if (!(options & WTRAPPED))
1038 break;
1039 winfo(cp, ip, waitflag);
1040 mutex_exit(&pidlock);
1041 if (waitflag) { /* accept SIGCLD */
1042 sigcld_delete(ip);
1043 sigcld_repost();
1044 }
1045 return (0);
1046
1047 case CLD_STOPPED:
1048 if (!(options & WSTOPPED))
1049 break;
1050 /* Is it still stopped? */
1051 mutex_enter(&cp->p_lock);
1052 if (!jobstopped(cp)) {
1053 mutex_exit(&cp->p_lock);
1054 break;
1055 }
1056 mutex_exit(&cp->p_lock);
1057 winfo(cp, ip, waitflag);
1058 mutex_exit(&pidlock);
1059 if (waitflag) { /* accept SIGCLD */
1060 sigcld_delete(ip);
1061 sigcld_repost();
1062 }
1063 return (0);
1064
1065 case CLD_CONTINUED:
1066 if (!(options & WCONTINUED))
1067 break;
1068 winfo(cp, ip, waitflag);
1069 mutex_exit(&pidlock);
1070 if (waitflag) { /* accept SIGCLD */
1071 sigcld_delete(ip);
1072 sigcld_repost();
1073 }
1074 return (0);
1075
1076 case CLD_EXITED:
1077 case CLD_DUMPED:
1078 case CLD_KILLED:
1079 if (idtype != P_PID &&
1080 (cp->p_pidflag & CLDWAITPID))
1081 continue;
1082 /*
1083 * Don't complain if a process was found in
1084 * the first loop but we broke out of the loop
1085 * because of the arguments passed to us.
1086 */
1087 if (proc_gone == 0) {
1088 cmn_err(CE_PANIC,
1089 "waitid: wrong state on the"
1090 " p_child list");
1091 } else {
1092 break;
1093 }
1094 }
1095
1096 found++;
1097
1098 if (idtype == P_PID)
1099 break;
1100 }
1101
1102 /*
1103 * If we found no interesting processes at all,
1104 * break out and return ECHILD.
1105 */
1106 if (found + proc_gone == 0)
1107 break;
1108
1109 if (options & WNOHANG) {
1110 mutex_exit(&pidlock);
1111 bzero(ip, sizeof (k_siginfo_t));
1112 /*
1113 * We should set ip->si_signo = SIGCLD,
1114 * but there is an SVVS test that expects
1115 * ip->si_signo to be zero in this case.
1116 */
1117 return (0);
1118 }
1119
1120 /*
1121 * If we found no processes of interest that could
1122 * change state while we wait, we don't wait at all.
1123 * Get out with ECHILD according to SVID.
1124 */
1125 if (found == proc_gone)
1126 break;
1127
1128 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1129 mutex_exit(&pidlock);
1130 return (EINTR);
1131 }
1132 }
1133 mutex_exit(&pidlock);
1134 return (ECHILD);
1135 }
1136
1137 int
waitsys(idtype_t idtype,id_t id,siginfo_t * infop,int options)1138 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1139 {
1140 int error;
1141 k_siginfo_t info;
1142
1143 if (error = waitid(idtype, id, &info, options))
1144 return (set_errno(error));
1145 if (copyout(&info, infop, sizeof (k_siginfo_t)))
1146 return (set_errno(EFAULT));
1147 return (0);
1148 }
1149
#ifdef _SYSCALL32_IMPL

/*
 * Variant of waitsys() built only when _SYSCALL32_IMPL is defined.
 *
 * The k_siginfo_t produced by waitid() is converted with siginfo_kto32()
 * and the resulting siginfo32_t is what gets copied out to infop.
 * Returns 0 on success; on failure returns set_errno() of the waitid()
 * error, or of EFAULT if the copyout fails.
 */
int
waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
{
	int error;
	k_siginfo_t info;
	siginfo32_t info32;

	if ((error = waitid(idtype, id, &info, options)) != 0)
		return (set_errno(error));
	siginfo_kto32(&info, &info32);
	if (copyout(&info32, infop, sizeof (info32)))
		return (set_errno(EFAULT));
	return (0);
}

#endif	/* _SYSCALL32_IMPL */
1168
1169 void
proc_detach(proc_t * p)1170 proc_detach(proc_t *p)
1171 {
1172 proc_t *q;
1173
1174 ASSERT(MUTEX_HELD(&pidlock));
1175
1176 q = p->p_parent;
1177 ASSERT(q != NULL);
1178
1179 /*
1180 * Take it off the newstate list of its parent
1181 */
1182 delete_ns(q, p);
1183
1184 if (q->p_child == p) {
1185 q->p_child = p->p_sibling;
1186 /*
1187 * If the parent has no children, it better not
1188 * have any with new states either!
1189 */
1190 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1191 }
1192
1193 if (p->p_sibling) {
1194 p->p_sibling->p_psibling = p->p_psibling;
1195 }
1196
1197 if (p->p_psibling) {
1198 p->p_psibling->p_sibling = p->p_sibling;
1199 }
1200 }
1201
1202 /*
1203 * Remove zombie children from the process table.
1204 */
1205 void
freeproc(proc_t * p)1206 freeproc(proc_t *p)
1207 {
1208 proc_t *q;
1209 task_t *tk;
1210
1211 ASSERT(p->p_stat == SZOMB);
1212 ASSERT(p->p_tlist == NULL);
1213 ASSERT(MUTEX_HELD(&pidlock));
1214
1215 sigdelq(p, NULL, 0);
1216 if (p->p_killsqp) {
1217 siginfofree(p->p_killsqp);
1218 p->p_killsqp = NULL;
1219 }
1220
1221 prfree(p); /* inform /proc */
1222
1223 /*
1224 * Don't free the init processes.
1225 * Other dying processes will access it.
1226 */
1227 if (p == proc_init)
1228 return;
1229
1230
1231 /*
1232 * We wait until now to free the cred structure because a
1233 * zombie process's credentials may be examined by /proc.
1234 * No cred locking needed because there are no threads at this point.
1235 */
1236 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1237 crfree(p->p_cred);
1238 if (p->p_corefile != NULL) {
1239 corectl_path_rele(p->p_corefile);
1240 p->p_corefile = NULL;
1241 }
1242 if (p->p_content != NULL) {
1243 corectl_content_rele(p->p_content);
1244 p->p_content = NULL;
1245 }
1246
1247 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1248 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1249 /*
1250 * This should still do the right thing since p_utime/stime
1251 * get set to the correct value on process exit, so it
1252 * should get properly updated
1253 */
1254 p->p_nextofkin->p_cutime += p->p_utime;
1255 p->p_nextofkin->p_cstime += p->p_stime;
1256
1257 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1258 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1259 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1260 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1261 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1262 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1263 p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1264 += p->p_acct[LMS_USER_LOCK];
1265 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1266 p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1267 += p->p_acct[LMS_WAIT_CPU];
1268 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1269
1270 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt;
1271 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt;
1272 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap;
1273 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock;
1274 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock;
1275 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd;
1276 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv;
1277 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals;
1278 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw;
1279 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw;
1280 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc;
1281 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch;
1282
1283 }
1284
1285 q = p->p_nextofkin;
1286 if (q && q->p_orphan == p)
1287 q->p_orphan = p->p_nextorph;
1288 else if (q) {
1289 for (q = q->p_orphan; q; q = q->p_nextorph)
1290 if (q->p_nextorph == p)
1291 break;
1292 ASSERT(q && q->p_nextorph == p);
1293 q->p_nextorph = p->p_nextorph;
1294 }
1295
1296 /*
1297 * The process table slot is being freed, so it is now safe to give up
1298 * task and project membership.
1299 */
1300 mutex_enter(&p->p_lock);
1301 tk = p->p_task;
1302 task_detach(p);
1303 mutex_exit(&p->p_lock);
1304
1305 proc_detach(p);
1306 pid_exit(p, tk); /* frees pid and proc structure */
1307
1308 task_rele(tk);
1309 }
1310
1311 /*
1312 * Delete process "child" from the newstate list of process "parent"
1313 */
1314 void
delete_ns(proc_t * parent,proc_t * child)1315 delete_ns(proc_t *parent, proc_t *child)
1316 {
1317 proc_t **ns;
1318
1319 ASSERT(MUTEX_HELD(&pidlock));
1320 ASSERT(child->p_parent == parent);
1321 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1322 if (*ns == child) {
1323
1324 ASSERT((*ns)->p_parent == parent);
1325
1326 *ns = child->p_sibling_ns;
1327 child->p_sibling_ns = NULL;
1328 return;
1329 }
1330 }
1331 }
1332
1333 /*
1334 * Add process "child" to the new state list of process "parent"
1335 */
1336 void
add_ns(proc_t * parent,proc_t * child)1337 add_ns(proc_t *parent, proc_t *child)
1338 {
1339 ASSERT(child->p_sibling_ns == NULL);
1340 child->p_sibling_ns = parent->p_child_ns;
1341 parent->p_child_ns = child;
1342 }
1343