1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/user.h>
35 #include <sys/errno.h>
36 #include <sys/proc.h>
37 #include <sys/ucontext.h>
38 #include <sys/procfs.h>
39 #include <sys/vnode.h>
40 #include <sys/acct.h>
41 #include <sys/var.h>
42 #include <sys/cmn_err.h>
43 #include <sys/debug.h>
44 #include <sys/wait.h>
45 #include <sys/siginfo.h>
46 #include <sys/procset.h>
47 #include <sys/class.h>
48 #include <sys/file.h>
49 #include <sys/session.h>
50 #include <sys/kmem.h>
51 #include <sys/vtrace.h>
52 #include <sys/prsystm.h>
53 #include <sys/ipc.h>
54 #include <sys/sem_impl.h>
55 #include <c2/audit.h>
56 #include <sys/aio_impl.h>
57 #include <vm/as.h>
58 #include <sys/poll.h>
59 #include <sys/door.h>
60 #include <sys/lwpchan_impl.h>
61 #include <sys/utrap.h>
62 #include <sys/task.h>
63 #include <sys/exacct.h>
64 #include <sys/cyclic.h>
65 #include <sys/schedctl.h>
66 #include <sys/rctl.h>
67 #include <sys/contract_impl.h>
68 #include <sys/contract/process_impl.h>
69 #include <sys/list.h>
70 #include <sys/dtrace.h>
71 #include <sys/pool.h>
72 #include <sys/sdt.h>
73 #include <sys/corectl.h>
74 #include <sys/brand.h>
75 #include <sys/libc_kernel.h>
76
77 /*
78 * convert code/data pair into old style wait status
79 */
80 int
wstat(int code,int data)81 wstat(int code, int data)
82 {
83 int stat = (data & 0377);
84
85 switch (code) {
86 case CLD_EXITED:
87 stat <<= 8;
88 break;
89 case CLD_DUMPED:
90 stat |= WCOREFLG;
91 break;
92 case CLD_KILLED:
93 break;
94 case CLD_TRAPPED:
95 case CLD_STOPPED:
96 stat <<= 8;
97 stat |= WSTOPFLG;
98 break;
99 case CLD_CONTINUED:
100 stat = WCONTFLG;
101 break;
102 default:
103 cmn_err(CE_PANIC, "wstat: bad code");
104 /* NOTREACHED */
105 }
106 return (stat);
107 }
108
/*
 * Format a human-readable description of how a process terminated.
 *
 * buf, bufsz	caller-supplied output buffer and its size in bytes
 * what		exit status or signal number that accompanies `why'
 * why		CLD_* code; anything other than CLD_EXITED, CLD_KILLED
 *		or CLD_DUMPED yields a generic "unknown error" message
 *		that includes both values
 *
 * Returns `buf' so the call can be used directly as a printf argument.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	switch (why) {
	case CLD_EXITED:
		(void) snprintf(buf, bufsz, "exited with status %d", what);
		break;
	case CLD_KILLED:
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
		break;
	case CLD_DUMPED:
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
		break;
	default:
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
		break;
	}

	return (buf);
}
130
131 /*
132 * exit system call: pass back caller's arg.
133 */
134 void
rexit(int rval)135 rexit(int rval)
136 {
137 exit(CLD_EXITED, rval);
138 }
139
140 /*
141 * Called by proc_exit() when a zone's init exits, presumably because
142 * it failed. As long as the given zone is still in the "running"
143 * state, we will re-exec() init, but first we need to reset things
144 * which are usually inherited across exec() but will break init's
145 * assumption that it is being exec()'d from a virgin process. Most
146 * importantly this includes closing all file descriptors (exec only
147 * closes those marked close-on-exec) and resetting signals (exec only
148 * resets handled signals, and we need to clear any signals which
149 * killed init). Anything else that exec(2) says would be inherited,
150 * but would affect the execution of init, needs to be reset.
151 */
152 static int
restart_init(int what,int why)153 restart_init(int what, int why)
154 {
155 kthread_t *t = curthread;
156 klwp_t *lwp = ttolwp(t);
157 proc_t *p = ttoproc(t);
158 user_t *up = PTOU(p);
159
160 vnode_t *oldcd, *oldrd;
161 int i, err;
162 char reason_buf[64];
163
164 /*
165 * Let zone admin (and global zone admin if this is for a non-global
166 * zone) know that init has failed and will be restarted.
167 */
168 zcmn_err(p->p_zone->zone_id, CE_WARN,
169 "init(1M) %s: restarting automatically",
170 exit_reason(reason_buf, sizeof (reason_buf), what, why));
171
172 if (!INGLOBALZONE(p)) {
173 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
174 "restarting automatically",
175 p->p_zone->zone_name, p->p_pid, reason_buf);
176 }
177
178 /*
179 * Remove any fpollinfo_t's for this (last) thread from our file
180 * descriptors so closeall() can ASSERT() that they're all gone.
181 * Then close all open file descriptors in the process.
182 */
183 pollcleanup();
184 closeall(P_FINFO(p));
185
186 /*
187 * Grab p_lock and begin clearing miscellaneous global process
188 * state that needs to be reset before we exec the new init(1M).
189 */
190
191 mutex_enter(&p->p_lock);
192 prbarrier(p);
193
194 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
195 up->u_cmask = CMASK;
196
197 sigemptyset(&t->t_hold);
198 sigemptyset(&t->t_sig);
199 sigemptyset(&t->t_extsig);
200
201 sigemptyset(&p->p_sig);
202 sigemptyset(&p->p_extsig);
203
204 sigdelq(p, t, 0);
205 sigdelq(p, NULL, 0);
206
207 if (p->p_killsqp) {
208 siginfofree(p->p_killsqp);
209 p->p_killsqp = NULL;
210 }
211
212 /*
213 * Reset any signals that are ignored back to the default disposition.
214 * Other u_signal members will be cleared when exec calls sigdefault().
215 */
216 for (i = 1; i < NSIG; i++) {
217 if (up->u_signal[i - 1] == SIG_IGN) {
218 up->u_signal[i - 1] = SIG_DFL;
219 sigemptyset(&up->u_sigmask[i - 1]);
220 }
221 }
222
223 /*
224 * Clear the current signal, any signal info associated with it, and
225 * any signal information from contracts and/or contract templates.
226 */
227 lwp->lwp_cursig = 0;
228 lwp->lwp_extsig = 0;
229 if (lwp->lwp_curinfo != NULL) {
230 siginfofree(lwp->lwp_curinfo);
231 lwp->lwp_curinfo = NULL;
232 }
233 lwp_ctmpl_clear(lwp);
234
235 /*
236 * Reset both the process root directory and the current working
237 * directory to the root of the zone just as we do during boot.
238 */
239 VN_HOLD(p->p_zone->zone_rootvp);
240 oldrd = up->u_rdir;
241 up->u_rdir = p->p_zone->zone_rootvp;
242
243 VN_HOLD(p->p_zone->zone_rootvp);
244 oldcd = up->u_cdir;
245 up->u_cdir = p->p_zone->zone_rootvp;
246
247 if (up->u_cwd != NULL) {
248 refstr_rele(up->u_cwd);
249 up->u_cwd = NULL;
250 }
251
252 mutex_exit(&p->p_lock);
253
254 if (oldrd != NULL)
255 VN_RELE(oldrd);
256 if (oldcd != NULL)
257 VN_RELE(oldcd);
258
259 /* Free the controlling tty. (freectty() always assumes curproc.) */
260 ASSERT(p == curproc);
261 (void) freectty(B_TRUE);
262
263 /*
264 * Now exec() the new init(1M) on top of the current process. If we
265 * succeed, the caller will treat this like a successful system call.
266 * If we fail, we issue messages and the caller will proceed with exit.
267 */
268 err = exec_init(p->p_zone->zone_initname, NULL);
269
270 if (err == 0)
271 return (0);
272
273 zcmn_err(p->p_zone->zone_id, CE_WARN,
274 "failed to restart init(1M) (err=%d): system reboot required", err);
275
276 if (!INGLOBALZONE(p)) {
277 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
278 "(pid %d, err=%d): zoneadm(1M) boot required",
279 p->p_zone->zone_name, p->p_pid, err);
280 }
281
282 return (-1);
283 }
284
285 /*
286 * Release resources.
287 * Enter zombie state.
288 * Wake up parent and init processes,
289 * and dispose of children.
290 */
291 void
exit(int why,int what)292 exit(int why, int what)
293 {
294 /*
295 * If proc_exit() fails, then some other lwp in the process
296 * got there first. We just have to call lwp_exit() to allow
297 * the other lwp to finish exiting the process. Otherwise we're
298 * restarting init, and should return.
299 */
300 if (proc_exit(why, what) != 0) {
301 mutex_enter(&curproc->p_lock);
302 ASSERT(curproc->p_flag & SEXITLWPS);
303 lwp_exit();
304 /* NOTREACHED */
305 }
306 }
307
308 /*
309 * Set the SEXITING flag on the process, after making sure /proc does
310 * not have it locked. This is done in more places than proc_exit(),
311 * so it is a separate function.
312 */
313 void
proc_is_exiting(proc_t * p)314 proc_is_exiting(proc_t *p)
315 {
316 mutex_enter(&p->p_lock);
317 prbarrier(p);
318 p->p_flag |= SEXITING;
319 mutex_exit(&p->p_lock);
320 }
321
322 /*
323 * Return value:
324 * 1 - exitlwps() failed, call (or continue) lwp_exit()
325 * 0 - restarting init. Return through system call path
326 */
327 int
proc_exit(int why,int what)328 proc_exit(int why, int what)
329 {
330 kthread_t *t = curthread;
331 klwp_t *lwp = ttolwp(t);
332 proc_t *p = ttoproc(t);
333 zone_t *z = p->p_zone;
334 timeout_id_t tmp_id;
335 int rv;
336 proc_t *q;
337 task_t *tk;
338 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
339 sigqueue_t *sqp;
340 lwpdir_t *lwpdir;
341 uint_t lwpdir_sz;
342 tidhash_t *tidhash;
343 uint_t tidhash_sz;
344 ret_tidhash_t *ret_tidhash;
345 refstr_t *cwd;
346 hrtime_t hrutime, hrstime;
347 int evaporate;
348
349 /*
350 * Stop and discard the process's lwps except for the current one,
351 * unless some other lwp beat us to it. If exitlwps() fails then
352 * return and the calling lwp will call (or continue in) lwp_exit().
353 */
354 proc_is_exiting(p);
355 if (exitlwps(0) != 0)
356 return (1);
357
358 mutex_enter(&p->p_lock);
359 if (p->p_ttime > 0) {
360 /*
361 * Account any remaining ticks charged to this process
362 * on its way out.
363 */
364 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
365 p->p_ttime = 0;
366 }
367 mutex_exit(&p->p_lock);
368
369 DTRACE_PROC(lwp__exit);
370 DTRACE_PROC1(exit, int, why);
371
372 /*
373 * Will perform any brand specific proc exit processing, since this
374 * is always the last lwp, will also perform lwp_exit and free brand
375 * data
376 */
377 if (PROC_IS_BRANDED(p)) {
378 lwp_detach_brand_hdlrs(lwp);
379 brand_clearbrand(p, B_FALSE);
380 }
381
382 /*
383 * Don't let init exit unless zone_start_init() failed its exec, or
384 * we are shutting down the zone or the machine.
385 *
386 * Since we are single threaded, we don't need to lock the
387 * following accesses to zone_proc_initpid.
388 */
389 if (p->p_pid == z->zone_proc_initpid) {
390 if (z->zone_boot_err == 0 &&
391 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
392 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
393 if (z->zone_restart_init == B_TRUE) {
394 if (restart_init(what, why) == 0)
395 return (0);
396 } else {
397 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
398 CRED());
399 }
400 }
401
402 /*
403 * Since we didn't or couldn't restart init, we clear
404 * the zone's init state and proceed with exit
405 * processing.
406 */
407 z->zone_proc_initpid = -1;
408 }
409
410 lwp_pcb_exit();
411
412 /*
413 * Allocate a sigqueue now, before we grab locks.
414 * It will be given to sigcld(), below.
415 * Special case: If we will be making the process disappear
416 * without a trace because it is either:
417 * * an exiting SSYS process, or
418 * * a posix_spawn() vfork child who requests it,
419 * we don't bother to allocate a useless sigqueue.
420 */
421 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
422 why == CLD_EXITED && what == _EVAPORATE);
423 if (!evaporate)
424 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
425
426 /*
427 * revoke any doors created by the process.
428 */
429 if (p->p_door_list)
430 door_exit();
431
432 /*
433 * Release schedctl data structures.
434 */
435 if (p->p_pagep)
436 schedctl_proc_cleanup();
437
438 /*
439 * make sure all pending kaio has completed.
440 */
441 if (p->p_aio)
442 aio_cleanup_exit();
443
444 /*
445 * discard the lwpchan cache.
446 */
447 if (p->p_lcp != NULL)
448 lwpchan_destroy_cache(0);
449
450 /*
451 * Clean up any DTrace helper actions or probes for the process.
452 */
453 if (p->p_dtrace_helpers != NULL) {
454 ASSERT(dtrace_helpers_cleanup != NULL);
455 (*dtrace_helpers_cleanup)();
456 }
457
458 /*
459 * Clean up any signalfd state for the process.
460 */
461 if (p->p_sigfd != NULL) {
462 VERIFY(sigfd_exit_helper != NULL);
463 (*sigfd_exit_helper)();
464 }
465
466 /* untimeout the realtime timers */
467 if (p->p_itimer != NULL)
468 timer_exit();
469
470 if ((tmp_id = p->p_alarmid) != 0) {
471 p->p_alarmid = 0;
472 (void) untimeout(tmp_id);
473 }
474
475 /*
476 * Remove any fpollinfo_t's for this (last) thread from our file
477 * descriptors so closeall() can ASSERT() that they're all gone.
478 */
479 pollcleanup();
480
481 if (p->p_rprof_cyclic != CYCLIC_NONE) {
482 mutex_enter(&cpu_lock);
483 cyclic_remove(p->p_rprof_cyclic);
484 mutex_exit(&cpu_lock);
485 }
486
487 mutex_enter(&p->p_lock);
488
489 /*
490 * Clean up any DTrace probes associated with this process.
491 */
492 if (p->p_dtrace_probes) {
493 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
494 dtrace_fasttrap_exit_ptr(p);
495 }
496
497 while ((tmp_id = p->p_itimerid) != 0) {
498 p->p_itimerid = 0;
499 mutex_exit(&p->p_lock);
500 (void) untimeout(tmp_id);
501 mutex_enter(&p->p_lock);
502 }
503
504 lwp_cleanup();
505
506 /*
507 * We are about to exit; prevent our resource associations from
508 * being changed.
509 */
510 pool_barrier_enter();
511
512 /*
513 * Block the process against /proc now that we have really
514 * acquired p->p_lock (to manipulate p_tlist at least).
515 */
516 prbarrier(p);
517
518 sigfillset(&p->p_ignore);
519 sigemptyset(&p->p_siginfo);
520 sigemptyset(&p->p_sig);
521 sigemptyset(&p->p_extsig);
522 sigemptyset(&t->t_sig);
523 sigemptyset(&t->t_extsig);
524 sigemptyset(&p->p_sigmask);
525 sigdelq(p, t, 0);
526 lwp->lwp_cursig = 0;
527 lwp->lwp_extsig = 0;
528 p->p_flag &= ~(SKILLED | SEXTKILLED);
529 if (lwp->lwp_curinfo) {
530 siginfofree(lwp->lwp_curinfo);
531 lwp->lwp_curinfo = NULL;
532 }
533
534 t->t_proc_flag |= TP_LWPEXIT;
535 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
536 prlwpexit(t); /* notify /proc */
537 lwp_hash_out(p, t->t_tid);
538 prexit(p);
539
540 p->p_lwpcnt = 0;
541 p->p_tlist = NULL;
542 sigqfree(p);
543 term_mstate(t);
544 p->p_mterm = gethrtime();
545
546 exec_vp = p->p_exec;
547 execdir_vp = p->p_execdir;
548 p->p_exec = NULLVP;
549 p->p_execdir = NULLVP;
550 mutex_exit(&p->p_lock);
551
552 pr_free_watched_pages(p);
553
554 closeall(P_FINFO(p));
555
556 /* Free the controlling tty. (freectty() always assumes curproc.) */
557 ASSERT(p == curproc);
558 (void) freectty(B_TRUE);
559
560 #if defined(__sparc)
561 if (p->p_utraps != NULL)
562 utrap_free(p);
563 #endif
564 if (p->p_semacct) /* IPC semaphore exit */
565 semexit(p);
566 rv = wstat(why, what);
567
568 acct(rv & 0xff);
569 exacct_commit_proc(p, rv);
570
571 /*
572 * Release any resources associated with C2 auditing
573 */
574 if (AU_AUDITING()) {
575 /*
576 * audit exit system call
577 */
578 audit_exit(why, what);
579 }
580
581 /*
582 * Free address space.
583 */
584 relvm();
585
586 if (exec_vp) {
587 /*
588 * Close this executable which has been opened when the process
589 * was created by getproc().
590 */
591 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
592 VN_RELE(exec_vp);
593 }
594 if (execdir_vp)
595 VN_RELE(execdir_vp);
596
597 /*
598 * Release held contracts.
599 */
600 contract_exit(p);
601
602 /*
603 * Depart our encapsulating process contract.
604 */
605 if ((p->p_flag & SSYS) == 0) {
606 ASSERT(p->p_ct_process);
607 contract_process_exit(p->p_ct_process, p, rv);
608 }
609
610 /*
611 * Remove pool association, and block if requested by pool_do_bind.
612 */
613 mutex_enter(&p->p_lock);
614 ASSERT(p->p_pool->pool_ref > 0);
615 atomic_dec_32(&p->p_pool->pool_ref);
616 p->p_pool = pool_default;
617 /*
618 * Now that our address space has been freed and all other threads
619 * in this process have exited, set the PEXITED pool flag. This
620 * tells the pools subsystems to ignore this process if it was
621 * requested to rebind this process to a new pool.
622 */
623 p->p_poolflag |= PEXITED;
624 pool_barrier_exit();
625 mutex_exit(&p->p_lock);
626
627 mutex_enter(&pidlock);
628
629 /*
630 * Delete this process from the newstate list of its parent. We
631 * will put it in the right place in the sigcld in the end.
632 */
633 delete_ns(p->p_parent, p);
634
635 /*
636 * Reassign the orphans to the next of kin.
637 * Don't rearrange init's orphanage.
638 */
639 if ((q = p->p_orphan) != NULL && p != proc_init) {
640
641 proc_t *nokp = p->p_nextofkin;
642
643 for (;;) {
644 q->p_nextofkin = nokp;
645 if (q->p_nextorph == NULL)
646 break;
647 q = q->p_nextorph;
648 }
649 q->p_nextorph = nokp->p_orphan;
650 nokp->p_orphan = p->p_orphan;
651 p->p_orphan = NULL;
652 }
653
654 /*
655 * Reassign the children to init.
656 * Don't try to assign init's children to init.
657 */
658 if ((q = p->p_child) != NULL && p != proc_init) {
659 struct proc *np;
660 struct proc *initp = proc_init;
661 boolean_t setzonetop = B_FALSE;
662
663 if (!INGLOBALZONE(curproc))
664 setzonetop = B_TRUE;
665
666 pgdetach(p);
667
668 do {
669 np = q->p_sibling;
670 /*
671 * Delete it from its current parent new state
672 * list and add it to init new state list
673 */
674 delete_ns(q->p_parent, q);
675
676 q->p_ppid = 1;
677 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
678 if (setzonetop) {
679 mutex_enter(&q->p_lock);
680 q->p_flag |= SZONETOP;
681 mutex_exit(&q->p_lock);
682 }
683 q->p_parent = initp;
684
685 /*
686 * Since q will be the first child,
687 * it will not have a previous sibling.
688 */
689 q->p_psibling = NULL;
690 if (initp->p_child) {
691 initp->p_child->p_psibling = q;
692 }
693 q->p_sibling = initp->p_child;
694 initp->p_child = q;
695 if (q->p_proc_flag & P_PR_PTRACE) {
696 mutex_enter(&q->p_lock);
697 sigtoproc(q, NULL, SIGKILL);
698 mutex_exit(&q->p_lock);
699 }
700 /*
701 * sigcld() will add the child to parents
702 * newstate list.
703 */
704 if (q->p_stat == SZOMB)
705 sigcld(q, NULL);
706 } while ((q = np) != NULL);
707
708 p->p_child = NULL;
709 ASSERT(p->p_child_ns == NULL);
710 }
711
712 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
713
714 mutex_enter(&p->p_lock);
715 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
716
717 /*
718 * Have our task accummulate our resource usage data before they
719 * become contaminated by p_cacct etc., and before we renounce
720 * membership of the task.
721 *
722 * We do this regardless of whether or not task accounting is active.
723 * This is to avoid having nonsense data reported for this task if
724 * task accounting is subsequently enabled. The overhead is minimal;
725 * by this point, this process has accounted for the usage of all its
726 * LWPs. We nonetheless do the work here, and under the protection of
727 * pidlock, so that the movement of the process's usage to the task
728 * happens at the same time as the removal of the process from the
729 * task, from the point of view of exacct_snapshot_task_usage().
730 */
731 exacct_update_task_mstate(p);
732
733 hrutime = mstate_aggr_state(p, LMS_USER);
734 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
735 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
736 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
737
738 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
739 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
740 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
741 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
742 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
743 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
744 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
745 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
746 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
747 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];
748
749 p->p_ru.minflt += p->p_cru.minflt;
750 p->p_ru.majflt += p->p_cru.majflt;
751 p->p_ru.nswap += p->p_cru.nswap;
752 p->p_ru.inblock += p->p_cru.inblock;
753 p->p_ru.oublock += p->p_cru.oublock;
754 p->p_ru.msgsnd += p->p_cru.msgsnd;
755 p->p_ru.msgrcv += p->p_cru.msgrcv;
756 p->p_ru.nsignals += p->p_cru.nsignals;
757 p->p_ru.nvcsw += p->p_cru.nvcsw;
758 p->p_ru.nivcsw += p->p_cru.nivcsw;
759 p->p_ru.sysc += p->p_cru.sysc;
760 p->p_ru.ioch += p->p_cru.ioch;
761
762 p->p_stat = SZOMB;
763 p->p_proc_flag &= ~P_PR_PTRACE;
764 p->p_wdata = what;
765 p->p_wcode = (char)why;
766
767 cdir = PTOU(p)->u_cdir;
768 rdir = PTOU(p)->u_rdir;
769 cwd = PTOU(p)->u_cwd;
770
771 ASSERT(cdir != NULL || p->p_parent == &p0);
772
773 /*
774 * Release resource controls, as they are no longer enforceable.
775 */
776 rctl_set_free(p->p_rctls);
777
778 /*
779 * Decrement tk_nlwps counter for our task.max-lwps resource control.
780 * An extended accounting record, if that facility is active, is
781 * scheduled to be written. We cannot give up task and project
782 * membership at this point because that would allow zombies to escape
783 * from the max-processes resource controls. Zombies stay in their
784 * current task and project until the process table slot is released
785 * in freeproc().
786 */
787 tk = p->p_task;
788
789 mutex_enter(&p->p_zone->zone_nlwps_lock);
790 tk->tk_nlwps--;
791 tk->tk_proj->kpj_nlwps--;
792 p->p_zone->zone_nlwps--;
793 mutex_exit(&p->p_zone->zone_nlwps_lock);
794
795 /*
796 * Clear the lwp directory and the lwpid hash table
797 * now that /proc can't bother us any more.
798 * We free the memory below, after dropping p->p_lock.
799 */
800 lwpdir = p->p_lwpdir;
801 lwpdir_sz = p->p_lwpdir_sz;
802 tidhash = p->p_tidhash;
803 tidhash_sz = p->p_tidhash_sz;
804 ret_tidhash = p->p_ret_tidhash;
805 p->p_lwpdir = NULL;
806 p->p_lwpfree = NULL;
807 p->p_lwpdir_sz = 0;
808 p->p_tidhash = NULL;
809 p->p_tidhash_sz = 0;
810 p->p_ret_tidhash = NULL;
811
812 /*
813 * If the process has context ops installed, call the exit routine
814 * on behalf of this last remaining thread. Normally exitpctx() is
815 * called during thread_exit() or lwp_exit(), but because this is the
816 * last thread in the process, we must call it here. By the time
817 * thread_exit() is called (below), the association with the relevant
818 * process has been lost.
819 *
820 * We also free the context here.
821 */
822 if (p->p_pctx) {
823 kpreempt_disable();
824 exitpctx(p);
825 kpreempt_enable();
826
827 freepctx(p, 0);
828 }
829
830 /*
831 * curthread's proc pointer is changed to point to the 'sched'
832 * process for the corresponding zone, except in the case when
833 * the exiting process is in fact a zsched instance, in which
834 * case the proc pointer is set to p0. We do so, so that the
835 * process still points at the right zone when we call the VN_RELE()
836 * below.
837 *
838 * This is because curthread's original proc pointer can be freed as
839 * soon as the child sends a SIGCLD to its parent. We use zsched so
840 * that for user processes, even in the final moments of death, the
841 * process is still associated with its zone.
842 */
843 if (p != t->t_procp->p_zone->zone_zsched)
844 t->t_procp = t->t_procp->p_zone->zone_zsched;
845 else
846 t->t_procp = &p0;
847
848 mutex_exit(&p->p_lock);
849 if (!evaporate) {
850 p->p_pidflag &= ~CLDPEND;
851 sigcld(p, sqp);
852 } else {
853 /*
854 * Do what sigcld() would do if the disposition
855 * of the SIGCHLD signal were set to be ignored.
856 */
857 cv_broadcast(&p->p_srwchan_cv);
858 freeproc(p);
859 }
860 mutex_exit(&pidlock);
861
862 /*
863 * We don't release u_cdir and u_rdir until SZOMB is set.
864 * This protects us against dofusers().
865 */
866 if (cdir)
867 VN_RELE(cdir);
868 if (rdir)
869 VN_RELE(rdir);
870 if (cwd)
871 refstr_rele(cwd);
872
873 /*
874 * task_rele() may ultimately cause the zone to go away (or
875 * may cause the last user process in a zone to go away, which
876 * signals zsched to go away). So prior to this call, we must
877 * no longer point at zsched.
878 */
879 t->t_procp = &p0;
880
881 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
882 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
883 while (ret_tidhash != NULL) {
884 ret_tidhash_t *next = ret_tidhash->rth_next;
885 kmem_free(ret_tidhash->rth_tidhash,
886 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
887 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
888 ret_tidhash = next;
889 }
890
891 thread_exit();
892 /* NOTREACHED */
893 }
894
895 /*
896 * Format siginfo structure for wait system calls.
897 */
898 void
winfo(proc_t * pp,k_siginfo_t * ip,int waitflag)899 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
900 {
901 ASSERT(MUTEX_HELD(&pidlock));
902
903 bzero(ip, sizeof (k_siginfo_t));
904 ip->si_signo = SIGCLD;
905 ip->si_code = pp->p_wcode;
906 ip->si_pid = pp->p_pid;
907 ip->si_ctid = PRCTID(pp);
908 ip->si_zoneid = pp->p_zone->zone_id;
909 ip->si_status = pp->p_wdata;
910 ip->si_stime = pp->p_stime;
911 ip->si_utime = pp->p_utime;
912
913 if (waitflag) {
914 pp->p_wcode = 0;
915 pp->p_wdata = 0;
916 pp->p_pidflag &= ~CLDPEND;
917 }
918 }
919
920 /*
921 * Wait system call.
922 * Search for a terminated (zombie) child,
923 * finally lay it to rest, and collect its status.
924 * Look also for stopped children,
925 * and pass back status from them.
926 */
927 int
waitid(idtype_t idtype,id_t id,k_siginfo_t * ip,int options)928 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
929 {
930 int found;
931 proc_t *cp, *pp;
932 int proc_gone;
933 int waitflag = !(options & WNOWAIT);
934
935 /*
936 * Obsolete flag, defined here only for binary compatibility
937 * with old statically linked executables. Delete this when
938 * we no longer care about these old and broken applications.
939 */
940 #define _WNOCHLD 0400
941 options &= ~_WNOCHLD;
942
943 if (options == 0 || (options & ~WOPTMASK))
944 return (EINVAL);
945
946 switch (idtype) {
947 case P_PID:
948 case P_PGID:
949 if (id < 0 || id >= maxpid)
950 return (EINVAL);
951 /* FALLTHROUGH */
952 case P_ALL:
953 break;
954 default:
955 return (EINVAL);
956 }
957
958 pp = ttoproc(curthread);
959
960 /*
961 * lock parent mutex so that sibling chain can be searched.
962 */
963 mutex_enter(&pidlock);
964
965 /*
966 * if we are only looking for exited processes and child_ns list
967 * is empty no reason to look at all children.
968 */
969 if (idtype == P_ALL &&
970 (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
971 pp->p_child_ns == NULL) {
972 if (pp->p_child) {
973 mutex_exit(&pidlock);
974 bzero(ip, sizeof (k_siginfo_t));
975 return (0);
976 }
977 mutex_exit(&pidlock);
978 return (ECHILD);
979 }
980
981 while (pp->p_child != NULL) {
982
983 proc_gone = 0;
984
985 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
986 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
987 continue;
988 if (idtype == P_PID && id != cp->p_pid)
989 continue;
990 if (idtype == P_PGID && id != cp->p_pgrp)
991 continue;
992
993 switch (cp->p_wcode) {
994
995 case CLD_TRAPPED:
996 case CLD_STOPPED:
997 case CLD_CONTINUED:
998 cmn_err(CE_PANIC,
999 "waitid: wrong state %d on the p_newstate"
1000 " list", cp->p_wcode);
1001 break;
1002
1003 case CLD_EXITED:
1004 case CLD_DUMPED:
1005 case CLD_KILLED:
1006 if (!(options & WEXITED)) {
1007 /*
1008 * Count how many are already gone
1009 * for good.
1010 */
1011 proc_gone++;
1012 break;
1013 }
1014 if (!waitflag) {
1015 winfo(cp, ip, 0);
1016 } else {
1017 winfo(cp, ip, 1);
1018 freeproc(cp);
1019 }
1020 mutex_exit(&pidlock);
1021 if (waitflag) { /* accept SIGCLD */
1022 sigcld_delete(ip);
1023 sigcld_repost();
1024 }
1025 return (0);
1026 }
1027
1028 if (idtype == P_PID)
1029 break;
1030 }
1031
1032 /*
1033 * Wow! None of the threads on the p_sibling_ns list were
1034 * interesting threads. Check all the kids!
1035 */
1036 found = 0;
1037 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1038 if (idtype == P_PID && id != cp->p_pid)
1039 continue;
1040 if (idtype == P_PGID && id != cp->p_pgrp)
1041 continue;
1042
1043 switch (cp->p_wcode) {
1044 case CLD_TRAPPED:
1045 if (!(options & WTRAPPED))
1046 break;
1047 winfo(cp, ip, waitflag);
1048 mutex_exit(&pidlock);
1049 if (waitflag) { /* accept SIGCLD */
1050 sigcld_delete(ip);
1051 sigcld_repost();
1052 }
1053 return (0);
1054
1055 case CLD_STOPPED:
1056 if (!(options & WSTOPPED))
1057 break;
1058 /* Is it still stopped? */
1059 mutex_enter(&cp->p_lock);
1060 if (!jobstopped(cp)) {
1061 mutex_exit(&cp->p_lock);
1062 break;
1063 }
1064 mutex_exit(&cp->p_lock);
1065 winfo(cp, ip, waitflag);
1066 mutex_exit(&pidlock);
1067 if (waitflag) { /* accept SIGCLD */
1068 sigcld_delete(ip);
1069 sigcld_repost();
1070 }
1071 return (0);
1072
1073 case CLD_CONTINUED:
1074 if (!(options & WCONTINUED))
1075 break;
1076 winfo(cp, ip, waitflag);
1077 mutex_exit(&pidlock);
1078 if (waitflag) { /* accept SIGCLD */
1079 sigcld_delete(ip);
1080 sigcld_repost();
1081 }
1082 return (0);
1083
1084 case CLD_EXITED:
1085 case CLD_DUMPED:
1086 case CLD_KILLED:
1087 if (idtype != P_PID &&
1088 (cp->p_pidflag & CLDWAITPID))
1089 continue;
1090 /*
1091 * Don't complain if a process was found in
1092 * the first loop but we broke out of the loop
1093 * because of the arguments passed to us.
1094 */
1095 if (proc_gone == 0) {
1096 cmn_err(CE_PANIC,
1097 "waitid: wrong state on the"
1098 " p_child list");
1099 } else {
1100 break;
1101 }
1102 }
1103
1104 found++;
1105
1106 if (idtype == P_PID)
1107 break;
1108 }
1109
1110 /*
1111 * If we found no interesting processes at all,
1112 * break out and return ECHILD.
1113 */
1114 if (found + proc_gone == 0)
1115 break;
1116
1117 if (options & WNOHANG) {
1118 mutex_exit(&pidlock);
1119 bzero(ip, sizeof (k_siginfo_t));
1120 /*
1121 * We should set ip->si_signo = SIGCLD,
1122 * but there is an SVVS test that expects
1123 * ip->si_signo to be zero in this case.
1124 */
1125 return (0);
1126 }
1127
1128 /*
1129 * If we found no processes of interest that could
1130 * change state while we wait, we don't wait at all.
1131 * Get out with ECHILD according to SVID.
1132 */
1133 if (found == proc_gone)
1134 break;
1135
1136 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1137 mutex_exit(&pidlock);
1138 return (EINTR);
1139 }
1140 }
1141 mutex_exit(&pidlock);
1142 return (ECHILD);
1143 }
1144
1145 int
waitsys(idtype_t idtype,id_t id,siginfo_t * infop,int options)1146 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1147 {
1148 int error;
1149 k_siginfo_t info;
1150
1151 if (error = waitid(idtype, id, &info, options))
1152 return (set_errno(error));
1153 if (copyout(&info, infop, sizeof (k_siginfo_t)))
1154 return (set_errno(EFAULT));
1155 return (0);
1156 }
1157
#ifdef _SYSCALL32_IMPL

/*
 * Variant of waitsys() built only under _SYSCALL32_IMPL: the result of
 * waitid() is converted to a siginfo32_t with siginfo_kto32() before
 * being copied to `infop' with copyout().
 *
 * Returns 0 on success.  On failure returns set_errno() of the error
 * reported by waitid(), or of EFAULT if the copyout fails.
 */
int
waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
{
	int error;
	k_siginfo_t info;
	siginfo32_t info32;

	if ((error = waitid(idtype, id, &info, options)) != 0)
		return (set_errno(error));
	siginfo_kto32(&info, &info32);
	if (copyout(&info32, infop, sizeof (info32)) != 0)
		return (set_errno(EFAULT));
	return (0);
}

#endif	/* _SYSCALL32_IMPL */
1176
1177 void
proc_detach(proc_t * p)1178 proc_detach(proc_t *p)
1179 {
1180 proc_t *q;
1181
1182 ASSERT(MUTEX_HELD(&pidlock));
1183
1184 q = p->p_parent;
1185 ASSERT(q != NULL);
1186
1187 /*
1188 * Take it off the newstate list of its parent
1189 */
1190 delete_ns(q, p);
1191
1192 if (q->p_child == p) {
1193 q->p_child = p->p_sibling;
1194 /*
1195 * If the parent has no children, it better not
1196 * have any with new states either!
1197 */
1198 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1199 }
1200
1201 if (p->p_sibling) {
1202 p->p_sibling->p_psibling = p->p_psibling;
1203 }
1204
1205 if (p->p_psibling) {
1206 p->p_psibling->p_sibling = p->p_sibling;
1207 }
1208 }
1209
1210 /*
1211 * Remove zombie children from the process table.
1212 */
1213 void
freeproc(proc_t * p)1214 freeproc(proc_t *p)
1215 {
1216 proc_t *q;
1217 task_t *tk;
1218
1219 ASSERT(p->p_stat == SZOMB);
1220 ASSERT(p->p_tlist == NULL);
1221 ASSERT(MUTEX_HELD(&pidlock));
1222
1223 sigdelq(p, NULL, 0);
1224 if (p->p_killsqp) {
1225 siginfofree(p->p_killsqp);
1226 p->p_killsqp = NULL;
1227 }
1228
1229 prfree(p); /* inform /proc */
1230
1231 /*
1232 * Don't free the init processes.
1233 * Other dying processes will access it.
1234 */
1235 if (p == proc_init)
1236 return;
1237
1238
1239 /*
1240 * We wait until now to free the cred structure because a
1241 * zombie process's credentials may be examined by /proc.
1242 * No cred locking needed because there are no threads at this point.
1243 */
1244 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1245 crfree(p->p_cred);
1246 if (p->p_corefile != NULL) {
1247 corectl_path_rele(p->p_corefile);
1248 p->p_corefile = NULL;
1249 }
1250 if (p->p_content != NULL) {
1251 corectl_content_rele(p->p_content);
1252 p->p_content = NULL;
1253 }
1254
1255 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1256 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1257 /*
1258 * This should still do the right thing since p_utime/stime
1259 * get set to the correct value on process exit, so it
1260 * should get properly updated
1261 */
1262 p->p_nextofkin->p_cutime += p->p_utime;
1263 p->p_nextofkin->p_cstime += p->p_stime;
1264
1265 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1266 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1267 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1268 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1269 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1270 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1271 p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1272 += p->p_acct[LMS_USER_LOCK];
1273 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1274 p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1275 += p->p_acct[LMS_WAIT_CPU];
1276 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1277
1278 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt;
1279 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt;
1280 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap;
1281 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock;
1282 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock;
1283 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd;
1284 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv;
1285 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals;
1286 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw;
1287 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw;
1288 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc;
1289 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch;
1290
1291 }
1292
1293 q = p->p_nextofkin;
1294 if (q && q->p_orphan == p)
1295 q->p_orphan = p->p_nextorph;
1296 else if (q) {
1297 for (q = q->p_orphan; q; q = q->p_nextorph)
1298 if (q->p_nextorph == p)
1299 break;
1300 ASSERT(q && q->p_nextorph == p);
1301 q->p_nextorph = p->p_nextorph;
1302 }
1303
1304 /*
1305 * The process table slot is being freed, so it is now safe to give up
1306 * task and project membership.
1307 */
1308 mutex_enter(&p->p_lock);
1309 tk = p->p_task;
1310 task_detach(p);
1311 mutex_exit(&p->p_lock);
1312
1313 proc_detach(p);
1314 pid_exit(p, tk); /* frees pid and proc structure */
1315
1316 task_rele(tk);
1317 }
1318
1319 /*
1320 * Delete process "child" from the newstate list of process "parent"
1321 */
1322 void
delete_ns(proc_t * parent,proc_t * child)1323 delete_ns(proc_t *parent, proc_t *child)
1324 {
1325 proc_t **ns;
1326
1327 ASSERT(MUTEX_HELD(&pidlock));
1328 ASSERT(child->p_parent == parent);
1329 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1330 if (*ns == child) {
1331
1332 ASSERT((*ns)->p_parent == parent);
1333
1334 *ns = child->p_sibling_ns;
1335 child->p_sibling_ns = NULL;
1336 return;
1337 }
1338 }
1339 }
1340
1341 /*
1342 * Add process "child" to the new state list of process "parent"
1343 */
1344 void
add_ns(proc_t * parent,proc_t * child)1345 add_ns(proc_t *parent, proc_t *child)
1346 {
1347 ASSERT(child->p_sibling_ns == NULL);
1348 child->p_sibling_ns = parent->p_child_ns;
1349 parent->p_child_ns = child;
1350 }
1351