xref: /illumos-gate/usr/src/uts/common/fs/proc/prcontrol.c (revision 3cf7d3e96c394bb30710bd264c0bb61f4646639f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/uio.h>
33 #include <sys/param.h>
34 #include <sys/cmn_err.h>
35 #include <sys/cred.h>
36 #include <sys/policy.h>
37 #include <sys/debug.h>
38 #include <sys/errno.h>
39 #include <sys/file.h>
40 #include <sys/inline.h>
41 #include <sys/kmem.h>
42 #include <sys/proc.h>
43 #include <sys/brand.h>
44 #include <sys/regset.h>
45 #include <sys/sysmacros.h>
46 #include <sys/systm.h>
47 #include <sys/vfs.h>
48 #include <sys/vnode.h>
49 #include <sys/signal.h>
50 #include <sys/auxv.h>
51 #include <sys/user.h>
52 #include <sys/class.h>
53 #include <sys/fault.h>
54 #include <sys/syscall.h>
55 #include <sys/procfs.h>
56 #include <sys/zone.h>
57 #include <sys/copyops.h>
58 #include <sys/schedctl.h>
59 #include <vm/as.h>
60 #include <vm/seg.h>
61 #include <fs/proc/prdata.h>
62 #include <sys/contract/process_impl.h>
63 
64 static	void	pr_settrace(proc_t *, sigset_t *);
65 static	int	pr_setfpregs(prnode_t *, prfpregset_t *);
66 #if defined(__sparc)
67 static	int	pr_setxregs(prnode_t *, prxregset_t *);
68 static	int	pr_setasrs(prnode_t *, asrset_t);
69 #endif
70 static	int	pr_setvaddr(prnode_t *, caddr_t);
71 static	int	pr_clearsig(prnode_t *);
72 static	int	pr_clearflt(prnode_t *);
73 static	int	pr_watch(prnode_t *, prwatch_t *, int *);
74 static	int	pr_agent(prnode_t *, prgregset_t, int *);
75 static	int	pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
76 static	int	pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
77 static	int	pr_spriv(proc_t *, prpriv_t *, cred_t *);
78 static	int	pr_szoneid(proc_t *, zoneid_t, cred_t *);
79 static	void	pauselwps(proc_t *);
80 static	void	unpauselwps(proc_t *);
81 
82 typedef union {
83 	long		sig;		/* PCKILL, PCUNKILL */
84 	long		nice;		/* PCNICE */
85 	long		timeo;		/* PCTWSTOP */
86 	ulong_t		flags;		/* PCRUN, PCSET, PCUNSET */
87 	caddr_t		vaddr;		/* PCSVADDR */
88 	siginfo_t	siginfo;	/* PCSSIG */
89 	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
90 	fltset_t	fltset;		/* PCSFAULT */
91 	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
92 	prgregset_t	prgregset;	/* PCSREG, PCAGENT */
93 	prfpregset_t	prfpregset;	/* PCSFPREG */
94 #if defined(__sparc)
95 	prxregset_t	prxregset;	/* PCSXREG */
96 	asrset_t	asrset;		/* PCSASRS */
97 #endif
98 	prwatch_t	prwatch;	/* PCWATCH */
99 	priovec_t	priovec;	/* PCREAD, PCWRITE */
100 	prcred_t	prcred;		/* PCSCRED */
101 	prpriv_t	prpriv;		/* PCSPRIV */
102 	long		przoneid;	/* PCSZONE */
103 } arg_t;
104 
105 static	int	pr_control(long, arg_t *, prnode_t *, cred_t *);
106 
107 static size_t
108 ctlsize(long cmd, size_t resid, arg_t *argp)
109 {
110 	size_t size = sizeof (long);
111 	size_t rnd;
112 	int ngrp;
113 
114 	switch (cmd) {
115 	case PCNULL:
116 	case PCSTOP:
117 	case PCDSTOP:
118 	case PCWSTOP:
119 	case PCCSIG:
120 	case PCCFAULT:
121 		break;
122 	case PCSSIG:
123 		size += sizeof (siginfo_t);
124 		break;
125 	case PCTWSTOP:
126 		size += sizeof (long);
127 		break;
128 	case PCKILL:
129 	case PCUNKILL:
130 	case PCNICE:
131 		size += sizeof (long);
132 		break;
133 	case PCRUN:
134 	case PCSET:
135 	case PCUNSET:
136 		size += sizeof (ulong_t);
137 		break;
138 	case PCSVADDR:
139 		size += sizeof (caddr_t);
140 		break;
141 	case PCSTRACE:
142 	case PCSHOLD:
143 		size += sizeof (sigset_t);
144 		break;
145 	case PCSFAULT:
146 		size += sizeof (fltset_t);
147 		break;
148 	case PCSENTRY:
149 	case PCSEXIT:
150 		size += sizeof (sysset_t);
151 		break;
152 	case PCSREG:
153 	case PCAGENT:
154 		size += sizeof (prgregset_t);
155 		break;
156 	case PCSFPREG:
157 		size += sizeof (prfpregset_t);
158 		break;
159 #if defined(__sparc)
160 	case PCSXREG:
161 		size += sizeof (prxregset_t);
162 		break;
163 	case PCSASRS:
164 		size += sizeof (asrset_t);
165 		break;
166 #endif
167 	case PCWATCH:
168 		size += sizeof (prwatch_t);
169 		break;
170 	case PCREAD:
171 	case PCWRITE:
172 		size += sizeof (priovec_t);
173 		break;
174 	case PCSCRED:
175 		size += sizeof (prcred_t);
176 		break;
177 	case PCSCREDX:
178 		/*
179 		 * We cannot derefence the pr_ngroups fields if it
180 		 * we don't have enough data.
181 		 */
182 		if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
183 			return (0);
184 		ngrp = argp->prcred.pr_ngroups;
185 		if (ngrp < 0 || ngrp > ngroups_max)
186 			return (0);
187 
188 		/* The result can be smaller than sizeof (prcred_t) */
189 		size += sizeof (prcred_t) - sizeof (gid_t);
190 		size += ngrp * sizeof (gid_t);
191 		break;
192 	case PCSPRIV:
193 		if (resid >= size + sizeof (prpriv_t))
194 			size += priv_prgetprivsize(&argp->prpriv);
195 		else
196 			return (0);
197 		break;
198 	case PCSZONE:
199 		size += sizeof (long);
200 		break;
201 	default:
202 		return (0);
203 	}
204 
205 	/* Round up to a multiple of long, unless exact amount written */
206 	if (size < resid) {
207 		rnd = size & (sizeof (long) - 1);
208 
209 		if (rnd != 0)
210 			size += sizeof (long) - rnd;
211 	}
212 
213 	if (size > resid)
214 		return (0);
215 	return (size);
216 }
217 
218 /*
219  * Control operations (lots).
220  */
221 int
222 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
223 {
224 #define	MY_BUFFER_SIZE \
225 		100 > 1 + sizeof (arg_t) / sizeof (long) ? \
226 		100 : 1 + sizeof (arg_t) / sizeof (long)
227 	long buf[MY_BUFFER_SIZE];
228 	long *bufp;
229 	size_t resid = 0;
230 	size_t size;
231 	prnode_t *pnp = VTOP(vp);
232 	int error;
233 	int locked = 0;
234 
235 	while (uiop->uio_resid) {
236 		/*
237 		 * Read several commands in one gulp.
238 		 */
239 		bufp = buf;
240 		if (resid) {	/* move incomplete command to front of buffer */
241 			long *tail;
242 
243 			if (resid >= sizeof (buf))
244 				break;
245 			tail = (long *)((char *)buf + sizeof (buf) - resid);
246 			do {
247 				*bufp++ = *tail++;
248 			} while ((resid -= sizeof (long)) != 0);
249 		}
250 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
251 		if (resid > uiop->uio_resid)
252 			resid = uiop->uio_resid;
253 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
254 			return (error);
255 		resid += (char *)bufp - (char *)buf;
256 		bufp = buf;
257 
258 		do {		/* loop over commands in buffer */
259 			long cmd = bufp[0];
260 			arg_t *argp = (arg_t *)&bufp[1];
261 
262 			size = ctlsize(cmd, resid, argp);
263 			if (size == 0)	/* incomplete or invalid command */
264 				break;
265 			/*
266 			 * Perform the specified control operation.
267 			 */
268 			if (!locked) {
269 				if ((error = prlock(pnp, ZNO)) != 0)
270 					return (error);
271 				locked = 1;
272 			}
273 			if (error = pr_control(cmd, argp, pnp, cr)) {
274 				if (error == -1)	/* -1 is timeout */
275 					locked = 0;
276 				else
277 					return (error);
278 			}
279 			bufp = (long *)((char *)bufp + size);
280 		} while ((resid -= size) != 0);
281 
282 		if (locked) {
283 			prunlock(pnp);
284 			locked = 0;
285 		}
286 	}
287 	return (resid? EINVAL : 0);
288 }
289 
290 static int
291 pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
292 {
293 	prcommon_t *pcp;
294 	proc_t *p;
295 	int unlocked;
296 	int error = 0;
297 
298 	if (cmd == PCNULL)
299 		return (0);
300 
301 	pcp = pnp->pr_common;
302 	p = pcp->prc_proc;
303 	ASSERT(p != NULL);
304 
305 	/* System processes defy control. */
306 	if (p->p_flag & SSYS) {
307 		prunlock(pnp);
308 		return (EBUSY);
309 	}
310 
311 	switch (cmd) {
312 
313 	default:
314 		error = EINVAL;
315 		break;
316 
317 	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
318 	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
319 	case PCWSTOP:	/* wait for process or lwp to stop */
320 	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
321 		{
322 			time_t timeo;
323 
324 			/*
325 			 * Can't apply to a system process.
326 			 */
327 			if (p->p_as == &kas) {
328 				error = EBUSY;
329 				break;
330 			}
331 
332 			if (cmd == PCSTOP || cmd == PCDSTOP)
333 				pr_stop(pnp);
334 
335 			if (cmd == PCDSTOP)
336 				break;
337 
338 			/*
339 			 * If an lwp is waiting for itself or its process,
340 			 * don't wait. The stopped lwp would never see the
341 			 * fact that it is stopped.
342 			 */
343 			if ((pcp->prc_flags & PRC_LWP)?
344 			    (pcp->prc_thread == curthread) : (p == curproc)) {
345 				if (cmd == PCWSTOP || cmd == PCTWSTOP)
346 					error = EBUSY;
347 				break;
348 			}
349 
350 			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
351 			if ((error = pr_wait_stop(pnp, timeo)) != 0)
352 				return (error);
353 
354 			break;
355 		}
356 
357 	case PCRUN:	/* make lwp or process runnable */
358 		error = pr_setrun(pnp, argp->flags);
359 		break;
360 
361 	case PCSTRACE:	/* set signal trace mask */
362 		pr_settrace(p,  &argp->sigset);
363 		break;
364 
365 	case PCSSIG:	/* set current signal */
366 		error = pr_setsig(pnp, &argp->siginfo);
367 		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
368 			prunlock(pnp);
369 			pr_wait_die(pnp);
370 			return (-1);
371 		}
372 		break;
373 
374 	case PCKILL:	/* send signal */
375 		error = pr_kill(pnp, (int)argp->sig, cr);
376 		if (error == 0 && argp->sig == SIGKILL) {
377 			prunlock(pnp);
378 			pr_wait_die(pnp);
379 			return (-1);
380 		}
381 		break;
382 
383 	case PCUNKILL:	/* delete a pending signal */
384 		error = pr_unkill(pnp, (int)argp->sig);
385 		break;
386 
387 	case PCNICE:	/* set nice priority */
388 		error = pr_nice(p, (int)argp->nice, cr);
389 		break;
390 
391 	case PCSENTRY:	/* set syscall entry bit mask */
392 	case PCSEXIT:	/* set syscall exit bit mask */
393 		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
394 		break;
395 
396 	case PCSET:	/* set process flags */
397 		error = pr_set(p, argp->flags);
398 		break;
399 
400 	case PCUNSET:	/* unset process flags */
401 		error = pr_unset(p, argp->flags);
402 		break;
403 
404 	case PCSREG:	/* set general registers */
405 		{
406 			kthread_t *t = pr_thread(pnp);
407 
408 			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
409 				thread_unlock(t);
410 				error = EBUSY;
411 			} else {
412 				thread_unlock(t);
413 				mutex_exit(&p->p_lock);
414 				prsetprregs(ttolwp(t), argp->prgregset, 0);
415 				mutex_enter(&p->p_lock);
416 			}
417 			break;
418 		}
419 
420 	case PCSFPREG:	/* set floating-point registers */
421 		error = pr_setfpregs(pnp, &argp->prfpregset);
422 		break;
423 
424 	case PCSXREG:	/* set extra registers */
425 #if defined(__sparc)
426 		error = pr_setxregs(pnp, &argp->prxregset);
427 #else
428 		error = EINVAL;
429 #endif
430 		break;
431 
432 #if defined(__sparc)
433 	case PCSASRS:	/* set ancillary state registers */
434 		error = pr_setasrs(pnp, argp->asrset);
435 		break;
436 #endif
437 
438 	case PCSVADDR:	/* set virtual address at which to resume */
439 		error = pr_setvaddr(pnp, argp->vaddr);
440 		break;
441 
442 	case PCSHOLD:	/* set signal-hold mask */
443 		pr_sethold(pnp, &argp->sigset);
444 		break;
445 
446 	case PCSFAULT:	/* set mask of traced faults */
447 		pr_setfault(p, &argp->fltset);
448 		break;
449 
450 	case PCCSIG:	/* clear current signal */
451 		error = pr_clearsig(pnp);
452 		break;
453 
454 	case PCCFAULT:	/* clear current fault */
455 		error = pr_clearflt(pnp);
456 		break;
457 
458 	case PCWATCH:	/* set or clear watched areas */
459 		error = pr_watch(pnp, &argp->prwatch, &unlocked);
460 		if (error && unlocked)
461 			return (error);
462 		break;
463 
464 	case PCAGENT:	/* create the /proc agent lwp in the target process */
465 		error = pr_agent(pnp, argp->prgregset, &unlocked);
466 		if (error && unlocked)
467 			return (error);
468 		break;
469 
470 	case PCREAD:	/* read from the address space */
471 		error = pr_rdwr(p, UIO_READ, &argp->priovec);
472 		break;
473 
474 	case PCWRITE:	/* write to the address space */
475 		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
476 		break;
477 
478 	case PCSCRED:	/* set the process credentials */
479 	case PCSCREDX:
480 		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
481 		break;
482 
483 	case PCSPRIV:	/* set the process privileges */
484 		error = pr_spriv(p, &argp->prpriv, cr);
485 		break;
486 	case PCSZONE:	/* set the process's zoneid credentials */
487 		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
488 		break;
489 	}
490 
491 	if (error)
492 		prunlock(pnp);
493 	return (error);
494 }
495 
496 #ifdef _SYSCALL32_IMPL
497 
498 typedef union {
499 	int32_t		sig;		/* PCKILL, PCUNKILL */
500 	int32_t		nice;		/* PCNICE */
501 	int32_t		timeo;		/* PCTWSTOP */
502 	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
503 	caddr32_t	vaddr;		/* PCSVADDR */
504 	siginfo32_t	siginfo;	/* PCSSIG */
505 	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
506 	fltset_t	fltset;		/* PCSFAULT */
507 	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
508 	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
509 	prfpregset32_t	prfpregset;	/* PCSFPREG */
510 #if defined(__sparc)
511 	prxregset_t	prxregset;	/* PCSXREG */
512 #endif
513 	prwatch32_t	prwatch;	/* PCWATCH */
514 	priovec32_t	priovec;	/* PCREAD, PCWRITE */
515 	prcred32_t	prcred;		/* PCSCRED */
516 	prpriv_t	prpriv;		/* PCSPRIV */
517 	int32_t		przoneid;	/* PCSZONE */
518 } arg32_t;
519 
520 static	int	pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
521 static	int	pr_setfpregs32(prnode_t *, prfpregset32_t *);
522 
523 /*
524  * Note that while ctlsize32() can use argp, it must do so only in a way
525  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
526  * to an array of 32-bit values and only 32-bit alignment is ensured.
527  */
528 static size_t
529 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
530 {
531 	size_t size = sizeof (int32_t);
532 	size_t rnd;
533 	int ngrp;
534 
535 	switch (cmd) {
536 	case PCNULL:
537 	case PCSTOP:
538 	case PCDSTOP:
539 	case PCWSTOP:
540 	case PCCSIG:
541 	case PCCFAULT:
542 		break;
543 	case PCSSIG:
544 		size += sizeof (siginfo32_t);
545 		break;
546 	case PCTWSTOP:
547 		size += sizeof (int32_t);
548 		break;
549 	case PCKILL:
550 	case PCUNKILL:
551 	case PCNICE:
552 		size += sizeof (int32_t);
553 		break;
554 	case PCRUN:
555 	case PCSET:
556 	case PCUNSET:
557 		size += sizeof (uint32_t);
558 		break;
559 	case PCSVADDR:
560 		size += sizeof (caddr32_t);
561 		break;
562 	case PCSTRACE:
563 	case PCSHOLD:
564 		size += sizeof (sigset_t);
565 		break;
566 	case PCSFAULT:
567 		size += sizeof (fltset_t);
568 		break;
569 	case PCSENTRY:
570 	case PCSEXIT:
571 		size += sizeof (sysset_t);
572 		break;
573 	case PCSREG:
574 	case PCAGENT:
575 		size += sizeof (prgregset32_t);
576 		break;
577 	case PCSFPREG:
578 		size += sizeof (prfpregset32_t);
579 		break;
580 #if defined(__sparc)
581 	case PCSXREG:
582 		size += sizeof (prxregset_t);
583 		break;
584 #endif
585 	case PCWATCH:
586 		size += sizeof (prwatch32_t);
587 		break;
588 	case PCREAD:
589 	case PCWRITE:
590 		size += sizeof (priovec32_t);
591 		break;
592 	case PCSCRED:
593 		size += sizeof (prcred32_t);
594 		break;
595 	case PCSCREDX:
596 		/*
597 		 * We cannot derefence the pr_ngroups fields if it
598 		 * we don't have enough data.
599 		 */
600 		if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
601 			return (0);
602 		ngrp = argp->prcred.pr_ngroups;
603 		if (ngrp < 0 || ngrp > ngroups_max)
604 			return (0);
605 
606 		/* The result can be smaller than sizeof (prcred32_t) */
607 		size += sizeof (prcred32_t) - sizeof (gid32_t);
608 		size += ngrp * sizeof (gid32_t);
609 		break;
610 	case PCSPRIV:
611 		if (resid >= size + sizeof (prpriv_t))
612 			size += priv_prgetprivsize(&argp->prpriv);
613 		else
614 			return (0);
615 		break;
616 	case PCSZONE:
617 		size += sizeof (int32_t);
618 		break;
619 	default:
620 		return (0);
621 	}
622 
623 	/* Round up to a multiple of int32_t */
624 	rnd = size & (sizeof (int32_t) - 1);
625 
626 	if (rnd != 0)
627 		size += sizeof (int32_t) - rnd;
628 
629 	if (size > resid)
630 		return (0);
631 	return (size);
632 }
633 
634 /*
635  * Control operations (lots).
636  */
637 int
638 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
639 {
640 #define	MY_BUFFER_SIZE32 \
641 		100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
642 		100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
643 	int32_t buf[MY_BUFFER_SIZE32];
644 	int32_t *bufp;
645 	arg32_t arg;
646 	size_t resid = 0;
647 	size_t size;
648 	prnode_t *pnp = VTOP(vp);
649 	int error;
650 	int locked = 0;
651 
652 	while (uiop->uio_resid) {
653 		/*
654 		 * Read several commands in one gulp.
655 		 */
656 		bufp = buf;
657 		if (resid) {	/* move incomplete command to front of buffer */
658 			int32_t *tail;
659 
660 			if (resid >= sizeof (buf))
661 				break;
662 			tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
663 			do {
664 				*bufp++ = *tail++;
665 			} while ((resid -= sizeof (int32_t)) != 0);
666 		}
667 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
668 		if (resid > uiop->uio_resid)
669 			resid = uiop->uio_resid;
670 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
671 			return (error);
672 		resid += (char *)bufp - (char *)buf;
673 		bufp = buf;
674 
675 		do {		/* loop over commands in buffer */
676 			int32_t cmd = bufp[0];
677 			arg32_t *argp = (arg32_t *)&bufp[1];
678 
679 			size = ctlsize32(cmd, resid, argp);
680 			if (size == 0)	/* incomplete or invalid command */
681 				break;
682 			/*
683 			 * Perform the specified control operation.
684 			 */
685 			if (!locked) {
686 				if ((error = prlock(pnp, ZNO)) != 0)
687 					return (error);
688 				locked = 1;
689 			}
690 
691 			/*
692 			 * Since some members of the arg32_t union contain
693 			 * 64-bit values (which must be 64-bit aligned), we
694 			 * can't simply pass a pointer to the structure as
695 			 * it may be unaligned. Note that we do pass the
696 			 * potentially unaligned structure to ctlsize32()
697 			 * above, but that uses it a way that makes no
698 			 * assumptions about alignment.
699 			 */
700 			ASSERT(size - sizeof (cmd) <= sizeof (arg));
701 			bcopy(argp, &arg, size - sizeof (cmd));
702 
703 			if (error = pr_control32(cmd, &arg, pnp, cr)) {
704 				if (error == -1)	/* -1 is timeout */
705 					locked = 0;
706 				else
707 					return (error);
708 			}
709 			bufp = (int32_t *)((char *)bufp + size);
710 		} while ((resid -= size) != 0);
711 
712 		if (locked) {
713 			prunlock(pnp);
714 			locked = 0;
715 		}
716 	}
717 	return (resid? EINVAL : 0);
718 }
719 
720 static int
721 pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
722 {
723 	prcommon_t *pcp;
724 	proc_t *p;
725 	int unlocked;
726 	int error = 0;
727 
728 	if (cmd == PCNULL)
729 		return (0);
730 
731 	pcp = pnp->pr_common;
732 	p = pcp->prc_proc;
733 	ASSERT(p != NULL);
734 
735 	if (p->p_flag & SSYS) {
736 		prunlock(pnp);
737 		return (EBUSY);
738 	}
739 
740 	switch (cmd) {
741 
742 	default:
743 		error = EINVAL;
744 		break;
745 
746 	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
747 	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
748 	case PCWSTOP:	/* wait for process or lwp to stop */
749 	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
750 		{
751 			time_t timeo;
752 
753 			/*
754 			 * Can't apply to a system process.
755 			 */
756 			if (p->p_as == &kas) {
757 				error = EBUSY;
758 				break;
759 			}
760 
761 			if (cmd == PCSTOP || cmd == PCDSTOP)
762 				pr_stop(pnp);
763 
764 			if (cmd == PCDSTOP)
765 				break;
766 
767 			/*
768 			 * If an lwp is waiting for itself or its process,
769 			 * don't wait. The lwp will never see the fact that
770 			 * itself is stopped.
771 			 */
772 			if ((pcp->prc_flags & PRC_LWP)?
773 			    (pcp->prc_thread == curthread) : (p == curproc)) {
774 				if (cmd == PCWSTOP || cmd == PCTWSTOP)
775 					error = EBUSY;
776 				break;
777 			}
778 
779 			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
780 			if ((error = pr_wait_stop(pnp, timeo)) != 0)
781 				return (error);
782 
783 			break;
784 		}
785 
786 	case PCRUN:	/* make lwp or process runnable */
787 		error = pr_setrun(pnp, (ulong_t)argp->flags);
788 		break;
789 
790 	case PCSTRACE:	/* set signal trace mask */
791 		pr_settrace(p,  &argp->sigset);
792 		break;
793 
794 	case PCSSIG:	/* set current signal */
795 		if (PROCESS_NOT_32BIT(p))
796 			error = EOVERFLOW;
797 		else {
798 			int sig = (int)argp->siginfo.si_signo;
799 			siginfo_t siginfo;
800 
801 			bzero(&siginfo, sizeof (siginfo));
802 			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
803 			error = pr_setsig(pnp, &siginfo);
804 			if (sig == SIGKILL && error == 0) {
805 				prunlock(pnp);
806 				pr_wait_die(pnp);
807 				return (-1);
808 			}
809 		}
810 		break;
811 
812 	case PCKILL:	/* send signal */
813 		error = pr_kill(pnp, (int)argp->sig, cr);
814 		if (error == 0 && argp->sig == SIGKILL) {
815 			prunlock(pnp);
816 			pr_wait_die(pnp);
817 			return (-1);
818 		}
819 		break;
820 
821 	case PCUNKILL:	/* delete a pending signal */
822 		error = pr_unkill(pnp, (int)argp->sig);
823 		break;
824 
825 	case PCNICE:	/* set nice priority */
826 		error = pr_nice(p, (int)argp->nice, cr);
827 		break;
828 
829 	case PCSENTRY:	/* set syscall entry bit mask */
830 	case PCSEXIT:	/* set syscall exit bit mask */
831 		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
832 		break;
833 
834 	case PCSET:	/* set process flags */
835 		error = pr_set(p, (long)argp->flags);
836 		break;
837 
838 	case PCUNSET:	/* unset process flags */
839 		error = pr_unset(p, (long)argp->flags);
840 		break;
841 
842 	case PCSREG:	/* set general registers */
843 		if (PROCESS_NOT_32BIT(p))
844 			error = EOVERFLOW;
845 		else {
846 			kthread_t *t = pr_thread(pnp);
847 
848 			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
849 				thread_unlock(t);
850 				error = EBUSY;
851 			} else {
852 				prgregset_t prgregset;
853 				klwp_t *lwp = ttolwp(t);
854 
855 				thread_unlock(t);
856 				mutex_exit(&p->p_lock);
857 				prgregset_32ton(lwp, argp->prgregset,
858 				    prgregset);
859 				prsetprregs(lwp, prgregset, 0);
860 				mutex_enter(&p->p_lock);
861 			}
862 		}
863 		break;
864 
865 	case PCSFPREG:	/* set floating-point registers */
866 		if (PROCESS_NOT_32BIT(p))
867 			error = EOVERFLOW;
868 		else
869 			error = pr_setfpregs32(pnp, &argp->prfpregset);
870 		break;
871 
872 	case PCSXREG:	/* set extra registers */
873 #if defined(__sparc)
874 		if (PROCESS_NOT_32BIT(p))
875 			error = EOVERFLOW;
876 		else
877 			error = pr_setxregs(pnp, &argp->prxregset);
878 #else
879 		error = EINVAL;
880 #endif
881 		break;
882 
883 	case PCSVADDR:	/* set virtual address at which to resume */
884 		if (PROCESS_NOT_32BIT(p))
885 			error = EOVERFLOW;
886 		else
887 			error = pr_setvaddr(pnp,
888 			    (caddr_t)(uintptr_t)argp->vaddr);
889 		break;
890 
891 	case PCSHOLD:	/* set signal-hold mask */
892 		pr_sethold(pnp, &argp->sigset);
893 		break;
894 
895 	case PCSFAULT:	/* set mask of traced faults */
896 		pr_setfault(p, &argp->fltset);
897 		break;
898 
899 	case PCCSIG:	/* clear current signal */
900 		error = pr_clearsig(pnp);
901 		break;
902 
903 	case PCCFAULT:	/* clear current fault */
904 		error = pr_clearflt(pnp);
905 		break;
906 
907 	case PCWATCH:	/* set or clear watched areas */
908 		if (PROCESS_NOT_32BIT(p))
909 			error = EOVERFLOW;
910 		else {
911 			prwatch_t prwatch;
912 
913 			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
914 			prwatch.pr_size = argp->prwatch.pr_size;
915 			prwatch.pr_wflags = argp->prwatch.pr_wflags;
916 			prwatch.pr_pad = argp->prwatch.pr_pad;
917 			error = pr_watch(pnp, &prwatch, &unlocked);
918 			if (error && unlocked)
919 				return (error);
920 		}
921 		break;
922 
923 	case PCAGENT:	/* create the /proc agent lwp in the target process */
924 		if (PROCESS_NOT_32BIT(p))
925 			error = EOVERFLOW;
926 		else {
927 			prgregset_t prgregset;
928 			kthread_t *t = pr_thread(pnp);
929 			klwp_t *lwp = ttolwp(t);
930 			thread_unlock(t);
931 			mutex_exit(&p->p_lock);
932 			prgregset_32ton(lwp, argp->prgregset, prgregset);
933 			mutex_enter(&p->p_lock);
934 			error = pr_agent(pnp, prgregset, &unlocked);
935 			if (error && unlocked)
936 				return (error);
937 		}
938 		break;
939 
940 	case PCREAD:	/* read from the address space */
941 	case PCWRITE:	/* write to the address space */
942 		if (PROCESS_NOT_32BIT(p) || (pnp->pr_flags & PR_OFFMAX))
943 			error = EOVERFLOW;
944 		else {
945 			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
946 			priovec_t priovec;
947 
948 			priovec.pio_base =
949 			    (void *)(uintptr_t)argp->priovec.pio_base;
950 			priovec.pio_len = (size_t)argp->priovec.pio_len;
951 			priovec.pio_offset = (off_t)
952 			    (uint32_t)argp->priovec.pio_offset;
953 			error = pr_rdwr(p, rw, &priovec);
954 		}
955 		break;
956 
957 	case PCSCRED:	/* set the process credentials */
958 	case PCSCREDX:
959 		{
960 			/*
961 			 * All the fields in these structures are exactly the
962 			 * same and so the structures are compatible.  In case
963 			 * this ever changes, we catch this with the ASSERT
964 			 * below.
965 			 */
966 			prcred_t *prcred = (prcred_t *)&argp->prcred;
967 
968 #ifndef __lint
969 			ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
970 #endif
971 
972 			error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
973 			break;
974 		}
975 
976 	case PCSPRIV:	/* set the process privileges */
977 		error = pr_spriv(p, &argp->prpriv, cr);
978 		break;
979 
980 	case PCSZONE:	/* set the process's zoneid */
981 		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
982 		break;
983 	}
984 
985 	if (error)
986 		prunlock(pnp);
987 	return (error);
988 }
989 
990 #endif	/* _SYSCALL32_IMPL */
991 
992 /*
993  * Return the specific or chosen thread/lwp for a control operation.
994  * Returns with the thread locked via thread_lock(t).
995  */
996 kthread_t *
997 pr_thread(prnode_t *pnp)
998 {
999 	prcommon_t *pcp = pnp->pr_common;
1000 	kthread_t *t;
1001 
1002 	if (pcp->prc_flags & PRC_LWP) {
1003 		t = pcp->prc_thread;
1004 		ASSERT(t != NULL);
1005 		thread_lock(t);
1006 	} else {
1007 		proc_t *p = pcp->prc_proc;
1008 		t = prchoose(p);	/* returns locked thread */
1009 		ASSERT(t != NULL);
1010 	}
1011 
1012 	return (t);
1013 }
1014 
1015 /*
1016  * Direct the process or lwp to stop.
1017  */
1018 void
1019 pr_stop(prnode_t *pnp)
1020 {
1021 	prcommon_t *pcp = pnp->pr_common;
1022 	proc_t *p = pcp->prc_proc;
1023 	kthread_t *t;
1024 	vnode_t *vp;
1025 
1026 	/*
1027 	 * If already stopped, do nothing; otherwise flag
1028 	 * it to be stopped the next time it tries to run.
1029 	 * If sleeping at interruptible priority, set it
1030 	 * running so it will stop within cv_wait_sig().
1031 	 *
1032 	 * Take care to cooperate with jobcontrol: if an lwp
1033 	 * is stopped due to the default action of a jobcontrol
1034 	 * stop signal, flag it to be stopped the next time it
1035 	 * starts due to a SIGCONT signal.
1036 	 */
1037 	if (pcp->prc_flags & PRC_LWP)
1038 		t = pcp->prc_thread;
1039 	else
1040 		t = p->p_tlist;
1041 	ASSERT(t != NULL);
1042 
1043 	do {
1044 		int notify;
1045 
1046 		notify = 0;
1047 		thread_lock(t);
1048 		if (!ISTOPPED(t)) {
1049 			t->t_proc_flag |= TP_PRSTOP;
1050 			t->t_sig_check = 1;	/* do ISSIG */
1051 		}
1052 
1053 		/* Move the thread from wait queue to run queue */
1054 		if (ISWAITING(t))
1055 			setrun_locked(t);
1056 
1057 		if (ISWAKEABLE(t)) {
1058 			if (t->t_wchan0 == NULL)
1059 				setrun_locked(t);
1060 			else if (!VSTOPPED(t)) {
1061 				/*
1062 				 * Mark it virtually stopped.
1063 				 */
1064 				t->t_proc_flag |= TP_PRVSTOP;
1065 				notify = 1;
1066 			}
1067 		}
1068 		/*
1069 		 * force the thread into the kernel
1070 		 * if it is not already there.
1071 		 */
1072 		prpokethread(t);
1073 		thread_unlock(t);
1074 		if (notify &&
1075 		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1076 			prnotify(vp);
1077 		if (pcp->prc_flags & PRC_LWP)
1078 			break;
1079 	} while ((t = t->t_forw) != p->p_tlist);
1080 
1081 	/*
1082 	 * We do this just in case the thread we asked
1083 	 * to stop is in holdlwps() (called from cfork()).
1084 	 */
1085 	cv_broadcast(&p->p_holdlwps);
1086 }
1087 
1088 /*
1089  * Sleep until the lwp stops, but cooperate with
1090  * jobcontrol:  Don't wake up if the lwp is stopped
1091  * due to the default action of a jobcontrol stop signal.
1092  * If this is the process file descriptor, sleep
1093  * until all of the process's lwps stop.
1094  */
1095 int
1096 pr_wait_stop(prnode_t *pnp, time_t timeo)
1097 {
1098 	prcommon_t *pcp = pnp->pr_common;
1099 	proc_t *p = pcp->prc_proc;
1100 	timestruc_t rqtime;
1101 	timestruc_t *rqtp = NULL;
1102 	int timecheck = 0;
1103 	kthread_t *t;
1104 	int error;
1105 
1106 	if (timeo > 0) {	/* millisecond timeout */
1107 		/*
1108 		 * Determine the precise future time of the requested timeout.
1109 		 */
1110 		timestruc_t now;
1111 
1112 		timecheck = timechanged;
1113 		gethrestime(&now);
1114 		rqtp = &rqtime;
1115 		rqtp->tv_sec = timeo / MILLISEC;
1116 		rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1117 		timespecadd(rqtp, &now);
1118 	}
1119 
1120 	if (pcp->prc_flags & PRC_LWP) {	/* lwp file descriptor */
1121 		t = pcp->prc_thread;
1122 		ASSERT(t != NULL);
1123 		thread_lock(t);
1124 		while (!ISTOPPED(t) && !VSTOPPED(t)) {
1125 			thread_unlock(t);
1126 			mutex_enter(&pcp->prc_mutex);
1127 			prunlock(pnp);
1128 			error = pr_wait(pcp, rqtp, timecheck);
1129 			if (error)	/* -1 is timeout */
1130 				return (error);
1131 			if ((error = prlock(pnp, ZNO)) != 0)
1132 				return (error);
1133 			ASSERT(p == pcp->prc_proc);
1134 			ASSERT(t == pcp->prc_thread);
1135 			thread_lock(t);
1136 		}
1137 		thread_unlock(t);
1138 	} else {			/* process file descriptor */
1139 		t = prchoose(p);	/* returns locked thread */
1140 		ASSERT(t != NULL);
1141 		ASSERT(MUTEX_HELD(&p->p_lock));
1142 		while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1143 		    (p->p_flag & SEXITLWPS)) {
1144 			thread_unlock(t);
1145 			mutex_enter(&pcp->prc_mutex);
1146 			prunlock(pnp);
1147 			error = pr_wait(pcp, rqtp, timecheck);
1148 			if (error)	/* -1 is timeout */
1149 				return (error);
1150 			if ((error = prlock(pnp, ZNO)) != 0)
1151 				return (error);
1152 			ASSERT(p == pcp->prc_proc);
1153 			t = prchoose(p);	/* returns locked t */
1154 			ASSERT(t != NULL);
1155 		}
1156 		thread_unlock(t);
1157 	}
1158 
1159 	ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1160 	    t != NULL && t->t_state != TS_ZOMB);
1161 
1162 	return (0);
1163 }
1164 
1165 int
1166 pr_setrun(prnode_t *pnp, ulong_t flags)
1167 {
1168 	prcommon_t *pcp = pnp->pr_common;
1169 	proc_t *p = pcp->prc_proc;
1170 	kthread_t *t;
1171 	klwp_t *lwp;
1172 
1173 	/*
1174 	 * Cannot set an lwp running if it is not stopped.
1175 	 * Also, no lwp other than the /proc agent lwp can
1176 	 * be set running so long as the /proc agent lwp exists.
1177 	 */
1178 	t = pr_thread(pnp);	/* returns locked thread */
1179 	if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1180 	    !(t->t_proc_flag & TP_PRSTOP)) ||
1181 	    (p->p_agenttp != NULL &&
1182 	    (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1183 		thread_unlock(t);
1184 		return (EBUSY);
1185 	}
1186 	thread_unlock(t);
1187 	if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1188 		return (EINVAL);
1189 	lwp = ttolwp(t);
1190 	if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1191 		/*
1192 		 * Discard current siginfo_t, if any.
1193 		 */
1194 		lwp->lwp_cursig = 0;
1195 		lwp->lwp_extsig = 0;
1196 		if (lwp->lwp_curinfo) {
1197 			siginfofree(lwp->lwp_curinfo);
1198 			lwp->lwp_curinfo = NULL;
1199 		}
1200 	}
1201 	if (flags & PRCFAULT)
1202 		lwp->lwp_curflt = 0;
1203 	/*
1204 	 * We can't hold p->p_lock when we touch the lwp's registers.
1205 	 * It may be swapped out and we will get a page fault.
1206 	 */
1207 	if (flags & PRSTEP) {
1208 		mutex_exit(&p->p_lock);
1209 		prstep(lwp, 0);
1210 		mutex_enter(&p->p_lock);
1211 	}
1212 	if (flags & PRSTOP) {
1213 		t->t_proc_flag |= TP_PRSTOP;
1214 		t->t_sig_check = 1;	/* do ISSIG */
1215 	}
1216 	if (flags & PRSABORT)
1217 		lwp->lwp_sysabort = 1;
1218 	thread_lock(t);
1219 	if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1220 		/*
1221 		 * Here, we are dealing with a single lwp.
1222 		 */
1223 		if (ISTOPPED(t)) {
1224 			t->t_schedflag |= TS_PSTART;
1225 			t->t_dtrace_stop = 0;
1226 			setrun_locked(t);
1227 		} else if (flags & PRSABORT) {
1228 			t->t_proc_flag &=
1229 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1230 			setrun_locked(t);
1231 		} else if (!(flags & PRSTOP)) {
1232 			t->t_proc_flag &=
1233 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1234 		}
1235 		thread_unlock(t);
1236 	} else {
1237 		/*
1238 		 * Here, we are dealing with the whole process.
1239 		 */
1240 		if (ISTOPPED(t)) {
1241 			/*
1242 			 * The representative lwp is stopped on an event
1243 			 * of interest.  We demote it to PR_REQUESTED and
1244 			 * choose another representative lwp.  If the new
1245 			 * representative lwp is not stopped on an event of
1246 			 * interest (other than PR_REQUESTED), we set the
1247 			 * whole process running, else we leave the process
1248 			 * stopped showing the next event of interest.
1249 			 */
1250 			kthread_t *tx = NULL;
1251 
1252 			if (!(flags & PRSABORT) &&
1253 			    t->t_whystop == PR_SYSENTRY &&
1254 			    t->t_whatstop == SYS_lwp_exit)
1255 				tx = t;		/* remember the exiting lwp */
1256 			t->t_whystop = PR_REQUESTED;
1257 			t->t_whatstop = 0;
1258 			thread_unlock(t);
1259 			t = prchoose(p);	/* returns locked t */
1260 			ASSERT(ISTOPPED(t) || VSTOPPED(t));
1261 			if (VSTOPPED(t) ||
1262 			    t->t_whystop == PR_REQUESTED) {
1263 				thread_unlock(t);
1264 				allsetrun(p);
1265 			} else {
1266 				thread_unlock(t);
1267 				/*
1268 				 * As a special case, if the old representative
1269 				 * lwp was stopped on entry to _lwp_exit()
1270 				 * (and we are not aborting the system call),
1271 				 * we set the old representative lwp running.
1272 				 * We do this so that the next process stop
1273 				 * will find the exiting lwp gone.
1274 				 */
1275 				if (tx != NULL) {
1276 					thread_lock(tx);
1277 					tx->t_schedflag |= TS_PSTART;
1278 					t->t_dtrace_stop = 0;
1279 					setrun_locked(tx);
1280 					thread_unlock(tx);
1281 				}
1282 			}
1283 		} else {
1284 			/*
1285 			 * No event of interest; set all of the lwps running.
1286 			 */
1287 			if (flags & PRSABORT) {
1288 				t->t_proc_flag &=
1289 				    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1290 				setrun_locked(t);
1291 			}
1292 			thread_unlock(t);
1293 			allsetrun(p);
1294 		}
1295 	}
1296 	return (0);
1297 }
1298 
1299 /*
1300  * Wait until process/lwp stops or until timer expires.
1301  * Return EINTR for an interruption, -1 for timeout, else 0.
1302  */
1303 int
1304 pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
1305 	timestruc_t *ts,	/* absolute time of timeout, if any */
1306 	int timecheck)
1307 {
1308 	int rval;
1309 
1310 	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1311 	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1312 	mutex_exit(&pcp->prc_mutex);
1313 	switch (rval) {
1314 	case 0:
1315 		return (EINTR);
1316 	case -1:
1317 		return (-1);
1318 	default:
1319 		return (0);
1320 	}
1321 }
1322 
1323 /*
1324  * Make all threads in the process runnable.
1325  */
1326 void
1327 allsetrun(proc_t *p)
1328 {
1329 	kthread_t *t;
1330 
1331 	ASSERT(MUTEX_HELD(&p->p_lock));
1332 
1333 	if ((t = p->p_tlist) != NULL) {
1334 		do {
1335 			thread_lock(t);
1336 			ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1337 			t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1338 			if (ISTOPPED(t)) {
1339 				t->t_schedflag |= TS_PSTART;
1340 				t->t_dtrace_stop = 0;
1341 				setrun_locked(t);
1342 			}
1343 			thread_unlock(t);
1344 		} while ((t = t->t_forw) != p->p_tlist);
1345 	}
1346 }
1347 
1348 /*
1349  * Wait for the process to die.
1350  * We do this after sending SIGKILL because we know it will
1351  * die soon and we want subsequent operations to return ENOENT.
1352  */
1353 void
1354 pr_wait_die(prnode_t *pnp)
1355 {
1356 	proc_t *p;
1357 
1358 	mutex_enter(&pidlock);
1359 	while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1360 		if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1361 			break;
1362 	}
1363 	mutex_exit(&pidlock);
1364 }
1365 
1366 static void
1367 pr_settrace(proc_t *p, sigset_t *sp)
1368 {
1369 	prdelset(sp, SIGKILL);
1370 	prassignset(&p->p_sigmask, sp);
1371 	if (!sigisempty(&p->p_sigmask))
1372 		p->p_proc_flag |= P_PR_TRACE;
1373 	else if (prisempty(&p->p_fltmask)) {
1374 		user_t *up = PTOU(p);
1375 		if (up->u_systrap == 0)
1376 			p->p_proc_flag &= ~P_PR_TRACE;
1377 	}
1378 }
1379 
/*
 * Set the current signal of the lwp selected by pnp to sip->si_signo.
 *
 * A signal number of zero clears the current signal and frees any
 * associated siginfo.  A non-zero signal installs a copy of *sip as the
 * lwp's current siginfo, applies the SIGKILL and job-control
 * side-effects, and sets the lwp running if it is sleeping/waiting (or
 * stopped, in the SIGKILL case).
 *
 * Returns EINVAL if the signal number is negative or not below the
 * limit, EBUSY if the lwp's current signal is already SIGKILL, else 0.
 *
 * p->p_lock is dropped and reacquired around the KM_SLEEP allocation,
 * so the caller presumably holds it on entry -- confirm with callers.
 */
1380 int
1381 pr_setsig(prnode_t *pnp, siginfo_t *sip)
1382 {
	/* The signal limit is taken from the caller's (curproc's) brand. */
1383 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1384 	int sig = sip->si_signo;
1385 	prcommon_t *pcp = pnp->pr_common;
1386 	proc_t *p = pcp->prc_proc;
1387 	kthread_t *t;
1388 	klwp_t *lwp;
1389 	int error = 0;
1390 
1391 	t = pr_thread(pnp);	/* returns locked thread */
1392 	thread_unlock(t);
1393 	lwp = ttolwp(t);
1394 	if (sig < 0 || sig >= nsig)
1395 		/* Zero allowed here */
1396 		error = EINVAL;
1397 	else if (lwp->lwp_cursig == SIGKILL)
1398 		/* "can't happen", but just in case */
1399 		error = EBUSY;
	/* The new current signal is stored by the test below. */
1400 	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
1401 		lwp->lwp_extsig = 0;
1402 		/*
1403 		 * Discard current siginfo_t, if any.
1404 		 */
1405 		if (lwp->lwp_curinfo) {
1406 			siginfofree(lwp->lwp_curinfo);
1407 			lwp->lwp_curinfo = NULL;
1408 		}
1409 	} else {
1410 		kthread_t *tx;
1411 		sigqueue_t *sqp;
1412 
1413 		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
1414 		mutex_exit(&p->p_lock);
1415 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
1416 		mutex_enter(&p->p_lock);
1417 
		/*
		 * lwp_curinfo is re-examined after retaking p_lock; the
		 * fresh allocation is used only if there is still none.
		 */
1418 		if (lwp->lwp_curinfo == NULL)
1419 			lwp->lwp_curinfo = sqp;
1420 		else
1421 			kmem_free(sqp, sizeof (sigqueue_t));
1422 		/*
1423 		 * Copy contents of info to current siginfo_t.
1424 		 */
1425 		bcopy(sip, &lwp->lwp_curinfo->sq_info,
1426 		    sizeof (lwp->lwp_curinfo->sq_info));
1427 		/*
1428 		 * Prevent contents published by si_zoneid-unaware /proc
1429 		 * consumers from being incorrectly filtered.  Because
1430 		 * an uninitialized si_zoneid is the same as
1431 		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
1432 		 * process in a non-global zone with a siginfo which
1433 		 * appears to come from the global zone.
1434 		 */
1435 		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
1436 			lwp->lwp_curinfo->sq_info.si_zoneid =
1437 			    p->p_zone->zone_id;
1438 		/*
1439 		 * Side-effects for SIGKILL and jobcontrol signals.
1440 		 */
1441 		if (sig == SIGKILL) {
1442 			p->p_flag |= SKILLED;
1443 			p->p_flag &= ~SEXTKILLED;
1444 		} else if (sig == SIGCONT) {
			/* SIGCONT discards pending stop signals everywhere. */
1445 			p->p_flag |= SSCONT;
1446 			sigdelq(p, NULL, SIGSTOP);
1447 			sigdelq(p, NULL, SIGTSTP);
1448 			sigdelq(p, NULL, SIGTTOU);
1449 			sigdelq(p, NULL, SIGTTIN);
1450 			sigdiffset(&p->p_sig, &stopdefault);
1451 			sigdiffset(&p->p_extsig, &stopdefault);
1452 			if ((tx = p->p_tlist) != NULL) {
1453 				do {
1454 					sigdelq(p, tx, SIGSTOP);
1455 					sigdelq(p, tx, SIGTSTP);
1456 					sigdelq(p, tx, SIGTTOU);
1457 					sigdelq(p, tx, SIGTTIN);
1458 					sigdiffset(&tx->t_sig, &stopdefault);
1459 					sigdiffset(&tx->t_extsig, &stopdefault);
1460 				} while ((tx = tx->t_forw) != p->p_tlist);
1461 			}
1462 		} else if (sigismember(&stopdefault, sig)) {
			/* A stop signal discards any pending SIGCONT. */
1463 			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
1464 			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
1465 				p->p_flag &= ~SSCONT;
1466 			sigdelq(p, NULL, SIGCONT);
1467 			sigdelset(&p->p_sig, SIGCONT);
1468 			sigdelset(&p->p_extsig, SIGCONT);
1469 			if ((tx = p->p_tlist) != NULL) {
1470 				do {
1471 					sigdelq(p, tx, SIGCONT);
1472 					sigdelset(&tx->t_sig, SIGCONT);
1473 					sigdelset(&tx->t_extsig, SIGCONT);
1474 				} while ((tx = tx->t_forw) != p->p_tlist);
1475 			}
1476 		}
1477 		thread_lock(t);
1478 		if (ISWAKEABLE(t) || ISWAITING(t)) {
1479 			/* Set signaled sleeping/waiting lwp running */
1480 			setrun_locked(t);
1481 		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
1482 			/* If SIGKILL, set stopped lwp running */
1483 			p->p_stopsig = 0;
1484 			t->t_schedflag |= TS_XSTART | TS_PSTART;
1485 			t->t_dtrace_stop = 0;
1486 			setrun_locked(t);
1487 		}
1488 		t->t_sig_check = 1;	/* so ISSIG will be done */
1489 		thread_unlock(t);
1490 		/*
1491 		 * More jobcontrol side-effects.
1492 		 */
		/* SIGCONT restarts every lwp stopped for job control. */
1493 		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
1494 			p->p_stopsig = 0;
1495 			do {
1496 				thread_lock(tx);
1497 				if (tx->t_state == TS_STOPPED &&
1498 				    tx->t_whystop == PR_JOBCONTROL) {
1499 					tx->t_schedflag |= TS_XSTART;
1500 					setrun_locked(tx);
1501 				}
1502 				thread_unlock(tx);
1503 			} while ((tx = tx->t_forw) != p->p_tlist);
1504 		}
1505 	}
1506 	return (error);
1507 }
1508 
1509 int
1510 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1511 {
1512 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1513 	prcommon_t *pcp = pnp->pr_common;
1514 	proc_t *p = pcp->prc_proc;
1515 	k_siginfo_t info;
1516 
1517 	if (sig <= 0 || sig >= nsig)
1518 		return (EINVAL);
1519 
1520 	bzero(&info, sizeof (info));
1521 	info.si_signo = sig;
1522 	info.si_code = SI_USER;
1523 	info.si_pid = curproc->p_pid;
1524 	info.si_ctid = PRCTID(curproc);
1525 	info.si_zoneid = getzoneid();
1526 	info.si_uid = crgetruid(cr);
1527 	sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1528 	    pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1529 
1530 	return (0);
1531 }
1532 
1533 int
1534 pr_unkill(prnode_t *pnp, int sig)
1535 {
1536 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1537 	prcommon_t *pcp = pnp->pr_common;
1538 	proc_t *p = pcp->prc_proc;
1539 	sigqueue_t *infop = NULL;
1540 
1541 	if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1542 		return (EINVAL);
1543 
1544 	if (pcp->prc_flags & PRC_LWP)
1545 		sigdeq(p, pcp->prc_thread, sig, &infop);
1546 	else
1547 		sigdeq(p, NULL, sig, &infop);
1548 
1549 	if (infop)
1550 		siginfofree(infop);
1551 
1552 	return (0);
1553 }
1554 
1555 int
1556 pr_nice(proc_t *p, int nice, cred_t *cr)
1557 {
1558 	kthread_t *t;
1559 	int err;
1560 	int error = 0;
1561 
1562 	t = p->p_tlist;
1563 	do {
1564 		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1565 		err = CL_DONICE(t, cr, nice, (int *)NULL);
1566 		schedctl_set_cidpri(t);
1567 		if (error == 0)
1568 			error = err;
1569 	} while ((t = t->t_forw) != p->p_tlist);
1570 
1571 	return (error);
1572 }
1573 
1574 void
1575 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1576 {
1577 	user_t *up = PTOU(p);
1578 
1579 	if (entry) {
1580 		prassignset(&up->u_entrymask, sysset);
1581 	} else {
1582 		prassignset(&up->u_exitmask, sysset);
1583 	}
1584 	if (!prisempty(&up->u_entrymask) ||
1585 	    !prisempty(&up->u_exitmask)) {
1586 		up->u_systrap = 1;
1587 		p->p_proc_flag |= P_PR_TRACE;
1588 		set_proc_sys(p);	/* set pre and post-sys flags */
1589 	} else {
1590 		up->u_systrap = 0;
1591 		if (sigisempty(&p->p_sigmask) &&
1592 		    prisempty(&p->p_fltmask))
1593 			p->p_proc_flag &= ~P_PR_TRACE;
1594 	}
1595 }
1596 
1597 #define	ALLFLAGS	\
1598 	(PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1599 
1600 int
1601 pr_set(proc_t *p, long flags)
1602 {
1603 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1604 		return (EBUSY);
1605 
1606 	if (flags & ~ALLFLAGS)
1607 		return (EINVAL);
1608 
1609 	if (flags & PR_FORK)
1610 		p->p_proc_flag |= P_PR_FORK;
1611 	if (flags & PR_RLC)
1612 		p->p_proc_flag |= P_PR_RUNLCL;
1613 	if (flags & PR_KLC)
1614 		p->p_proc_flag |= P_PR_KILLCL;
1615 	if (flags & PR_ASYNC)
1616 		p->p_proc_flag |= P_PR_ASYNC;
1617 	if (flags & PR_BPTADJ)
1618 		p->p_proc_flag |= P_PR_BPTADJ;
1619 	if (flags & PR_MSACCT)
1620 		if ((p->p_flag & SMSACCT) == 0)
1621 			estimate_msacct(p->p_tlist, gethrtime());
1622 	if (flags & PR_MSFORK)
1623 		p->p_flag |= SMSFORK;
1624 	if (flags & PR_PTRACE) {
1625 		p->p_proc_flag |= P_PR_PTRACE;
1626 		/* ptraced process must die if parent dead */
1627 		if (p->p_ppid == 1)
1628 			sigtoproc(p, NULL, SIGKILL);
1629 	}
1630 
1631 	return (0);
1632 }
1633 
1634 int
1635 pr_unset(proc_t *p, long flags)
1636 {
1637 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1638 		return (EBUSY);
1639 
1640 	if (flags & ~ALLFLAGS)
1641 		return (EINVAL);
1642 
1643 	if (flags & PR_FORK)
1644 		p->p_proc_flag &= ~P_PR_FORK;
1645 	if (flags & PR_RLC)
1646 		p->p_proc_flag &= ~P_PR_RUNLCL;
1647 	if (flags & PR_KLC)
1648 		p->p_proc_flag &= ~P_PR_KILLCL;
1649 	if (flags & PR_ASYNC)
1650 		p->p_proc_flag &= ~P_PR_ASYNC;
1651 	if (flags & PR_BPTADJ)
1652 		p->p_proc_flag &= ~P_PR_BPTADJ;
1653 	if (flags & PR_MSACCT)
1654 		disable_msacct(p);
1655 	if (flags & PR_MSFORK)
1656 		p->p_flag &= ~SMSFORK;
1657 	if (flags & PR_PTRACE)
1658 		p->p_proc_flag &= ~P_PR_PTRACE;
1659 
1660 	return (0);
1661 }
1662 
1663 static int
1664 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1665 {
1666 	proc_t *p = pnp->pr_common->prc_proc;
1667 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1668 
1669 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1670 		thread_unlock(t);
1671 		return (EBUSY);
1672 	}
1673 	if (!prhasfp()) {
1674 		thread_unlock(t);
1675 		return (EINVAL);	/* No FP support */
1676 	}
1677 
1678 	/* drop p_lock while touching the lwp's stack */
1679 	thread_unlock(t);
1680 	mutex_exit(&p->p_lock);
1681 	prsetprfpregs(ttolwp(t), prfpregset);
1682 	mutex_enter(&p->p_lock);
1683 
1684 	return (0);
1685 }
1686 
#ifdef	_SYSCALL32_IMPL
/*
 * 32-bit counterpart of pr_setfpregs(): load the floating-point
 * registers of the lwp selected by pnp from a prfpregset32_t.
 * Returns EBUSY if the lwp is not stopped, EINVAL if prhasfp() reports
 * no floating-point support, else 0.
 */
static int
pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
	int error = 0;

	if (!(ISTOPPED(t) || VSTOPPED(t) || DSTOPPED(t)))
		error = EBUSY;
	else if (!prhasfp())
		error = EINVAL;		/* No FP support */
	thread_unlock(t);

	if (error != 0)
		return (error);

	/* drop p_lock while touching the lwp's stack */
	mutex_exit(&p->p_lock);
	prsetprfpregs32(ttolwp(t), prfpregset);
	mutex_enter(&p->p_lock);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */
1712 
1713 #if defined(__sparc)
1714 /* ARGSUSED */
1715 static int
1716 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1717 {
1718 	proc_t *p = pnp->pr_common->prc_proc;
1719 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1720 
1721 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1722 		thread_unlock(t);
1723 		return (EBUSY);
1724 	}
1725 	thread_unlock(t);
1726 
1727 	if (!prhasx(p))
1728 		return (EINVAL);	/* No extra register support */
1729 
1730 	/* drop p_lock while touching the lwp's stack */
1731 	mutex_exit(&p->p_lock);
1732 	prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1733 	mutex_enter(&p->p_lock);
1734 
1735 	return (0);
1736 }
1737 
1738 static int
1739 pr_setasrs(prnode_t *pnp, asrset_t asrset)
1740 {
1741 	proc_t *p = pnp->pr_common->prc_proc;
1742 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1743 
1744 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1745 		thread_unlock(t);
1746 		return (EBUSY);
1747 	}
1748 	thread_unlock(t);
1749 
1750 	/* drop p_lock while touching the lwp's stack */
1751 	mutex_exit(&p->p_lock);
1752 	prsetasregs(ttolwp(t), asrset);
1753 	mutex_enter(&p->p_lock);
1754 
1755 	return (0);
1756 }
1757 #endif
1758 
1759 static int
1760 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1761 {
1762 	proc_t *p = pnp->pr_common->prc_proc;
1763 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1764 
1765 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1766 		thread_unlock(t);
1767 		return (EBUSY);
1768 	}
1769 
1770 	/* drop p_lock while touching the lwp's stack */
1771 	thread_unlock(t);
1772 	mutex_exit(&p->p_lock);
1773 	prsvaddr(ttolwp(t), vaddr);
1774 	mutex_enter(&p->p_lock);
1775 
1776 	return (0);
1777 }
1778 
1779 void
1780 pr_sethold(prnode_t *pnp, sigset_t *sp)
1781 {
1782 	proc_t *p = pnp->pr_common->prc_proc;
1783 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1784 
1785 	schedctl_finish_sigblock(t);
1786 	sigutok(sp, &t->t_hold);
1787 	if (ISWAKEABLE(t) &&
1788 	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1789 		setrun_locked(t);
1790 	t->t_sig_check = 1;	/* so thread will see new holdmask */
1791 	thread_unlock(t);
1792 }
1793 
1794 void
1795 pr_setfault(proc_t *p, fltset_t *fltp)
1796 {
1797 	prassignset(&p->p_fltmask, fltp);
1798 	if (!prisempty(&p->p_fltmask))
1799 		p->p_proc_flag |= P_PR_TRACE;
1800 	else if (sigisempty(&p->p_sigmask)) {
1801 		user_t *up = PTOU(p);
1802 		if (up->u_systrap == 0)
1803 			p->p_proc_flag &= ~P_PR_TRACE;
1804 	}
1805 }
1806 
1807 static int
1808 pr_clearsig(prnode_t *pnp)
1809 {
1810 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1811 	klwp_t *lwp = ttolwp(t);
1812 
1813 	thread_unlock(t);
1814 	if (lwp->lwp_cursig == SIGKILL)
1815 		return (EBUSY);
1816 
1817 	/*
1818 	 * Discard current siginfo_t, if any.
1819 	 */
1820 	lwp->lwp_cursig = 0;
1821 	lwp->lwp_extsig = 0;
1822 	if (lwp->lwp_curinfo) {
1823 		siginfofree(lwp->lwp_curinfo);
1824 		lwp->lwp_curinfo = NULL;
1825 	}
1826 
1827 	return (0);
1828 }
1829 
1830 static int
1831 pr_clearflt(prnode_t *pnp)
1832 {
1833 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1834 
1835 	thread_unlock(t);
1836 	ttolwp(t)->lwp_curflt = 0;
1837 
1838 	return (0);
1839 }
1840 
/*
 * Set or clear a watched area in the target process.
 *
 * pwp describes the area: pr_vaddr/pr_size give the range and
 * pr_wflags the access types.  If none of WA_READ/WA_WRITE/WA_EXEC is
 * set the area is cleared with clear_watched_area(); otherwise it is
 * established with set_watched_area().
 *
 * *unlocked is set to 0 on entry and to 1 only on the paths that return
 * after a failed prlock(), i.e. when the function returns without the
 * process locked.
 *
 * Returns EBUSY for a system process, EINVAL for a wrapping range,
 * unknown flags or a zero-sized area with access flags, E2BIG if the
 * range would span more than 2 * prnwatch pages, 0 without doing
 * anything if the range starts at or above the user limit, the error
 * from prlock() if relocking fails, and otherwise the result of
 * clear_watched_area()/set_watched_area().
 */
1841 static int
1842 pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
1843 {
1844 	proc_t *p = pnp->pr_common->prc_proc;
1845 	struct as *as = p->p_as;
1846 	uintptr_t vaddr = pwp->pr_vaddr;
1847 	size_t size = pwp->pr_size;
1848 	int wflags = pwp->pr_wflags;
1849 	ulong_t newpage = 0;
1850 	struct watched_area *pwa;
1851 	int error;
1852 
1853 	*unlocked = 0;
1854 
1855 	/*
1856 	 * Can't apply to a system process.
1857 	 */
1858 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1859 		return (EBUSY);
1860 
1861 	/*
1862 	 * Verify that the address range does not wrap
1863 	 * and that only the proper flags were specified.
1864 	 */
	/* With no access flags this is a clear request; size is ignored. */
1865 	if ((wflags & ~WA_TRAPAFTER) == 0)
1866 		size = 0;
1867 	if (vaddr + size < vaddr ||
1868 	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
1869 	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
1870 		return (EINVAL);
1871 
1872 	/*
1873 	 * Don't let the address range go above as->a_userlimit.
1874 	 * There is no error here, just a limitation.
1875 	 */
1876 	if (vaddr >= (uintptr_t)as->a_userlimit)
1877 		return (0);
1878 	if (vaddr + size > (uintptr_t)as->a_userlimit)
1879 		size = (uintptr_t)as->a_userlimit - vaddr;
1880 
1881 	/*
1882 	 * Compute maximum number of pages this will add.
1883 	 */
1884 	if ((wflags & ~WA_TRAPAFTER) != 0) {
1885 		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
1886 		newpage = btopr(pagespan);
1887 		if (newpage > 2 * prnwatch)
1888 			return (E2BIG);
1889 	}
1890 
1891 	/*
1892 	 * Force the process to be fully stopped.
1893 	 */
1894 	if (p == curproc) {
		/* Operating on ourselves: hold our other lwps via holdwatch(). */
1895 		prunlock(pnp);
1896 		while (holdwatch() != 0)
1897 			continue;
1898 		if ((error = prlock(pnp, ZNO)) != 0) {
1899 			continuelwps(p);
1900 			*unlocked = 1;
1901 			return (error);
1902 		}
1903 	} else {
		/* Another process: pause its lwps and wait until all stop. */
1904 		pauselwps(p);
1905 		while (pr_allstopped(p, 0) > 0) {
1906 			/*
1907 			 * This cv/mutex pair is persistent even
1908 			 * if the process disappears after we
1909 			 * unmark it and drop p->p_lock.
1910 			 */
1911 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
1912 			kmutex_t *mp = &p->p_lock;
1913 
1914 			prunmark(p);
1915 			(void) cv_wait(cv, mp);
1916 			mutex_exit(mp);
1917 			if ((error = prlock(pnp, ZNO)) != 0) {
1918 				/*
1919 				 * Unpause the process if it exists.
1920 				 */
1921 				p = pr_p_lock(pnp);
1922 				mutex_exit(&pr_pidlock);
1923 				if (p != NULL) {
1924 					unpauselwps(p);
1925 					prunlock(pnp);
1926 				}
1927 				*unlocked = 1;
1928 				return (error);
1929 			}
1930 		}
1931 	}
1932 
1933 	/*
1934 	 * Drop p->p_lock in order to perform the rest of this.
1935 	 * The process is still locked with the P_PR_LOCK flag.
1936 	 */
1937 	mutex_exit(&p->p_lock);
1938 
1939 	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
1940 	pwa->wa_vaddr = (caddr_t)vaddr;
1941 	pwa->wa_eaddr = (caddr_t)vaddr + size;
1942 	pwa->wa_flags = (ulong_t)wflags;
1943 
	/*
	 * pwa is handed to clear_watched_area()/set_watched_area() and is
	 * not freed here -- NOTE(review): presumably those routines take
	 * ownership of it; confirm.
	 */
1944 	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
1945 	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);
1946 
	/* Retake p->p_lock and let the lwps held above run again. */
1947 	if (p == curproc) {
1948 		setallwatch();
1949 		mutex_enter(&p->p_lock);
1950 		continuelwps(p);
1951 	} else {
1952 		mutex_enter(&p->p_lock);
1953 		unpauselwps(p);
1954 	}
1955 
1956 	return (error);
1957 }
1958 
/*
 * True when thread t is in state TS_STOPPED with a PR_JOBCONTROL stop
 * reason and also has the TP_PRSTOP directive flag set.
 */
1959 /* jobcontrol stopped, but with a /proc directed stop in effect */
1960 #define	JDSTOPPED(t)	\
1961 	((t)->t_state == TS_STOPPED && \
1962 	(t)->t_whystop == PR_JOBCONTROL && \
1963 	((t)->t_proc_flag & TP_PRSTOP))
1964 
1965 /*
1966  * pr_agent() creates the agent lwp. If the process is exiting while
1967  * we are creating an agent lwp, then exitlwps() waits until the
1968  * agent has been created using prbarrier().
1969  */
/*
 * prgregset supplies the initial general registers for the agent lwp.
 *
 * *unlocked is set to 0 on entry, to 1 each time the process is
 * unlocked with prunlock(), and back to 0 after each successful
 * prlock(); on return it tells the caller whether the process is still
 * locked.
 *
 * Returns EBUSY if the agent cannot be created (see below), ENOMEM if
 * lwp_create() fails, the error from prlock() if relocking fails, and
 * ENOENT if the agent disappeared while we waited.  On every other
 * path, including an interrupted wait, it returns -1 rather than 0 --
 * NOTE(review): presumably a sentinel interpreted by the caller;
 * confirm.
 */
1970 static int
1971 pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
1972 {
1973 	proc_t *p = pnp->pr_common->prc_proc;
1974 	prcommon_t *pcp;
1975 	kthread_t *t;
1976 	kthread_t *ct;
1977 	klwp_t *clwp;
1978 	k_sigset_t smask;
1979 	int cid;
1980 	void *bufp = NULL;
1981 	int error;
1982 
1983 	*unlocked = 0;
1984 
1985 	/*
1986 	 * Cannot create the /proc agent lwp if :-
1987 	 * - the process is not fully stopped or directed to stop.
1988 	 * - there is an agent lwp already.
1989 	 * - the process has been killed.
1990 	 * - the process is exiting.
1991 	 * - it's a vfork(2) parent.
1992 	 */
1993 	t = prchoose(p);	/* returns locked thread */
1994 	ASSERT(t != NULL);
1995 
1996 	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
1997 	    p->p_agenttp != NULL ||
1998 	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
1999 		thread_unlock(t);
2000 		return (EBUSY);
2001 	}
2002 
2003 	thread_unlock(t);
2004 	mutex_exit(&p->p_lock);
2005 
	/* The agent starts with every signal held except those in cantmask. */
2006 	sigfillset(&smask);
2007 	sigdiffset(&smask, &cantmask);
2008 	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
2009 	    t->t_pri, &smask, NOCLASS, 0);
2010 	if (clwp == NULL) {
2011 		mutex_enter(&p->p_lock);
2012 		return (ENOMEM);
2013 	}
2014 	prsetprregs(clwp, prgregset, 1);
2015 
2016 	/*
2017 	 * Because abandoning the agent inside the target process leads to
2018 	 * a state that is essentially undebuggable, we record the psinfo of
2019 	 * the process creating the agent and hang that off of the lwp.
2020 	 */
2021 	clwp->lwp_spymaster = kmem_zalloc(sizeof (psinfo_t), KM_SLEEP);
2022 	mutex_enter(&curproc->p_lock);
2023 	prgetpsinfo(curproc, clwp->lwp_spymaster);
2024 	mutex_exit(&curproc->p_lock);
2025 
2026 	/*
2027 	 * We overload pr_time in the spymaster to denote the time at which the
2028 	 * agent was created.
2029 	 */
2030 	gethrestime(&clwp->lwp_spymaster->pr_time);
2031 
	/*
	 * Pre-allocate the scheduling-class buffer before taking p->p_lock
	 * (the allocation is KM_SLEEP), then verify under the lock that the
	 * class did not change in the meantime.
	 */
2032 retry:
2033 	cid = t->t_cid;
2034 	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
2035 	mutex_enter(&p->p_lock);
2036 	if (cid != t->t_cid) {
2037 		/*
2038 		 * Someone just changed this thread's scheduling class,
2039 		 * so try pre-allocating the buffer again.  Hopefully we
2040 		 * don't hit this often.
2041 		 */
2042 		mutex_exit(&p->p_lock);
2043 		CL_FREE(cid, bufp);
2044 		goto retry;
2045 	}
2046 
	/* The agent takes the scheduling class of the representative lwp. */
2047 	clwp->lwp_ap = clwp->lwp_arg;
2048 	clwp->lwp_eosys = NORMALRETURN;
2049 	ct = lwptot(clwp);
2050 	ct->t_clfuncs = t->t_clfuncs;
2051 	CL_FORK(t, ct, bufp);
2052 	ct->t_cid = t->t_cid;
2053 	ct->t_proc_flag |= TP_PRSTOP;
2054 	/*
2055 	 * Setting t_sysnum to zero causes post_syscall()
2056 	 * to bypass all syscall checks and go directly to
2057 	 *	if (issig()) psig();
2058 	 * so that the agent lwp will stop in issig_forreal()
2059 	 * showing PR_REQUESTED.
2060 	 */
2061 	ct->t_sysnum = 0;
2062 	ct->t_post_sys = 1;
2063 	ct->t_sig_check = 1;
2064 	p->p_agenttp = ct;
2065 	ct->t_proc_flag &= ~TP_HOLDLWP;
2066 
	/*
	 * prc_mutex is taken before the agent is released so that it is
	 * already held when pr_wait() is called below; pr_wait() drops it.
	 */
2067 	pcp = pnp->pr_pcommon;
2068 	mutex_enter(&pcp->prc_mutex);
2069 
2070 	lwp_create_done(ct);
2071 
2072 	/*
2073 	 * Don't return until the agent is stopped on PR_REQUESTED.
2074 	 */
2075 
2076 	for (;;) {
2077 		prunlock(pnp);
2078 		*unlocked = 1;
2079 
2080 		/*
2081 		 * Wait for the agent to stop and notify us.
2082 		 * If we've been interrupted, return that information.
2083 		 */
2084 		error = pr_wait(pcp, NULL, 0);
2085 		if (error == EINTR) {
2086 			error = 0;
2087 			break;
2088 		}
2089 
2090 		/*
2091 		 * Confirm that the agent LWP has stopped.
2092 		 */
2093 
2094 		if ((error = prlock(pnp, ZNO)) != 0)
2095 			break;
2096 		*unlocked = 0;
2097 
2098 		/*
2099 		 * Since we dropped the lock on the process, the agent
2100 		 * may have disappeared or changed. Grab the current
2101 		 * agent and fail if it has disappeared.
2102 		 */
2103 		if ((ct = p->p_agenttp) == NULL) {
2104 			error = ENOENT;
2105 			break;
2106 		}
2107 
2108 		mutex_enter(&pcp->prc_mutex);
2109 		thread_lock(ct);
2110 
2111 		if (ISTOPPED(ct)) {
2112 			thread_unlock(ct);
2113 			mutex_exit(&pcp->prc_mutex);
2114 			break;
2115 		}
2116 
		/* Not stopped yet: loop with prc_mutex still held. */
2117 		thread_unlock(ct);
2118 	}
2119 
	/* A zero error is reported to the caller as -1. */
2120 	return (error ? error : -1);
2121 }
2122 
2123 static int
2124 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2125 {
2126 	caddr_t base = (caddr_t)pio->pio_base;
2127 	size_t cnt = pio->pio_len;
2128 	uintptr_t offset = (uintptr_t)pio->pio_offset;
2129 	struct uio auio;
2130 	struct iovec aiov;
2131 	int error = 0;
2132 
2133 	if ((p->p_flag & SSYS) || p->p_as == &kas)
2134 		error = EIO;
2135 	else if ((base + cnt) < base || (offset + cnt) < offset)
2136 		error = EINVAL;
2137 	else if (cnt != 0) {
2138 		aiov.iov_base = base;
2139 		aiov.iov_len = cnt;
2140 
2141 		auio.uio_loffset = offset;
2142 		auio.uio_iov = &aiov;
2143 		auio.uio_iovcnt = 1;
2144 		auio.uio_resid = cnt;
2145 		auio.uio_segflg = UIO_USERSPACE;
2146 		auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2147 		auio.uio_fmode = FREAD|FWRITE;
2148 		auio.uio_extflg = UIO_COPY_DEFAULT;
2149 
2150 		mutex_exit(&p->p_lock);
2151 		error = prusrio(p, rw, &auio, 0);
2152 		mutex_enter(&p->p_lock);
2153 
2154 		/*
2155 		 * We have no way to return the i/o count,
2156 		 * like read() or write() would do, so we
2157 		 * return an error if the i/o was truncated.
2158 		 */
2159 		if (auio.uio_resid != 0 && error == 0)
2160 			error = EIO;
2161 	}
2162 
2163 	return (error);
2164 }
2165 
2166 static int
2167 pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2168 {
2169 	kthread_t *t;
2170 	cred_t *oldcred;
2171 	cred_t *newcred;
2172 	uid_t oldruid;
2173 	int error;
2174 	zone_t *zone = crgetzone(cr);
2175 
2176 	if (!VALID_UID(prcred->pr_euid, zone) ||
2177 	    !VALID_UID(prcred->pr_ruid, zone) ||
2178 	    !VALID_UID(prcred->pr_suid, zone) ||
2179 	    !VALID_GID(prcred->pr_egid, zone) ||
2180 	    !VALID_GID(prcred->pr_rgid, zone) ||
2181 	    !VALID_GID(prcred->pr_sgid, zone))
2182 		return (EINVAL);
2183 
2184 	if (dogrps) {
2185 		int ngrp = prcred->pr_ngroups;
2186 		int i;
2187 
2188 		if (ngrp < 0 || ngrp > ngroups_max)
2189 			return (EINVAL);
2190 
2191 		for (i = 0; i < ngrp; i++) {
2192 			if (!VALID_GID(prcred->pr_groups[i], zone))
2193 				return (EINVAL);
2194 		}
2195 	}
2196 
2197 	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2198 
2199 	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2200 		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2201 
2202 	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2203 	    prcred->pr_suid != prcred->pr_ruid)
2204 		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2205 
2206 	if (error)
2207 		return (error);
2208 
2209 	mutex_exit(&p->p_lock);
2210 
2211 	/* hold old cred so it doesn't disappear while we dup it */
2212 	mutex_enter(&p->p_crlock);
2213 	crhold(oldcred = p->p_cred);
2214 	mutex_exit(&p->p_crlock);
2215 	newcred = crdup(oldcred);
2216 	oldruid = crgetruid(oldcred);
2217 	crfree(oldcred);
2218 
2219 	/* Error checking done above */
2220 	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2221 	    prcred->pr_suid);
2222 	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2223 	    prcred->pr_sgid);
2224 
2225 	if (dogrps) {
2226 		(void) crsetgroups(newcred, prcred->pr_ngroups,
2227 		    prcred->pr_groups);
2228 
2229 	}
2230 
2231 	mutex_enter(&p->p_crlock);
2232 	oldcred = p->p_cred;
2233 	p->p_cred = newcred;
2234 	mutex_exit(&p->p_crlock);
2235 	crfree(oldcred);
2236 
2237 	/*
2238 	 * Keep count of processes per uid consistent.
2239 	 */
2240 	if (oldruid != prcred->pr_ruid) {
2241 		zoneid_t zoneid = crgetzoneid(newcred);
2242 
2243 		mutex_enter(&pidlock);
2244 		upcount_dec(oldruid, zoneid);
2245 		upcount_inc(prcred->pr_ruid, zoneid);
2246 		mutex_exit(&pidlock);
2247 	}
2248 
2249 	/*
2250 	 * Broadcast the cred change to the threads.
2251 	 */
2252 	mutex_enter(&p->p_lock);
2253 	t = p->p_tlist;
2254 	do {
2255 		t->t_pre_sys = 1; /* so syscall will get new cred */
2256 	} while ((t = t->t_forw) != p->p_tlist);
2257 
2258 	return (0);
2259 }
2260 
2261 /*
2262  * Change process credentials to specified zone.  Used to temporarily
2263  * set a process to run in the global zone; only transitions between
2264  * the process's actual zone and the global zone are allowed.
2265  */
2266 static int
2267 pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2268 {
2269 	kthread_t *t;
2270 	cred_t *oldcred;
2271 	cred_t *newcred;
2272 	zone_t *zptr;
2273 	zoneid_t oldzoneid;
2274 
2275 	if (secpolicy_zone_config(cr) != 0)
2276 		return (EPERM);
2277 	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2278 		return (EINVAL);
2279 	if ((zptr = zone_find_by_id(zoneid)) == NULL)
2280 		return (EINVAL);
2281 	mutex_exit(&p->p_lock);
2282 	mutex_enter(&p->p_crlock);
2283 	oldcred = p->p_cred;
2284 	crhold(oldcred);
2285 	mutex_exit(&p->p_crlock);
2286 	newcred = crdup(oldcred);
2287 	oldzoneid = crgetzoneid(oldcred);
2288 	crfree(oldcred);
2289 
2290 	crsetzone(newcred, zptr);
2291 	zone_rele(zptr);
2292 
2293 	mutex_enter(&p->p_crlock);
2294 	oldcred = p->p_cred;
2295 	p->p_cred = newcred;
2296 	mutex_exit(&p->p_crlock);
2297 	crfree(oldcred);
2298 
2299 	/*
2300 	 * The target process is changing zones (according to its cred), so
2301 	 * update the per-zone upcounts, which are based on process creds.
2302 	 */
2303 	if (oldzoneid != zoneid) {
2304 		uid_t ruid = crgetruid(newcred);
2305 
2306 		mutex_enter(&pidlock);
2307 		upcount_dec(ruid, oldzoneid);
2308 		upcount_inc(ruid, zoneid);
2309 		mutex_exit(&pidlock);
2310 	}
2311 	/*
2312 	 * Broadcast the cred change to the threads.
2313 	 */
2314 	mutex_enter(&p->p_lock);
2315 	t = p->p_tlist;
2316 	do {
2317 		t->t_pre_sys = 1;	/* so syscall will get new cred */
2318 	} while ((t = t->t_forw) != p->p_tlist);
2319 
2320 	return (0);
2321 }
2322 
2323 static int
2324 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2325 {
2326 	kthread_t *t;
2327 	int err;
2328 
2329 	ASSERT(MUTEX_HELD(&p->p_lock));
2330 
2331 	if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2332 		/*
2333 		 * Broadcast the cred change to the threads.
2334 		 */
2335 		t = p->p_tlist;
2336 		do {
2337 			t->t_pre_sys = 1; /* so syscall will get new cred */
2338 		} while ((t = t->t_forw) != p->p_tlist);
2339 	}
2340 
2341 	return (err);
2342 }
2343 
2344 /*
2345  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2346  * terminate or perform an exec(2).
2347  *
2348  * Returns 0 if the process is fully stopped except for the current thread (if
2349  * we are operating on our own process), 1 otherwise.
2350  *
2351  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2352  * See holdwatch() for details.
2353  */
2354 int
2355 pr_allstopped(proc_t *p, int watchstop)
2356 {
2357 	kthread_t *t;
2358 	int rv = 0;
2359 
2360 	ASSERT(MUTEX_HELD(&p->p_lock));
2361 
2362 	if (p->p_flag & SVFWAIT)	/* waiting for vfork'd child to exec */
2363 		return (-1);
2364 
2365 	if ((t = p->p_tlist) != NULL) {
2366 		do {
2367 			if (t == curthread || VSTOPPED(t) ||
2368 			    (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2369 				continue;
2370 			thread_lock(t);
2371 			switch (t->t_state) {
2372 			case TS_ZOMB:
2373 			case TS_STOPPED:
2374 				break;
2375 			case TS_SLEEP:
2376 				if (!(t->t_flag & T_WAKEABLE) ||
2377 				    t->t_wchan0 == NULL)
2378 					rv = 1;
2379 				break;
2380 			default:
2381 				rv = 1;
2382 				break;
2383 			}
2384 			thread_unlock(t);
2385 		} while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2386 	}
2387 
2388 	return (rv);
2389 }
2390 
2391 /*
2392  * Cause all lwps in the process to pause (for watchpoint operations).
2393  */
2394 static void
2395 pauselwps(proc_t *p)
2396 {
2397 	kthread_t *t;
2398 
2399 	ASSERT(MUTEX_HELD(&p->p_lock));
2400 	ASSERT(p != curproc);
2401 
2402 	if ((t = p->p_tlist) != NULL) {
2403 		do {
2404 			thread_lock(t);
2405 			t->t_proc_flag |= TP_PAUSE;
2406 			aston(t);
2407 			if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2408 			    ISWAITING(t)) {
2409 				setrun_locked(t);
2410 			}
2411 			prpokethread(t);
2412 			thread_unlock(t);
2413 		} while ((t = t->t_forw) != p->p_tlist);
2414 	}
2415 }
2416 
2417 /*
2418  * undo the effects of pauselwps()
2419  */
2420 static void
2421 unpauselwps(proc_t *p)
2422 {
2423 	kthread_t *t;
2424 
2425 	ASSERT(MUTEX_HELD(&p->p_lock));
2426 	ASSERT(p != curproc);
2427 
2428 	if ((t = p->p_tlist) != NULL) {
2429 		do {
2430 			thread_lock(t);
2431 			t->t_proc_flag &= ~TP_PAUSE;
2432 			if (t->t_state == TS_STOPPED) {
2433 				t->t_schedflag |= TS_UNPAUSE;
2434 				t->t_dtrace_stop = 0;
2435 				setrun_locked(t);
2436 			}
2437 			thread_unlock(t);
2438 		} while ((t = t->t_forw) != p->p_tlist);
2439 	}
2440 }
2441 
2442 /*
2443  * Cancel all watched areas.  Called from prclose().
2444  */
2445 proc_t *
2446 pr_cancel_watch(prnode_t *pnp)
2447 {
2448 	proc_t *p = pnp->pr_pcommon->prc_proc;
2449 	struct as *as;
2450 	kthread_t *t;
2451 
2452 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2453 
2454 	if (!pr_watch_active(p))
2455 		return (p);
2456 
2457 	/*
2458 	 * Pause the process before dealing with the watchpoints.
2459 	 */
2460 	if (p == curproc) {
2461 		prunlock(pnp);
2462 		while (holdwatch() != 0)
2463 			continue;
2464 		p = pr_p_lock(pnp);
2465 		mutex_exit(&pr_pidlock);
2466 		ASSERT(p == curproc);
2467 	} else {
2468 		pauselwps(p);
2469 		while (p != NULL && pr_allstopped(p, 0) > 0) {
2470 			/*
2471 			 * This cv/mutex pair is persistent even
2472 			 * if the process disappears after we
2473 			 * unmark it and drop p->p_lock.
2474 			 */
2475 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2476 			kmutex_t *mp = &p->p_lock;
2477 
2478 			prunmark(p);
2479 			(void) cv_wait(cv, mp);
2480 			mutex_exit(mp);
2481 			p = pr_p_lock(pnp);  /* NULL if process disappeared */
2482 			mutex_exit(&pr_pidlock);
2483 		}
2484 	}
2485 
2486 	if (p == NULL)		/* the process disappeared */
2487 		return (NULL);
2488 
2489 	ASSERT(p == pnp->pr_pcommon->prc_proc);
2490 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2491 
2492 	if (pr_watch_active(p)) {
2493 		pr_free_watchpoints(p);
2494 		if ((t = p->p_tlist) != NULL) {
2495 			do {
2496 				watch_disable(t);
2497 
2498 			} while ((t = t->t_forw) != p->p_tlist);
2499 		}
2500 	}
2501 
2502 	if ((as = p->p_as) != NULL) {
2503 		avl_tree_t *tree;
2504 		struct watched_page *pwp;
2505 
2506 		/*
2507 		 * If this is the parent of a vfork, the watched page
2508 		 * list has been moved temporarily to p->p_wpage.
2509 		 */
2510 		if (avl_numnodes(&p->p_wpage) != 0)
2511 			tree = &p->p_wpage;
2512 		else
2513 			tree = &as->a_wpage;
2514 
2515 		mutex_exit(&p->p_lock);
2516 		AS_LOCK_ENTER(as, RW_WRITER);
2517 
2518 		for (pwp = avl_first(tree); pwp != NULL;
2519 		    pwp = AVL_NEXT(tree, pwp)) {
2520 			pwp->wp_read = 0;
2521 			pwp->wp_write = 0;
2522 			pwp->wp_exec = 0;
2523 			if ((pwp->wp_flags & WP_SETPROT) == 0) {
2524 				pwp->wp_flags |= WP_SETPROT;
2525 				pwp->wp_prot = pwp->wp_oprot;
2526 				pwp->wp_list = p->p_wprot;
2527 				p->p_wprot = pwp;
2528 			}
2529 		}
2530 
2531 		AS_LOCK_EXIT(as);
2532 		mutex_enter(&p->p_lock);
2533 	}
2534 
2535 	/*
2536 	 * Unpause the process now.
2537 	 */
2538 	if (p == curproc)
2539 		continuelwps(p);
2540 	else
2541 		unpauselwps(p);
2542 
2543 	return (p);
2544 }
2545