xref: /titanic_41/usr/src/uts/common/fs/proc/prcontrol.c (revision dc5982c9e1ebe3315aac975f1c4f1ad46c83c2d2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/uio.h>
29 #include <sys/param.h>
30 #include <sys/cmn_err.h>
31 #include <sys/cred.h>
32 #include <sys/policy.h>
33 #include <sys/debug.h>
34 #include <sys/errno.h>
35 #include <sys/file.h>
36 #include <sys/inline.h>
37 #include <sys/kmem.h>
38 #include <sys/proc.h>
39 #include <sys/brand.h>
40 #include <sys/regset.h>
41 #include <sys/sysmacros.h>
42 #include <sys/systm.h>
43 #include <sys/vfs.h>
44 #include <sys/vnode.h>
45 #include <sys/signal.h>
46 #include <sys/auxv.h>
47 #include <sys/user.h>
48 #include <sys/class.h>
49 #include <sys/fault.h>
50 #include <sys/syscall.h>
51 #include <sys/procfs.h>
52 #include <sys/zone.h>
53 #include <sys/copyops.h>
54 #include <sys/schedctl.h>
55 #include <vm/as.h>
56 #include <vm/seg.h>
57 #include <fs/proc/prdata.h>
58 #include <sys/contract/process_impl.h>
59 
/*
 * Forward declarations of file-local (static) helpers, so that they can
 * be referenced before their definitions.  Most of them are invoked from
 * the pr_control()/pr_control32() dispatch switches; pr_setxregs() and
 * pr_setasrs() are only declared when building for sparc.
 */
static	void	pr_settrace(proc_t *, sigset_t *);
static	int	pr_setfpregs(prnode_t *, prfpregset_t *);
#if defined(__sparc)
static	int	pr_setxregs(prnode_t *, prxregset_t *);
static	int	pr_setasrs(prnode_t *, asrset_t);
#endif
static	int	pr_setvaddr(prnode_t *, caddr_t);
static	int	pr_clearsig(prnode_t *);
static	int	pr_clearflt(prnode_t *);
static	int	pr_watch(prnode_t *, prwatch_t *, int *);
static	int	pr_agent(prnode_t *, prgregset_t, int *);
static	int	pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
static	int	pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
static	int	pr_spriv(proc_t *, prpriv_t *, cred_t *);
static	int	pr_szoneid(proc_t *, zoneid_t, cred_t *);
static	void	pauselwps(proc_t *);
static	void	unpauselwps(proc_t *);
77 
/*
 * Operand of one control message in the native data model.  Each member
 * is the operand layout for the control code(s) named in its comment.
 * prwritectl() overlays this union on the words that follow the command
 * code in its buffer, and ctlsize() uses the member sizes to work out how
 * long each message is.
 */
typedef union {
	long		sig;		/* PCKILL, PCUNKILL */
	long		nice;		/* PCNICE */
	long		timeo;		/* PCTWSTOP */
	ulong_t		flags;		/* PCRUN, PCSET, PCUNSET */
	caddr_t		vaddr;		/* PCSVADDR */
	siginfo_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset_t	prfpregset;	/* PCSFPREG */
#if defined(__sparc)
	prxregset_t	prxregset;	/* PCSXREG */
	asrset_t	asrset;		/* PCSASRS */
#endif
	prwatch_t	prwatch;	/* PCWATCH */
	priovec_t	priovec;	/* PCREAD, PCWRITE */
	prcred_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	long		przoneid;	/* PCSZONE */
} arg_t;

/* Executes one control command whose length ctlsize() has validated. */
static	int	pr_control(long, arg_t *, prnode_t *, cred_t *);
102 
103 static size_t
104 ctlsize(long cmd, size_t resid, arg_t *argp)
105 {
106 	size_t size = sizeof (long);
107 	size_t rnd;
108 	int ngrp;
109 
110 	switch (cmd) {
111 	case PCNULL:
112 	case PCSTOP:
113 	case PCDSTOP:
114 	case PCWSTOP:
115 	case PCCSIG:
116 	case PCCFAULT:
117 		break;
118 	case PCSSIG:
119 		size += sizeof (siginfo_t);
120 		break;
121 	case PCTWSTOP:
122 		size += sizeof (long);
123 		break;
124 	case PCKILL:
125 	case PCUNKILL:
126 	case PCNICE:
127 		size += sizeof (long);
128 		break;
129 	case PCRUN:
130 	case PCSET:
131 	case PCUNSET:
132 		size += sizeof (ulong_t);
133 		break;
134 	case PCSVADDR:
135 		size += sizeof (caddr_t);
136 		break;
137 	case PCSTRACE:
138 	case PCSHOLD:
139 		size += sizeof (sigset_t);
140 		break;
141 	case PCSFAULT:
142 		size += sizeof (fltset_t);
143 		break;
144 	case PCSENTRY:
145 	case PCSEXIT:
146 		size += sizeof (sysset_t);
147 		break;
148 	case PCSREG:
149 	case PCAGENT:
150 		size += sizeof (prgregset_t);
151 		break;
152 	case PCSFPREG:
153 		size += sizeof (prfpregset_t);
154 		break;
155 #if defined(__sparc)
156 	case PCSXREG:
157 		size += sizeof (prxregset_t);
158 		break;
159 	case PCSASRS:
160 		size += sizeof (asrset_t);
161 		break;
162 #endif
163 	case PCWATCH:
164 		size += sizeof (prwatch_t);
165 		break;
166 	case PCREAD:
167 	case PCWRITE:
168 		size += sizeof (priovec_t);
169 		break;
170 	case PCSCRED:
171 		size += sizeof (prcred_t);
172 		break;
173 	case PCSCREDX:
174 		/*
175 		 * We cannot derefence the pr_ngroups fields if it
176 		 * we don't have enough data.
177 		 */
178 		if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
179 			return (0);
180 		ngrp = argp->prcred.pr_ngroups;
181 		if (ngrp < 0 || ngrp > ngroups_max)
182 			return (0);
183 
184 		/* The result can be smaller than sizeof (prcred_t) */
185 		size += sizeof (prcred_t) - sizeof (gid_t);
186 		size += ngrp * sizeof (gid_t);
187 		break;
188 	case PCSPRIV:
189 		if (resid >= size + sizeof (prpriv_t))
190 			size += priv_prgetprivsize(&argp->prpriv);
191 		else
192 			return (0);
193 		break;
194 	case PCSZONE:
195 		size += sizeof (long);
196 		break;
197 	default:
198 		return (0);
199 	}
200 
201 	/* Round up to a multiple of long, unless exact amount written */
202 	if (size < resid) {
203 		rnd = size & (sizeof (long) - 1);
204 
205 		if (rnd != 0)
206 			size += sizeof (long) - rnd;
207 	}
208 
209 	if (size > resid)
210 		return (0);
211 	return (size);
212 }
213 
214 /*
215  * Control operations (lots).
216  */
217 int
218 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
219 {
220 #define	MY_BUFFER_SIZE \
221 		100 > 1 + sizeof (arg_t) / sizeof (long) ? \
222 		100 : 1 + sizeof (arg_t) / sizeof (long)
223 	long buf[MY_BUFFER_SIZE];
224 	long *bufp;
225 	size_t resid = 0;
226 	size_t size;
227 	prnode_t *pnp = VTOP(vp);
228 	int error;
229 	int locked = 0;
230 
231 	while (uiop->uio_resid) {
232 		/*
233 		 * Read several commands in one gulp.
234 		 */
235 		bufp = buf;
236 		if (resid) {	/* move incomplete command to front of buffer */
237 			long *tail;
238 
239 			if (resid >= sizeof (buf))
240 				break;
241 			tail = (long *)((char *)buf + sizeof (buf) - resid);
242 			do {
243 				*bufp++ = *tail++;
244 			} while ((resid -= sizeof (long)) != 0);
245 		}
246 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
247 		if (resid > uiop->uio_resid)
248 			resid = uiop->uio_resid;
249 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
250 			return (error);
251 		resid += (char *)bufp - (char *)buf;
252 		bufp = buf;
253 
254 		do {		/* loop over commands in buffer */
255 			long cmd = bufp[0];
256 			arg_t *argp = (arg_t *)&bufp[1];
257 
258 			size = ctlsize(cmd, resid, argp);
259 			if (size == 0)	/* incomplete or invalid command */
260 				break;
261 			/*
262 			 * Perform the specified control operation.
263 			 */
264 			if (!locked) {
265 				if ((error = prlock(pnp, ZNO)) != 0)
266 					return (error);
267 				locked = 1;
268 			}
269 			if (error = pr_control(cmd, argp, pnp, cr)) {
270 				if (error == -1)	/* -1 is timeout */
271 					locked = 0;
272 				else
273 					return (error);
274 			}
275 			bufp = (long *)((char *)bufp + size);
276 		} while ((resid -= size) != 0);
277 
278 		if (locked) {
279 			prunlock(pnp);
280 			locked = 0;
281 		}
282 	}
283 	return (resid? EINVAL : 0);
284 }
285 
/*
 * Execute the single control operation 'cmd', with operand 'argp', on
 * the process or lwp designated by 'pnp', on behalf of credentials 'cr'.
 * prwritectl() calls this after a successful prlock(pnp, ZNO).
 *
 * Return protocol, as implemented below:
 *	0	success; nothing here has released the lock.
 *	-1	prunlock() has already been done (after a successful
 *		SIGKILL via PCSSIG/PCKILL, or a -1 passed back from
 *		pr_wait_stop()); prwritectl() records that it no longer
 *		holds the lock and carries on with the next command.
 *	other	an errno value; prunlock() is done here before returning,
 *		except on the early returns where the callee reports (or
 *		is visibly responsible for) having dropped the lock.
 */
static int
pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;	/* set by pr_watch()/pr_agent() through a pointer */
	int error = 0;

	/* PCNULL does nothing; it returns before the SSYS check below. */
	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	/* System processes defy control. */
	if (p->p_flag & SSYS) {
		prunlock(pnp);
		return (EBUSY);
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
		{
			time_t timeo;

			/*
			 * Can't apply to a system process.
			 */
			if (p->p_as == &kas) {
				error = EBUSY;
				break;
			}

			if (cmd == PCSTOP || cmd == PCDSTOP)
				pr_stop(pnp);

			if (cmd == PCDSTOP)
				break;

			/*
			 * If an lwp is waiting for itself or its process,
			 * don't wait. The stopped lwp would never see the
			 * fact that it is stopped.
			 */
			if ((pcp->prc_flags & PRC_LWP)?
			    (pcp->prc_thread == curthread) : (p == curproc)) {
				if (cmd == PCWSTOP || cmd == PCTWSTOP)
					error = EBUSY;
				break;
			}

			/*
			 * Only PCTWSTOP carries a timeout operand.  Every
			 * nonzero return from pr_wait_stop() happens after
			 * it has called prunlock(), so return directly
			 * rather than falling into the prunlock() below.
			 */
			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
			if ((error = pr_wait_stop(pnp, timeo)) != 0)
				return (error);

			break;
		}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p,  &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		error = pr_setsig(pnp, &argp->siginfo);
		/*
		 * After successfully setting SIGKILL, drop the lock, call
		 * pr_wait_die() and report -1 so the caller knows the lock
		 * is gone.
		 */
		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		/* Same SIGKILL handling as PCSSIG above. */
		if (error == 0 && argp->sig == SIGKILL) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, argp->flags);
		break;

	case PCSREG:	/* set general registers */
		{
			/* pr_thread() returns with the thread locked. */
			kthread_t *t = pr_thread(pnp);

			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
				thread_unlock(t);
				error = EBUSY;
			} else {
				/*
				 * p->p_lock is released around the register
				 * update and re-acquired afterwards.
				 */
				thread_unlock(t);
				mutex_exit(&p->p_lock);
				prsetprregs(ttolwp(t), argp->prgregset, 0);
				mutex_enter(&p->p_lock);
			}
			break;
		}

	case PCSFPREG:	/* set floating-point registers */
		error = pr_setfpregs(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
#if defined(__sparc)
		error = pr_setxregs(pnp, &argp->prxregset);
#else
		/* Extra registers are only handled on sparc. */
		error = EINVAL;
#endif
		break;

#if defined(__sparc)
	case PCSASRS:	/* set ancillary state registers */
		error = pr_setasrs(pnp, argp->asrset);
		break;
#endif

	case PCSVADDR:	/* set virtual address at which to resume */
		error = pr_setvaddr(pnp, argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		error = pr_watch(pnp, &argp->prwatch, &unlocked);
		/*
		 * If pr_watch() failed and set 'unlocked', skip the
		 * prunlock() below (presumably it already dropped the lock).
		 */
		if (error && unlocked)
			return (error);
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		error = pr_agent(pnp, argp->prgregset, &unlocked);
		/* Same 'unlocked' convention as PCWATCH above. */
		if (error && unlocked)
			return (error);
		break;

	case PCREAD:	/* read from the address space */
		error = pr_rdwr(p, UIO_READ, &argp->priovec);
		break;

	case PCWRITE:	/* write to the address space */
		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
		/* The last argument tells pr_scred() this is PCSCREDX. */
		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
		break;

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;
	case PCSZONE:	/* set the process's zoneid credentials */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* On failure the lock is released here on the caller's behalf. */
	if (error)
		prunlock(pnp);
	return (error);
}
491 
492 #ifdef _SYSCALL32_IMPL
493 
/*
 * Operand of one control message as written by a 32-bit caller; the
 * 32-bit counterpart of arg_t.  Each member is the operand layout for
 * the control code(s) named in its comment.  Members declared with a
 * plain type (sigset_t, fltset_t, sysset_t, prxregset_t, prpriv_t) use
 * the same type as arg_t; the rest use explicit 32-bit types.
 */
typedef union {
	int32_t		sig;		/* PCKILL, PCUNKILL */
	int32_t		nice;		/* PCNICE */
	int32_t		timeo;		/* PCTWSTOP */
	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
	caddr32_t	vaddr;		/* PCSVADDR */
	siginfo32_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset32_t	prfpregset;	/* PCSFPREG */
#if defined(__sparc)
	prxregset_t	prxregset;	/* PCSXREG */
#endif
	prwatch32_t	prwatch;	/* PCWATCH */
	priovec32_t	priovec;	/* PCREAD, PCWRITE */
	prcred32_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	int32_t		przoneid;	/* PCSZONE */
} arg32_t;

/* Executes one 32-bit control command validated by ctlsize32(). */
static	int	pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
/* 32-bit variant of pr_setfpregs(), used for PCSFPREG in pr_control32(). */
static	int	pr_setfpregs32(prnode_t *, prfpregset32_t *);
518 
519 /*
520  * Note that while ctlsize32() can use argp, it must do so only in a way
521  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
522  * to an array of 32-bit values and only 32-bit alignment is ensured.
523  */
524 static size_t
525 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
526 {
527 	size_t size = sizeof (int32_t);
528 	size_t rnd;
529 	int ngrp;
530 
531 	switch (cmd) {
532 	case PCNULL:
533 	case PCSTOP:
534 	case PCDSTOP:
535 	case PCWSTOP:
536 	case PCCSIG:
537 	case PCCFAULT:
538 		break;
539 	case PCSSIG:
540 		size += sizeof (siginfo32_t);
541 		break;
542 	case PCTWSTOP:
543 		size += sizeof (int32_t);
544 		break;
545 	case PCKILL:
546 	case PCUNKILL:
547 	case PCNICE:
548 		size += sizeof (int32_t);
549 		break;
550 	case PCRUN:
551 	case PCSET:
552 	case PCUNSET:
553 		size += sizeof (uint32_t);
554 		break;
555 	case PCSVADDR:
556 		size += sizeof (caddr32_t);
557 		break;
558 	case PCSTRACE:
559 	case PCSHOLD:
560 		size += sizeof (sigset_t);
561 		break;
562 	case PCSFAULT:
563 		size += sizeof (fltset_t);
564 		break;
565 	case PCSENTRY:
566 	case PCSEXIT:
567 		size += sizeof (sysset_t);
568 		break;
569 	case PCSREG:
570 	case PCAGENT:
571 		size += sizeof (prgregset32_t);
572 		break;
573 	case PCSFPREG:
574 		size += sizeof (prfpregset32_t);
575 		break;
576 #if defined(__sparc)
577 	case PCSXREG:
578 		size += sizeof (prxregset_t);
579 		break;
580 #endif
581 	case PCWATCH:
582 		size += sizeof (prwatch32_t);
583 		break;
584 	case PCREAD:
585 	case PCWRITE:
586 		size += sizeof (priovec32_t);
587 		break;
588 	case PCSCRED:
589 		size += sizeof (prcred32_t);
590 		break;
591 	case PCSCREDX:
592 		/*
593 		 * We cannot derefence the pr_ngroups fields if it
594 		 * we don't have enough data.
595 		 */
596 		if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
597 			return (0);
598 		ngrp = argp->prcred.pr_ngroups;
599 		if (ngrp < 0 || ngrp > ngroups_max)
600 			return (0);
601 
602 		/* The result can be smaller than sizeof (prcred32_t) */
603 		size += sizeof (prcred32_t) - sizeof (gid32_t);
604 		size += ngrp * sizeof (gid32_t);
605 		break;
606 	case PCSPRIV:
607 		if (resid >= size + sizeof (prpriv_t))
608 			size += priv_prgetprivsize(&argp->prpriv);
609 		else
610 			return (0);
611 		break;
612 	case PCSZONE:
613 		size += sizeof (int32_t);
614 		break;
615 	default:
616 		return (0);
617 	}
618 
619 	/* Round up to a multiple of int32_t */
620 	rnd = size & (sizeof (int32_t) - 1);
621 
622 	if (rnd != 0)
623 		size += sizeof (int32_t) - rnd;
624 
625 	if (size > resid)
626 		return (0);
627 	return (size);
628 }
629 
630 /*
631  * Control operations (lots).
632  */
633 int
634 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
635 {
636 #define	MY_BUFFER_SIZE32 \
637 		100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
638 		100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
639 	int32_t buf[MY_BUFFER_SIZE32];
640 	int32_t *bufp;
641 	arg32_t arg;
642 	size_t resid = 0;
643 	size_t size;
644 	prnode_t *pnp = VTOP(vp);
645 	int error;
646 	int locked = 0;
647 
648 	while (uiop->uio_resid) {
649 		/*
650 		 * Read several commands in one gulp.
651 		 */
652 		bufp = buf;
653 		if (resid) {	/* move incomplete command to front of buffer */
654 			int32_t *tail;
655 
656 			if (resid >= sizeof (buf))
657 				break;
658 			tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
659 			do {
660 				*bufp++ = *tail++;
661 			} while ((resid -= sizeof (int32_t)) != 0);
662 		}
663 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
664 		if (resid > uiop->uio_resid)
665 			resid = uiop->uio_resid;
666 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
667 			return (error);
668 		resid += (char *)bufp - (char *)buf;
669 		bufp = buf;
670 
671 		do {		/* loop over commands in buffer */
672 			int32_t cmd = bufp[0];
673 			arg32_t *argp = (arg32_t *)&bufp[1];
674 
675 			size = ctlsize32(cmd, resid, argp);
676 			if (size == 0)	/* incomplete or invalid command */
677 				break;
678 			/*
679 			 * Perform the specified control operation.
680 			 */
681 			if (!locked) {
682 				if ((error = prlock(pnp, ZNO)) != 0)
683 					return (error);
684 				locked = 1;
685 			}
686 
687 			/*
688 			 * Since some members of the arg32_t union contain
689 			 * 64-bit values (which must be 64-bit aligned), we
690 			 * can't simply pass a pointer to the structure as
691 			 * it may be unaligned. Note that we do pass the
692 			 * potentially unaligned structure to ctlsize32()
693 			 * above, but that uses it a way that makes no
694 			 * assumptions about alignment.
695 			 */
696 			ASSERT(size - sizeof (cmd) <= sizeof (arg));
697 			bcopy(argp, &arg, size - sizeof (cmd));
698 
699 			if (error = pr_control32(cmd, &arg, pnp, cr)) {
700 				if (error == -1)	/* -1 is timeout */
701 					locked = 0;
702 				else
703 					return (error);
704 			}
705 			bufp = (int32_t *)((char *)bufp + size);
706 		} while ((resid -= size) != 0);
707 
708 		if (locked) {
709 			prunlock(pnp);
710 			locked = 0;
711 		}
712 	}
713 	return (resid? EINVAL : 0);
714 }
715 
/*
 * Execute the single control operation 'cmd', with 32-bit operand 'argp',
 * on the process or lwp designated by 'pnp', on behalf of credentials
 * 'cr'.  prwritectl32() calls this after a successful prlock(pnp, ZNO).
 *
 * Operands are converted to their native forms where the layouts
 * differ.  The commands guarded by PROCESS_NOT_32BIT(p) below fail with
 * EOVERFLOW when that macro is true for the target process.
 *
 * Return protocol, as implemented below:
 *	0	success; nothing here has released the lock.
 *	-1	prunlock() has already been done (after a successful
 *		SIGKILL via PCSSIG/PCKILL, or a -1 passed back from
 *		pr_wait_stop()); the caller records that it no longer
 *		holds the lock and carries on with the next command.
 *	other	an errno value; prunlock() is done here before returning,
 *		except on the early returns where the callee reports (or
 *		is visibly responsible for) having dropped the lock.
 */
static int
pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;	/* set by pr_watch()/pr_agent() through a pointer */
	int error = 0;

	/* PCNULL does nothing; it returns before the SSYS check below. */
	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	/* System processes defy control. */
	if (p->p_flag & SSYS) {
		prunlock(pnp);
		return (EBUSY);
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
		{
			time_t timeo;

			/*
			 * Can't apply to a system process.
			 */
			if (p->p_as == &kas) {
				error = EBUSY;
				break;
			}

			if (cmd == PCSTOP || cmd == PCDSTOP)
				pr_stop(pnp);

			if (cmd == PCDSTOP)
				break;

			/*
			 * If an lwp is waiting for itself or its process,
			 * don't wait. The lwp will never see the fact that
			 * itself is stopped.
			 */
			if ((pcp->prc_flags & PRC_LWP)?
			    (pcp->prc_thread == curthread) : (p == curproc)) {
				if (cmd == PCWSTOP || cmd == PCTWSTOP)
					error = EBUSY;
				break;
			}

			/*
			 * Only PCTWSTOP carries a timeout operand.  Every
			 * nonzero return from pr_wait_stop() happens after
			 * it has called prunlock(), so return directly
			 * rather than falling into the prunlock() below.
			 */
			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
			if ((error = pr_wait_stop(pnp, timeo)) != 0)
				return (error);

			break;
		}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, (ulong_t)argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p,  &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			int sig = (int)argp->siginfo.si_signo;
			siginfo_t siginfo;

			/* Convert the 32-bit siginfo into a zeroed native one. */
			bzero(&siginfo, sizeof (siginfo));
			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
			error = pr_setsig(pnp, &siginfo);
			/*
			 * After successfully setting SIGKILL, drop the lock,
			 * call pr_wait_die() and report -1 so the caller
			 * knows the lock is gone.
			 */
			if (sig == SIGKILL && error == 0) {
				prunlock(pnp);
				pr_wait_die(pnp);
				return (-1);
			}
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		/* Same SIGKILL handling as PCSSIG above. */
		if (error == 0 && argp->sig == SIGKILL) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, (long)argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, (long)argp->flags);
		break;

	case PCSREG:	/* set general registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			/* pr_thread() returns with the thread locked. */
			kthread_t *t = pr_thread(pnp);

			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
				thread_unlock(t);
				error = EBUSY;
			} else {
				prgregset_t prgregset;
				klwp_t *lwp = ttolwp(t);

				/*
				 * p->p_lock is released around the register
				 * conversion and update, then re-acquired.
				 */
				thread_unlock(t);
				mutex_exit(&p->p_lock);
				prgregset_32ton(lwp, argp->prgregset,
				    prgregset);
				prsetprregs(lwp, prgregset, 0);
				mutex_enter(&p->p_lock);
			}
		}
		break;

	case PCSFPREG:	/* set floating-point registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setfpregs32(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
#if defined(__sparc)
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setxregs(pnp, &argp->prxregset);
#else
		/* Extra registers are only handled on sparc. */
		error = EINVAL;
#endif
		break;

	case PCSVADDR:	/* set virtual address at which to resume */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setvaddr(pnp,
			    (caddr_t)(uintptr_t)argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			prwatch_t prwatch;

			/* Widen the 32-bit operand field by field. */
			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
			prwatch.pr_size = argp->prwatch.pr_size;
			prwatch.pr_wflags = argp->prwatch.pr_wflags;
			prwatch.pr_pad = argp->prwatch.pr_pad;
			error = pr_watch(pnp, &prwatch, &unlocked);
			/*
			 * If pr_watch() failed and set 'unlocked', skip the
			 * prunlock() below (presumably it already dropped
			 * the lock).
			 */
			if (error && unlocked)
				return (error);
		}
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			prgregset_t prgregset;
			/* pr_thread() returns with the thread locked. */
			kthread_t *t = pr_thread(pnp);
			klwp_t *lwp = ttolwp(t);
			thread_unlock(t);
			/* p->p_lock is dropped while converting registers. */
			mutex_exit(&p->p_lock);
			prgregset_32ton(lwp, argp->prgregset, prgregset);
			mutex_enter(&p->p_lock);
			error = pr_agent(pnp, prgregset, &unlocked);
			/* Same 'unlocked' convention as PCWATCH above. */
			if (error && unlocked)
				return (error);
		}
		break;

	case PCREAD:	/* read from the address space */
	case PCWRITE:	/* write to the address space */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
			priovec_t priovec;

			/*
			 * Build a native priovec_t; the offset goes through
			 * uint32_t first so that it is zero-extended.
			 */
			priovec.pio_base =
			    (void *)(uintptr_t)argp->priovec.pio_base;
			priovec.pio_len = (size_t)argp->priovec.pio_len;
			priovec.pio_offset = (off_t)
			    (uint32_t)argp->priovec.pio_offset;
			error = pr_rdwr(p, rw, &priovec);
		}
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
		{
			/*
			 * All the fields in these structures are exactly the
			 * same and so the structures are compatible.  In case
			 * this ever changes, we catch this with the ASSERT
			 * below.
			 */
			prcred_t *prcred = (prcred_t *)&argp->prcred;

#ifndef __lint
			ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
#endif

			/* The last argument tells pr_scred() this is PCSCREDX. */
			error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
			break;
		}

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;

	case PCSZONE:	/* set the process's zoneid */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* On failure the lock is released here on the caller's behalf. */
	if (error)
		prunlock(pnp);
	return (error);
}
985 
986 #endif	/* _SYSCALL32_IMPL */
987 
988 /*
989  * Return the specific or chosen thread/lwp for a control operation.
990  * Returns with the thread locked via thread_lock(t).
991  */
992 kthread_t *
993 pr_thread(prnode_t *pnp)
994 {
995 	prcommon_t *pcp = pnp->pr_common;
996 	kthread_t *t;
997 
998 	if (pcp->prc_flags & PRC_LWP) {
999 		t = pcp->prc_thread;
1000 		ASSERT(t != NULL);
1001 		thread_lock(t);
1002 	} else {
1003 		proc_t *p = pcp->prc_proc;
1004 		t = prchoose(p);	/* returns locked thread */
1005 		ASSERT(t != NULL);
1006 	}
1007 
1008 	return (t);
1009 }
1010 
1011 /*
1012  * Direct the process or lwp to stop.
1013  */
1014 void
1015 pr_stop(prnode_t *pnp)
1016 {
1017 	prcommon_t *pcp = pnp->pr_common;
1018 	proc_t *p = pcp->prc_proc;
1019 	kthread_t *t;
1020 	vnode_t *vp;
1021 
1022 	/*
1023 	 * If already stopped, do nothing; otherwise flag
1024 	 * it to be stopped the next time it tries to run.
1025 	 * If sleeping at interruptible priority, set it
1026 	 * running so it will stop within cv_wait_sig().
1027 	 *
1028 	 * Take care to cooperate with jobcontrol: if an lwp
1029 	 * is stopped due to the default action of a jobcontrol
1030 	 * stop signal, flag it to be stopped the next time it
1031 	 * starts due to a SIGCONT signal.
1032 	 */
1033 	if (pcp->prc_flags & PRC_LWP)
1034 		t = pcp->prc_thread;
1035 	else
1036 		t = p->p_tlist;
1037 	ASSERT(t != NULL);
1038 
1039 	do {
1040 		int notify;
1041 
1042 		notify = 0;
1043 		thread_lock(t);
1044 		if (!ISTOPPED(t)) {
1045 			t->t_proc_flag |= TP_PRSTOP;
1046 			t->t_sig_check = 1;	/* do ISSIG */
1047 		}
1048 
1049 		/* Move the thread from wait queue to run queue */
1050 		if (ISWAITING(t))
1051 			setrun_locked(t);
1052 
1053 		if (ISWAKEABLE(t)) {
1054 			if (t->t_wchan0 == NULL)
1055 				setrun_locked(t);
1056 			else if (!VSTOPPED(t)) {
1057 				/*
1058 				 * Mark it virtually stopped.
1059 				 */
1060 				t->t_proc_flag |= TP_PRVSTOP;
1061 				notify = 1;
1062 			}
1063 		}
1064 		/*
1065 		 * force the thread into the kernel
1066 		 * if it is not already there.
1067 		 */
1068 		prpokethread(t);
1069 		thread_unlock(t);
1070 		if (notify &&
1071 		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1072 			prnotify(vp);
1073 		if (pcp->prc_flags & PRC_LWP)
1074 			break;
1075 	} while ((t = t->t_forw) != p->p_tlist);
1076 
1077 	/*
1078 	 * We do this just in case the thread we asked
1079 	 * to stop is in holdlwps() (called from cfork()).
1080 	 */
1081 	cv_broadcast(&p->p_holdlwps);
1082 }
1083 
1084 /*
1085  * Sleep until the lwp stops, but cooperate with
1086  * jobcontrol:  Don't wake up if the lwp is stopped
1087  * due to the default action of a jobcontrol stop signal.
1088  * If this is the process file descriptor, sleep
1089  * until all of the process's lwps stop.
1090  */
1091 int
1092 pr_wait_stop(prnode_t *pnp, time_t timeo)
1093 {
1094 	prcommon_t *pcp = pnp->pr_common;
1095 	proc_t *p = pcp->prc_proc;
1096 	timestruc_t rqtime;
1097 	timestruc_t *rqtp = NULL;
1098 	int timecheck = 0;
1099 	kthread_t *t;
1100 	int error;
1101 
1102 	if (timeo > 0) {	/* millisecond timeout */
1103 		/*
1104 		 * Determine the precise future time of the requested timeout.
1105 		 */
1106 		timestruc_t now;
1107 
1108 		timecheck = timechanged;
1109 		gethrestime(&now);
1110 		rqtp = &rqtime;
1111 		rqtp->tv_sec = timeo / MILLISEC;
1112 		rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1113 		timespecadd(rqtp, &now);
1114 	}
1115 
1116 	if (pcp->prc_flags & PRC_LWP) {	/* lwp file descriptor */
1117 		t = pcp->prc_thread;
1118 		ASSERT(t != NULL);
1119 		thread_lock(t);
1120 		while (!ISTOPPED(t) && !VSTOPPED(t)) {
1121 			thread_unlock(t);
1122 			mutex_enter(&pcp->prc_mutex);
1123 			prunlock(pnp);
1124 			error = pr_wait(pcp, rqtp, timecheck);
1125 			if (error)	/* -1 is timeout */
1126 				return (error);
1127 			if ((error = prlock(pnp, ZNO)) != 0)
1128 				return (error);
1129 			ASSERT(p == pcp->prc_proc);
1130 			ASSERT(t == pcp->prc_thread);
1131 			thread_lock(t);
1132 		}
1133 		thread_unlock(t);
1134 	} else {			/* process file descriptor */
1135 		t = prchoose(p);	/* returns locked thread */
1136 		ASSERT(t != NULL);
1137 		ASSERT(MUTEX_HELD(&p->p_lock));
1138 		while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1139 		    (p->p_flag & SEXITLWPS)) {
1140 			thread_unlock(t);
1141 			mutex_enter(&pcp->prc_mutex);
1142 			prunlock(pnp);
1143 			error = pr_wait(pcp, rqtp, timecheck);
1144 			if (error)	/* -1 is timeout */
1145 				return (error);
1146 			if ((error = prlock(pnp, ZNO)) != 0)
1147 				return (error);
1148 			ASSERT(p == pcp->prc_proc);
1149 			t = prchoose(p);	/* returns locked t */
1150 			ASSERT(t != NULL);
1151 		}
1152 		thread_unlock(t);
1153 	}
1154 
1155 	ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1156 	    t != NULL && t->t_state != TS_ZOMB);
1157 
1158 	return (0);
1159 }
1160 
1161 int
1162 pr_setrun(prnode_t *pnp, ulong_t flags)
1163 {
1164 	prcommon_t *pcp = pnp->pr_common;
1165 	proc_t *p = pcp->prc_proc;
1166 	kthread_t *t;
1167 	klwp_t *lwp;
1168 
1169 	/*
1170 	 * Cannot set an lwp running if it is not stopped.
1171 	 * Also, no lwp other than the /proc agent lwp can
1172 	 * be set running so long as the /proc agent lwp exists.
1173 	 */
1174 	t = pr_thread(pnp);	/* returns locked thread */
1175 	if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1176 	    !(t->t_proc_flag & TP_PRSTOP)) ||
1177 	    (p->p_agenttp != NULL &&
1178 	    (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1179 		thread_unlock(t);
1180 		return (EBUSY);
1181 	}
1182 	thread_unlock(t);
1183 	if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1184 		return (EINVAL);
1185 	lwp = ttolwp(t);
1186 	if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1187 		/*
1188 		 * Discard current siginfo_t, if any.
1189 		 */
1190 		lwp->lwp_cursig = 0;
1191 		lwp->lwp_extsig = 0;
1192 		if (lwp->lwp_curinfo) {
1193 			siginfofree(lwp->lwp_curinfo);
1194 			lwp->lwp_curinfo = NULL;
1195 		}
1196 	}
1197 	if (flags & PRCFAULT)
1198 		lwp->lwp_curflt = 0;
1199 	/*
1200 	 * We can't hold p->p_lock when we touch the lwp's registers.
1201 	 * It may be swapped out and we will get a page fault.
1202 	 */
1203 	if (flags & PRSTEP) {
1204 		mutex_exit(&p->p_lock);
1205 		prstep(lwp, 0);
1206 		mutex_enter(&p->p_lock);
1207 	}
1208 	if (flags & PRSTOP) {
1209 		t->t_proc_flag |= TP_PRSTOP;
1210 		t->t_sig_check = 1;	/* do ISSIG */
1211 	}
1212 	if (flags & PRSABORT)
1213 		lwp->lwp_sysabort = 1;
1214 	thread_lock(t);
1215 	if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1216 		/*
1217 		 * Here, we are dealing with a single lwp.
1218 		 */
1219 		if (ISTOPPED(t)) {
1220 			t->t_schedflag |= TS_PSTART;
1221 			t->t_dtrace_stop = 0;
1222 			setrun_locked(t);
1223 		} else if (flags & PRSABORT) {
1224 			t->t_proc_flag &=
1225 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1226 			setrun_locked(t);
1227 		} else if (!(flags & PRSTOP)) {
1228 			t->t_proc_flag &=
1229 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1230 		}
1231 		thread_unlock(t);
1232 	} else {
1233 		/*
1234 		 * Here, we are dealing with the whole process.
1235 		 */
1236 		if (ISTOPPED(t)) {
1237 			/*
1238 			 * The representative lwp is stopped on an event
1239 			 * of interest.  We demote it to PR_REQUESTED and
1240 			 * choose another representative lwp.  If the new
1241 			 * representative lwp is not stopped on an event of
1242 			 * interest (other than PR_REQUESTED), we set the
1243 			 * whole process running, else we leave the process
1244 			 * stopped showing the next event of interest.
1245 			 */
1246 			kthread_t *tx = NULL;
1247 
1248 			if (!(flags & PRSABORT) &&
1249 			    t->t_whystop == PR_SYSENTRY &&
1250 			    t->t_whatstop == SYS_lwp_exit)
1251 				tx = t;		/* remember the exiting lwp */
1252 			t->t_whystop = PR_REQUESTED;
1253 			t->t_whatstop = 0;
1254 			thread_unlock(t);
1255 			t = prchoose(p);	/* returns locked t */
1256 			ASSERT(ISTOPPED(t) || VSTOPPED(t));
1257 			if (VSTOPPED(t) ||
1258 			    t->t_whystop == PR_REQUESTED) {
1259 				thread_unlock(t);
1260 				allsetrun(p);
1261 			} else {
1262 				thread_unlock(t);
1263 				/*
1264 				 * As a special case, if the old representative
1265 				 * lwp was stopped on entry to _lwp_exit()
1266 				 * (and we are not aborting the system call),
1267 				 * we set the old representative lwp running.
1268 				 * We do this so that the next process stop
1269 				 * will find the exiting lwp gone.
1270 				 */
1271 				if (tx != NULL) {
1272 					thread_lock(tx);
1273 					tx->t_schedflag |= TS_PSTART;
1274 					t->t_dtrace_stop = 0;
1275 					setrun_locked(tx);
1276 					thread_unlock(tx);
1277 				}
1278 			}
1279 		} else {
1280 			/*
1281 			 * No event of interest; set all of the lwps running.
1282 			 */
1283 			if (flags & PRSABORT) {
1284 				t->t_proc_flag &=
1285 				    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1286 				setrun_locked(t);
1287 			}
1288 			thread_unlock(t);
1289 			allsetrun(p);
1290 		}
1291 	}
1292 	return (0);
1293 }
1294 
1295 /*
1296  * Wait until process/lwp stops or until timer expires.
1297  * Return EINTR for an interruption, -1 for timeout, else 0.
1298  */
1299 int
1300 pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
1301 	timestruc_t *ts,	/* absolute time of timeout, if any */
1302 	int timecheck)
1303 {
1304 	int rval;
1305 
1306 	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1307 	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1308 	mutex_exit(&pcp->prc_mutex);
1309 	switch (rval) {
1310 	case 0:
1311 		return (EINTR);
1312 	case -1:
1313 		return (-1);
1314 	default:
1315 		return (0);
1316 	}
1317 }
1318 
1319 /*
1320  * Make all threads in the process runnable.
1321  */
1322 void
1323 allsetrun(proc_t *p)
1324 {
1325 	kthread_t *t;
1326 
1327 	ASSERT(MUTEX_HELD(&p->p_lock));
1328 
1329 	if ((t = p->p_tlist) != NULL) {
1330 		do {
1331 			thread_lock(t);
1332 			ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1333 			t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1334 			if (ISTOPPED(t)) {
1335 				t->t_schedflag |= TS_PSTART;
1336 				t->t_dtrace_stop = 0;
1337 				setrun_locked(t);
1338 			}
1339 			thread_unlock(t);
1340 		} while ((t = t->t_forw) != p->p_tlist);
1341 	}
1342 }
1343 
1344 /*
1345  * Wait for the process to die.
1346  * We do this after sending SIGKILL because we know it will
1347  * die soon and we want subsequent operations to return ENOENT.
1348  */
1349 void
1350 pr_wait_die(prnode_t *pnp)
1351 {
1352 	proc_t *p;
1353 
1354 	mutex_enter(&pidlock);
1355 	while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1356 		if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1357 			break;
1358 	}
1359 	mutex_exit(&pidlock);
1360 }
1361 
1362 static void
1363 pr_settrace(proc_t *p, sigset_t *sp)
1364 {
1365 	prdelset(sp, SIGKILL);
1366 	prassignset(&p->p_sigmask, sp);
1367 	if (!sigisempty(&p->p_sigmask))
1368 		p->p_proc_flag |= P_PR_TRACE;
1369 	else if (prisempty(&p->p_fltmask)) {
1370 		user_t *up = PTOU(p);
1371 		if (up->u_systrap == 0)
1372 			p->p_proc_flag &= ~P_PR_TRACE;
1373 	}
1374 }
1375 
/*
 * Set the current signal of the lwp designated by pnp to sip->si_signo,
 * installing *sip as its siginfo.  A signal number of zero clears the
 * current signal and discards any current siginfo.
 *
 * Returns EINVAL if the signal number is out of range, EBUSY if the
 * lwp's current signal is already SIGKILL, and 0 otherwise.
 *
 * p->p_lock is dropped and reacquired around the sigqueue allocation.
 */
int
pr_setsig(prnode_t *pnp, siginfo_t *sip)
{
	/* signal-number limit comes from the caller's brand, if branded */
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
	int sig = sip->si_signo;
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	kthread_t *t;
	klwp_t *lwp;
	int error = 0;

	t = pr_thread(pnp);	/* returns locked thread */
	thread_unlock(t);
	lwp = ttolwp(t);
	if (sig < 0 || sig >= nsig)
		/* Zero allowed here */
		error = EINVAL;
	else if (lwp->lwp_cursig == SIGKILL)
		/* "can't happen", but just in case */
		error = EBUSY;
	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
		/* sig == 0: the assignment above already cleared lwp_cursig */
		lwp->lwp_extsig = 0;
		/*
		 * Discard current siginfo_t, if any.
		 */
		if (lwp->lwp_curinfo) {
			siginfofree(lwp->lwp_curinfo);
			lwp->lwp_curinfo = NULL;
		}
	} else {
		kthread_t *tx;
		sigqueue_t *sqp;

		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
		mutex_exit(&p->p_lock);
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
		mutex_enter(&p->p_lock);

		/* reuse an existing sigqueue if the lwp already has one */
		if (lwp->lwp_curinfo == NULL)
			lwp->lwp_curinfo = sqp;
		else
			kmem_free(sqp, sizeof (sigqueue_t));
		/*
		 * Copy contents of info to current siginfo_t.
		 */
		bcopy(sip, &lwp->lwp_curinfo->sq_info,
		    sizeof (lwp->lwp_curinfo->sq_info));
		/*
		 * Prevent contents published by si_zoneid-unaware /proc
		 * consumers from being incorrectly filtered.  Because
		 * an uninitialized si_zoneid is the same as
		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
		 * process in a non-global zone with a siginfo which
		 * appears to come from the global zone.
		 */
		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
			lwp->lwp_curinfo->sq_info.si_zoneid =
			    p->p_zone->zone_id;
		/*
		 * Side-effects for SIGKILL and jobcontrol signals.
		 */
		if (sig == SIGKILL) {
			p->p_flag |= SKILLED;
			p->p_flag &= ~SEXTKILLED;
		} else if (sig == SIGCONT) {
			/* SIGCONT cancels every pending stop signal */
			p->p_flag |= SSCONT;
			sigdelq(p, NULL, SIGSTOP);
			sigdelq(p, NULL, SIGTSTP);
			sigdelq(p, NULL, SIGTTOU);
			sigdelq(p, NULL, SIGTTIN);
			sigdiffset(&p->p_sig, &stopdefault);
			sigdiffset(&p->p_extsig, &stopdefault);
			if ((tx = p->p_tlist) != NULL) {
				do {
					sigdelq(p, tx, SIGSTOP);
					sigdelq(p, tx, SIGTSTP);
					sigdelq(p, tx, SIGTTOU);
					sigdelq(p, tx, SIGTTIN);
					sigdiffset(&tx->t_sig, &stopdefault);
					sigdiffset(&tx->t_extsig, &stopdefault);
				} while ((tx = tx->t_forw) != p->p_tlist);
			}
		} else if (sigismember(&stopdefault, sig)) {
			/* a stop signal cancels any pending SIGCONT */
			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
				p->p_flag &= ~SSCONT;
			sigdelq(p, NULL, SIGCONT);
			sigdelset(&p->p_sig, SIGCONT);
			sigdelset(&p->p_extsig, SIGCONT);
			if ((tx = p->p_tlist) != NULL) {
				do {
					sigdelq(p, tx, SIGCONT);
					sigdelset(&tx->t_sig, SIGCONT);
					sigdelset(&tx->t_extsig, SIGCONT);
				} while ((tx = tx->t_forw) != p->p_tlist);
			}
		}
		thread_lock(t);
		if (ISWAKEABLE(t) || ISWAITING(t)) {
			/* Set signaled sleeping/waiting lwp running */
			setrun_locked(t);
		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
			/* If SIGKILL, set stopped lwp running */
			p->p_stopsig = 0;
			t->t_schedflag |= TS_XSTART | TS_PSTART;
			t->t_dtrace_stop = 0;
			setrun_locked(t);
		}
		t->t_sig_check = 1;	/* so ISSIG will be done */
		thread_unlock(t);
		/*
		 * More jobcontrol side-effects.
		 */
		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
			p->p_stopsig = 0;
			/* restart every lwp that is stopped for job control */
			do {
				thread_lock(tx);
				if (tx->t_state == TS_STOPPED &&
				    tx->t_whystop == PR_JOBCONTROL) {
					tx->t_schedflag |= TS_XSTART;
					setrun_locked(tx);
				}
				thread_unlock(tx);
			} while ((tx = tx->t_forw) != p->p_tlist);
		}
	}
	return (error);
}
1504 
1505 int
1506 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1507 {
1508 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1509 	prcommon_t *pcp = pnp->pr_common;
1510 	proc_t *p = pcp->prc_proc;
1511 	k_siginfo_t info;
1512 
1513 	if (sig <= 0 || sig >= nsig)
1514 		return (EINVAL);
1515 
1516 	bzero(&info, sizeof (info));
1517 	info.si_signo = sig;
1518 	info.si_code = SI_USER;
1519 	info.si_pid = curproc->p_pid;
1520 	info.si_ctid = PRCTID(curproc);
1521 	info.si_zoneid = getzoneid();
1522 	info.si_uid = crgetruid(cr);
1523 	sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1524 	    pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1525 
1526 	return (0);
1527 }
1528 
1529 int
1530 pr_unkill(prnode_t *pnp, int sig)
1531 {
1532 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1533 	prcommon_t *pcp = pnp->pr_common;
1534 	proc_t *p = pcp->prc_proc;
1535 	sigqueue_t *infop = NULL;
1536 
1537 	if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1538 		return (EINVAL);
1539 
1540 	if (pcp->prc_flags & PRC_LWP)
1541 		sigdeq(p, pcp->prc_thread, sig, &infop);
1542 	else
1543 		sigdeq(p, NULL, sig, &infop);
1544 
1545 	if (infop)
1546 		siginfofree(infop);
1547 
1548 	return (0);
1549 }
1550 
1551 int
1552 pr_nice(proc_t *p, int nice, cred_t *cr)
1553 {
1554 	kthread_t *t;
1555 	int err;
1556 	int error = 0;
1557 
1558 	t = p->p_tlist;
1559 	do {
1560 		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1561 		err = CL_DONICE(t, cr, nice, (int *)NULL);
1562 		schedctl_set_cidpri(t);
1563 		if (error == 0)
1564 			error = err;
1565 	} while ((t = t->t_forw) != p->p_tlist);
1566 
1567 	return (error);
1568 }
1569 
1570 void
1571 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1572 {
1573 	user_t *up = PTOU(p);
1574 
1575 	if (entry) {
1576 		prassignset(&up->u_entrymask, sysset);
1577 	} else {
1578 		prassignset(&up->u_exitmask, sysset);
1579 	}
1580 	if (!prisempty(&up->u_entrymask) ||
1581 	    !prisempty(&up->u_exitmask)) {
1582 		up->u_systrap = 1;
1583 		p->p_proc_flag |= P_PR_TRACE;
1584 		set_proc_sys(p);	/* set pre and post-sys flags */
1585 	} else {
1586 		up->u_systrap = 0;
1587 		if (sigisempty(&p->p_sigmask) &&
1588 		    prisempty(&p->p_fltmask))
1589 			p->p_proc_flag &= ~P_PR_TRACE;
1590 	}
1591 }
1592 
/*
 * The complete set of mode flags accepted by pr_set() and pr_unset();
 * any other bit causes those functions to return EINVAL.
 */
#define	ALLFLAGS	\
	(PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1595 
1596 int
1597 pr_set(proc_t *p, long flags)
1598 {
1599 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1600 		return (EBUSY);
1601 
1602 	if (flags & ~ALLFLAGS)
1603 		return (EINVAL);
1604 
1605 	if (flags & PR_FORK)
1606 		p->p_proc_flag |= P_PR_FORK;
1607 	if (flags & PR_RLC)
1608 		p->p_proc_flag |= P_PR_RUNLCL;
1609 	if (flags & PR_KLC)
1610 		p->p_proc_flag |= P_PR_KILLCL;
1611 	if (flags & PR_ASYNC)
1612 		p->p_proc_flag |= P_PR_ASYNC;
1613 	if (flags & PR_BPTADJ)
1614 		p->p_proc_flag |= P_PR_BPTADJ;
1615 	if (flags & PR_MSACCT)
1616 		if ((p->p_flag & SMSACCT) == 0)
1617 			estimate_msacct(p->p_tlist, gethrtime());
1618 	if (flags & PR_MSFORK)
1619 		p->p_flag |= SMSFORK;
1620 	if (flags & PR_PTRACE) {
1621 		p->p_proc_flag |= P_PR_PTRACE;
1622 		/* ptraced process must die if parent dead */
1623 		if (p->p_ppid == 1)
1624 			sigtoproc(p, NULL, SIGKILL);
1625 	}
1626 
1627 	return (0);
1628 }
1629 
1630 int
1631 pr_unset(proc_t *p, long flags)
1632 {
1633 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1634 		return (EBUSY);
1635 
1636 	if (flags & ~ALLFLAGS)
1637 		return (EINVAL);
1638 
1639 	if (flags & PR_FORK)
1640 		p->p_proc_flag &= ~P_PR_FORK;
1641 	if (flags & PR_RLC)
1642 		p->p_proc_flag &= ~P_PR_RUNLCL;
1643 	if (flags & PR_KLC)
1644 		p->p_proc_flag &= ~P_PR_KILLCL;
1645 	if (flags & PR_ASYNC)
1646 		p->p_proc_flag &= ~P_PR_ASYNC;
1647 	if (flags & PR_BPTADJ)
1648 		p->p_proc_flag &= ~P_PR_BPTADJ;
1649 	if (flags & PR_MSACCT)
1650 		disable_msacct(p);
1651 	if (flags & PR_MSFORK)
1652 		p->p_flag &= ~SMSFORK;
1653 	if (flags & PR_PTRACE)
1654 		p->p_proc_flag &= ~P_PR_PTRACE;
1655 
1656 	return (0);
1657 }
1658 
1659 static int
1660 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1661 {
1662 	proc_t *p = pnp->pr_common->prc_proc;
1663 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1664 
1665 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1666 		thread_unlock(t);
1667 		return (EBUSY);
1668 	}
1669 	if (!prhasfp()) {
1670 		thread_unlock(t);
1671 		return (EINVAL);	/* No FP support */
1672 	}
1673 
1674 	/* drop p_lock while touching the lwp's stack */
1675 	thread_unlock(t);
1676 	mutex_exit(&p->p_lock);
1677 	prsetprfpregs(ttolwp(t), prfpregset);
1678 	mutex_enter(&p->p_lock);
1679 
1680 	return (0);
1681 }
1682 
#ifdef	_SYSCALL32_IMPL
/*
 * 32-bit counterpart of pr_setfpregs(): set the floating-point
 * registers of the lwp designated by pnp from a prfpregset32_t.
 * Returns EBUSY if the lwp is not stopped, EINVAL if there is no
 * floating-point support, else 0.
 * p->p_lock is dropped and reacquired around the register update.
 */
static int
pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
	int stopped;

	/* sample the stop state while the thread lock is held */
	stopped = ISTOPPED(t) || VSTOPPED(t) || DSTOPPED(t);
	thread_unlock(t);

	if (!stopped)
		return (EBUSY);
	if (!prhasfp())
		return (EINVAL);	/* No FP support */

	/* drop p_lock while touching the lwp's stack */
	mutex_exit(&p->p_lock);
	prsetprfpregs32(ttolwp(t), prfpregset);
	mutex_enter(&p->p_lock);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */
1708 
1709 #if defined(__sparc)
1710 /* ARGSUSED */
1711 static int
1712 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1713 {
1714 	proc_t *p = pnp->pr_common->prc_proc;
1715 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1716 
1717 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1718 		thread_unlock(t);
1719 		return (EBUSY);
1720 	}
1721 	thread_unlock(t);
1722 
1723 	if (!prhasx(p))
1724 		return (EINVAL);	/* No extra register support */
1725 
1726 	/* drop p_lock while touching the lwp's stack */
1727 	mutex_exit(&p->p_lock);
1728 	prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1729 	mutex_enter(&p->p_lock);
1730 
1731 	return (0);
1732 }
1733 
1734 static int
1735 pr_setasrs(prnode_t *pnp, asrset_t asrset)
1736 {
1737 	proc_t *p = pnp->pr_common->prc_proc;
1738 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1739 
1740 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1741 		thread_unlock(t);
1742 		return (EBUSY);
1743 	}
1744 	thread_unlock(t);
1745 
1746 	/* drop p_lock while touching the lwp's stack */
1747 	mutex_exit(&p->p_lock);
1748 	prsetasregs(ttolwp(t), asrset);
1749 	mutex_enter(&p->p_lock);
1750 
1751 	return (0);
1752 }
1753 #endif
1754 
1755 static int
1756 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1757 {
1758 	proc_t *p = pnp->pr_common->prc_proc;
1759 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1760 
1761 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1762 		thread_unlock(t);
1763 		return (EBUSY);
1764 	}
1765 
1766 	/* drop p_lock while touching the lwp's stack */
1767 	thread_unlock(t);
1768 	mutex_exit(&p->p_lock);
1769 	prsvaddr(ttolwp(t), vaddr);
1770 	mutex_enter(&p->p_lock);
1771 
1772 	return (0);
1773 }
1774 
1775 void
1776 pr_sethold(prnode_t *pnp, sigset_t *sp)
1777 {
1778 	proc_t *p = pnp->pr_common->prc_proc;
1779 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1780 
1781 	schedctl_finish_sigblock(t);
1782 	sigutok(sp, &t->t_hold);
1783 	if (ISWAKEABLE(t) &&
1784 	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1785 		setrun_locked(t);
1786 	t->t_sig_check = 1;	/* so thread will see new holdmask */
1787 	thread_unlock(t);
1788 }
1789 
1790 void
1791 pr_setfault(proc_t *p, fltset_t *fltp)
1792 {
1793 	prassignset(&p->p_fltmask, fltp);
1794 	if (!prisempty(&p->p_fltmask))
1795 		p->p_proc_flag |= P_PR_TRACE;
1796 	else if (sigisempty(&p->p_sigmask)) {
1797 		user_t *up = PTOU(p);
1798 		if (up->u_systrap == 0)
1799 			p->p_proc_flag &= ~P_PR_TRACE;
1800 	}
1801 }
1802 
1803 static int
1804 pr_clearsig(prnode_t *pnp)
1805 {
1806 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1807 	klwp_t *lwp = ttolwp(t);
1808 
1809 	thread_unlock(t);
1810 	if (lwp->lwp_cursig == SIGKILL)
1811 		return (EBUSY);
1812 
1813 	/*
1814 	 * Discard current siginfo_t, if any.
1815 	 */
1816 	lwp->lwp_cursig = 0;
1817 	lwp->lwp_extsig = 0;
1818 	if (lwp->lwp_curinfo) {
1819 		siginfofree(lwp->lwp_curinfo);
1820 		lwp->lwp_curinfo = NULL;
1821 	}
1822 
1823 	return (0);
1824 }
1825 
1826 static int
1827 pr_clearflt(prnode_t *pnp)
1828 {
1829 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1830 
1831 	thread_unlock(t);
1832 	ttolwp(t)->lwp_curflt = 0;
1833 
1834 	return (0);
1835 }
1836 
/*
 * Establish or clear a watched area in the target process as described
 * by *pwp.  If no access flags (other than WA_TRAPAFTER) are given,
 * the request is passed to clear_watched_area(); otherwise it is
 * passed to set_watched_area().
 *
 * Returns EBUSY for a system process, EINVAL for a wrapping address
 * range, unknown flags or a zero-length watched area, E2BIG if the
 * area spans more than 2 * prnwatch pages, an error from prlock() if
 * the process could not be re-locked, or the result of the
 * set/clear operation.
 *
 * *unlocked is set to 1 when the function returns after failing to
 * re-lock the process, and is 0 otherwise.
 */
static int
pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
{
	proc_t *p = pnp->pr_common->prc_proc;
	struct as *as = p->p_as;
	uintptr_t vaddr = pwp->pr_vaddr;
	size_t size = pwp->pr_size;
	int wflags = pwp->pr_wflags;
	ulong_t newpage = 0;
	struct watched_area *pwa;
	int error;

	*unlocked = 0;

	/*
	 * Can't apply to a system process.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		return (EBUSY);

	/*
	 * Verify that the address range does not wrap
	 * and that only the proper flags were specified.
	 */
	if ((wflags & ~WA_TRAPAFTER) == 0)
		size = 0;	/* a clear request ignores the size */
	if (vaddr + size < vaddr ||
	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
		return (EINVAL);

	/*
	 * Don't let the address range go above as->a_userlimit.
	 * There is no error here, just a limitation.
	 */
	if (vaddr >= (uintptr_t)as->a_userlimit)
		return (0);
	if (vaddr + size > (uintptr_t)as->a_userlimit)
		size = (uintptr_t)as->a_userlimit - vaddr;

	/*
	 * Compute maximum number of pages this will add.
	 */
	if ((wflags & ~WA_TRAPAFTER) != 0) {
		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
		newpage = btopr(pagespan);
		if (newpage > 2 * prnwatch)
			return (E2BIG);
	}

	/*
	 * Force the process to be fully stopped.
	 */
	if (p == curproc) {
		/* operating on ourselves: holdwatch() stops the other lwps */
		prunlock(pnp);
		while (holdwatch() != 0)
			continue;
		if ((error = prlock(pnp, ZNO)) != 0) {
			continuelwps(p);
			*unlocked = 1;
			return (error);
		}
	} else {
		pauselwps(p);
		while (pr_allstopped(p, 0) > 0) {
			/*
			 * This cv/mutex pair is persistent even
			 * if the process disappears after we
			 * unmark it and drop p->p_lock.
			 */
			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
			kmutex_t *mp = &p->p_lock;

			prunmark(p);
			(void) cv_wait(cv, mp);
			mutex_exit(mp);
			if ((error = prlock(pnp, ZNO)) != 0) {
				/*
				 * Unpause the process if it exists.
				 */
				p = pr_p_lock(pnp);
				mutex_exit(&pr_pidlock);
				if (p != NULL) {
					unpauselwps(p);
					prunlock(pnp);
				}
				*unlocked = 1;
				return (error);
			}
		}
	}

	/*
	 * Drop p->p_lock in order to perform the rest of this.
	 * The process is still locked with the P_PR_LOCK flag.
	 */
	mutex_exit(&p->p_lock);

	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
	pwa->wa_vaddr = (caddr_t)vaddr;
	pwa->wa_eaddr = (caddr_t)vaddr + size;
	pwa->wa_flags = (ulong_t)wflags;

	/* pwa is not freed here; it is handed to the set/clear routine */
	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);

	/* reacquire p->p_lock and let the lwps proceed again */
	if (p == curproc) {
		setallwatch();
		mutex_enter(&p->p_lock);
		continuelwps(p);
	} else {
		mutex_enter(&p->p_lock);
		unpauselwps(p);
	}

	return (error);
}
1954 
/*
 * True if thread t is stopped for job control (TS_STOPPED with
 * t_whystop == PR_JOBCONTROL) while a /proc directed stop
 * (TP_PRSTOP) is also in effect.
 */
#define	JDSTOPPED(t)	\
	((t)->t_state == TS_STOPPED && \
	(t)->t_whystop == PR_JOBCONTROL && \
	((t)->t_proc_flag & TP_PRSTOP))
1960 
/*
 * pr_agent() creates the agent lwp. If the process is exiting while
 * we are creating an agent lwp, then exitlwps() waits until the
 * agent has been created using prbarrier().
 *
 * The new lwp is created stopped, given the registers in prgregset,
 * and placed in the same scheduling class as the representative lwp.
 * The function then waits for the agent to stop.
 *
 * Returns EBUSY if the agent cannot be created (see below), ENOMEM if
 * lwp_create() fails, an error from prlock(), or ENOENT if the agent
 * disappears while we wait.  Otherwise -1 is returned; the meaning of
 * -1 to the caller is not visible here.
 *
 * *unlocked is 1 on return if the process lock was dropped and not
 * reacquired, else 0.
 */
static int
pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
{
	proc_t *p = pnp->pr_common->prc_proc;
	prcommon_t *pcp;
	kthread_t *t;
	kthread_t *ct;
	klwp_t *clwp;
	k_sigset_t smask;
	int cid;
	void *bufp = NULL;
	int error;

	*unlocked = 0;

	/*
	 * Cannot create the /proc agent lwp if :-
	 * - the process is not fully stopped or directed to stop.
	 * - there is an agent lwp already.
	 * - the process has been killed.
	 * - the process is exiting.
	 * - it's a vfork(2) parent.
	 */
	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);

	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
	    p->p_agenttp != NULL ||
	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
		thread_unlock(t);
		return (EBUSY);
	}

	thread_unlock(t);
	mutex_exit(&p->p_lock);

	/* the agent starts with every maskable signal held */
	sigfillset(&smask);
	sigdiffset(&smask, &cantmask);
	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
	    t->t_pri, &smask, NOCLASS, 0);
	if (clwp == NULL) {
		mutex_enter(&p->p_lock);
		return (ENOMEM);
	}
	prsetprregs(clwp, prgregset, 1);
retry:
	/* pre-allocate the class-specific buffer without holding p_lock */
	cid = t->t_cid;
	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
	mutex_enter(&p->p_lock);
	if (cid != t->t_cid) {
		/*
		 * Someone just changed this thread's scheduling class,
		 * so try pre-allocating the buffer again.  Hopefully we
		 * don't hit this often.
		 */
		mutex_exit(&p->p_lock);
		CL_FREE(cid, bufp);
		goto retry;
	}

	clwp->lwp_ap = clwp->lwp_arg;
	clwp->lwp_eosys = NORMALRETURN;
	ct = lwptot(clwp);
	ct->t_clfuncs = t->t_clfuncs;
	CL_FORK(t, ct, bufp);
	ct->t_cid = t->t_cid;
	ct->t_proc_flag |= TP_PRSTOP;
	/*
	 * Setting t_sysnum to zero causes post_syscall()
	 * to bypass all syscall checks and go directly to
	 *	if (issig()) psig();
	 * so that the agent lwp will stop in issig_forreal()
	 * showing PR_REQUESTED.
	 */
	ct->t_sysnum = 0;
	ct->t_post_sys = 1;
	ct->t_sig_check = 1;
	p->p_agenttp = ct;
	ct->t_proc_flag &= ~TP_HOLDLWP;

	/* prc_mutex is taken before the agent is released, for pr_wait() */
	pcp = pnp->pr_pcommon;
	mutex_enter(&pcp->prc_mutex);

	lwp_create_done(ct);

	/*
	 * Don't return until the agent is stopped on PR_REQUESTED.
	 */

	for (;;) {
		prunlock(pnp);
		*unlocked = 1;

		/*
		 * Wait for the agent to stop and notify us.
		 * If we've been interrupted, return that information.
		 */
		error = pr_wait(pcp, NULL, 0);
		if (error == EINTR) {
			error = 0;
			break;
		}

		/*
		 * Confirm that the agent LWP has stopped.
		 */

		if ((error = prlock(pnp, ZNO)) != 0)
			break;
		*unlocked = 0;

		/*
		 * Since we dropped the lock on the process, the agent
		 * may have disappeared or changed. Grab the current
		 * agent and check fail if it has disappeared.
		 */
		if ((ct = p->p_agenttp) == NULL) {
			error = ENOENT;
			break;
		}

		mutex_enter(&pcp->prc_mutex);
		thread_lock(ct);

		if (ISTOPPED(ct)) {
			thread_unlock(ct);
			mutex_exit(&pcp->prc_mutex);
			break;
		}

		/* not stopped yet: keep prc_mutex held and wait again */
		thread_unlock(ct);
	}

	return (error ? error : -1);
}
2101 
2102 static int
2103 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2104 {
2105 	caddr_t base = (caddr_t)pio->pio_base;
2106 	size_t cnt = pio->pio_len;
2107 	uintptr_t offset = (uintptr_t)pio->pio_offset;
2108 	struct uio auio;
2109 	struct iovec aiov;
2110 	int error = 0;
2111 
2112 	if ((p->p_flag & SSYS) || p->p_as == &kas)
2113 		error = EIO;
2114 	else if ((base + cnt) < base || (offset + cnt) < offset)
2115 		error = EINVAL;
2116 	else if (cnt != 0) {
2117 		aiov.iov_base = base;
2118 		aiov.iov_len = cnt;
2119 
2120 		auio.uio_loffset = offset;
2121 		auio.uio_iov = &aiov;
2122 		auio.uio_iovcnt = 1;
2123 		auio.uio_resid = cnt;
2124 		auio.uio_segflg = UIO_USERSPACE;
2125 		auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2126 		auio.uio_fmode = FREAD|FWRITE;
2127 		auio.uio_extflg = UIO_COPY_DEFAULT;
2128 
2129 		mutex_exit(&p->p_lock);
2130 		error = prusrio(p, rw, &auio, 0);
2131 		mutex_enter(&p->p_lock);
2132 
2133 		/*
2134 		 * We have no way to return the i/o count,
2135 		 * like read() or write() would do, so we
2136 		 * return an error if the i/o was truncated.
2137 		 */
2138 		if (auio.uio_resid != 0 && error == 0)
2139 			error = EIO;
2140 	}
2141 
2142 	return (error);
2143 }
2144 
/*
 * Replace the credentials of process p with the uids and gids (and,
 * if dogrps is set, the supplementary groups) given in *prcred.
 *
 * Returns EINVAL if any id is not valid for the caller's zone or the
 * group count is out of range, an error from secpolicy_allow_setid()
 * if the caller may not assume the requested uids, else 0.
 *
 * p->p_lock is dropped while the new cred is built and installed,
 * and is held again on successful return.
 */
static int
pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
{
	kthread_t *t;
	cred_t *oldcred;
	cred_t *newcred;
	uid_t oldruid;
	int error;
	zone_t *zone = crgetzone(cr);

	if (!VALID_UID(prcred->pr_euid, zone) ||
	    !VALID_UID(prcred->pr_ruid, zone) ||
	    !VALID_UID(prcred->pr_suid, zone) ||
	    !VALID_GID(prcred->pr_egid, zone) ||
	    !VALID_GID(prcred->pr_rgid, zone) ||
	    !VALID_GID(prcred->pr_sgid, zone))
		return (EINVAL);

	if (dogrps) {
		int ngrp = prcred->pr_ngroups;
		int i;

		if (ngrp < 0 || ngrp > ngroups_max)
			return (EINVAL);

		for (i = 0; i < ngrp; i++) {
			if (!VALID_GID(prcred->pr_groups[i], zone))
				return (EINVAL);
		}
	}

	/* check each distinct uid only once */
	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);

	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);

	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
	    prcred->pr_suid != prcred->pr_ruid)
		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);

	if (error)
		return (error);

	mutex_exit(&p->p_lock);

	/* hold old cred so it doesn't disappear while we dup it */
	mutex_enter(&p->p_crlock);
	crhold(oldcred = p->p_cred);
	mutex_exit(&p->p_crlock);
	newcred = crdup(oldcred);
	oldruid = crgetruid(oldcred);
	crfree(oldcred);

	/* Error checking done above */
	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
	    prcred->pr_suid);
	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
	    prcred->pr_sgid);

	if (dogrps) {
		(void) crsetgroups(newcred, prcred->pr_ngroups,
		    prcred->pr_groups);

	}

	/* swap the new cred in and drop the process's reference on the old */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	p->p_cred = newcred;
	mutex_exit(&p->p_crlock);
	crfree(oldcred);

	/*
	 * Keep count of processes per uid consistent.
	 */
	if (oldruid != prcred->pr_ruid) {
		zoneid_t zoneid = crgetzoneid(newcred);

		mutex_enter(&pidlock);
		upcount_dec(oldruid, zoneid);
		upcount_inc(prcred->pr_ruid, zoneid);
		mutex_exit(&pidlock);
	}

	/*
	 * Broadcast the cred change to the threads.
	 */
	mutex_enter(&p->p_lock);
	t = p->p_tlist;
	do {
		t->t_pre_sys = 1; /* so syscall will get new cred */
	} while ((t = t->t_forw) != p->p_tlist);

	return (0);
}
2239 
/*
 * Change process credentials to specified zone.  Used to temporarily
 * set a process to run in the global zone; only transitions between
 * the process's actual zone and the global zone are allowed.
 *
 * Returns EPERM if secpolicy_zone_config() denies the caller, EINVAL
 * if zoneid is neither the global zone nor the process's own zone or
 * cannot be found, else 0.
 *
 * p->p_lock is dropped while the new cred is built and installed,
 * and is held again on successful return.
 */
static int
pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
{
	kthread_t *t;
	cred_t *oldcred;
	cred_t *newcred;
	zone_t *zptr;
	zoneid_t oldzoneid;

	if (secpolicy_zone_config(cr) != 0)
		return (EPERM);
	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
		return (EINVAL);
	if ((zptr = zone_find_by_id(zoneid)) == NULL)
		return (EINVAL);
	mutex_exit(&p->p_lock);
	/* hold the old cred while it is duplicated */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	crhold(oldcred);
	mutex_exit(&p->p_crlock);
	newcred = crdup(oldcred);
	oldzoneid = crgetzoneid(oldcred);
	crfree(oldcred);

	crsetzone(newcred, zptr);
	zone_rele(zptr);

	/* swap the new cred in and drop the process's reference on the old */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	p->p_cred = newcred;
	mutex_exit(&p->p_crlock);
	crfree(oldcred);

	/*
	 * The target process is changing zones (according to its cred), so
	 * update the per-zone upcounts, which are based on process creds.
	 */
	if (oldzoneid != zoneid) {
		uid_t ruid = crgetruid(newcred);

		mutex_enter(&pidlock);
		upcount_dec(ruid, oldzoneid);
		upcount_inc(ruid, zoneid);
		mutex_exit(&pidlock);
	}
	/*
	 * Broadcast the cred change to the threads.
	 */
	mutex_enter(&p->p_lock);
	t = p->p_tlist;
	do {
		t->t_pre_sys = 1;	/* so syscall will get new cred */
	} while ((t = t->t_forw) != p->p_tlist);

	return (0);
}
2301 
2302 static int
2303 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2304 {
2305 	kthread_t *t;
2306 	int err;
2307 
2308 	ASSERT(MUTEX_HELD(&p->p_lock));
2309 
2310 	if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2311 		/*
2312 		 * Broadcast the cred change to the threads.
2313 		 */
2314 		t = p->p_tlist;
2315 		do {
2316 			t->t_pre_sys = 1; /* so syscall will get new cred */
2317 		} while ((t = t->t_forw) != p->p_tlist);
2318 	}
2319 
2320 	return (err);
2321 }
2322 
2323 /*
2324  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2325  * terminate or perform an exec(2).
2326  *
2327  * Returns 0 if the process is fully stopped except for the current thread (if
2328  * we are operating on our own process), 1 otherwise.
2329  *
2330  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2331  * See holdwatch() for details.
2332  */
2333 int
2334 pr_allstopped(proc_t *p, int watchstop)
2335 {
2336 	kthread_t *t;
2337 	int rv = 0;
2338 
2339 	ASSERT(MUTEX_HELD(&p->p_lock));
2340 
2341 	if (p->p_flag & SVFWAIT)	/* waiting for vfork'd child to exec */
2342 		return (-1);
2343 
2344 	if ((t = p->p_tlist) != NULL) {
2345 		do {
2346 			if (t == curthread || VSTOPPED(t) ||
2347 			    (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2348 				continue;
2349 			thread_lock(t);
2350 			switch (t->t_state) {
2351 			case TS_ZOMB:
2352 			case TS_STOPPED:
2353 				break;
2354 			case TS_SLEEP:
2355 				if (!(t->t_flag & T_WAKEABLE) ||
2356 				    t->t_wchan0 == NULL)
2357 					rv = 1;
2358 				break;
2359 			default:
2360 				rv = 1;
2361 				break;
2362 			}
2363 			thread_unlock(t);
2364 		} while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2365 	}
2366 
2367 	return (rv);
2368 }
2369 
2370 /*
2371  * Cause all lwps in the process to pause (for watchpoint operations).
2372  */
2373 static void
2374 pauselwps(proc_t *p)
2375 {
2376 	kthread_t *t;
2377 
2378 	ASSERT(MUTEX_HELD(&p->p_lock));
2379 	ASSERT(p != curproc);
2380 
2381 	if ((t = p->p_tlist) != NULL) {
2382 		do {
2383 			thread_lock(t);
2384 			t->t_proc_flag |= TP_PAUSE;
2385 			aston(t);
2386 			if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2387 			    ISWAITING(t)) {
2388 				setrun_locked(t);
2389 			}
2390 			prpokethread(t);
2391 			thread_unlock(t);
2392 		} while ((t = t->t_forw) != p->p_tlist);
2393 	}
2394 }
2395 
2396 /*
2397  * undo the effects of pauselwps()
2398  */
2399 static void
2400 unpauselwps(proc_t *p)
2401 {
2402 	kthread_t *t;
2403 
2404 	ASSERT(MUTEX_HELD(&p->p_lock));
2405 	ASSERT(p != curproc);
2406 
2407 	if ((t = p->p_tlist) != NULL) {
2408 		do {
2409 			thread_lock(t);
2410 			t->t_proc_flag &= ~TP_PAUSE;
2411 			if (t->t_state == TS_STOPPED) {
2412 				t->t_schedflag |= TS_UNPAUSE;
2413 				t->t_dtrace_stop = 0;
2414 				setrun_locked(t);
2415 			}
2416 			thread_unlock(t);
2417 		} while ((t = t->t_forw) != p->p_tlist);
2418 	}
2419 }
2420 
2421 /*
2422  * Cancel all watched areas.  Called from prclose().
2423  */
2424 proc_t *
2425 pr_cancel_watch(prnode_t *pnp)
2426 {
2427 	proc_t *p = pnp->pr_pcommon->prc_proc;
2428 	struct as *as;
2429 	kthread_t *t;
2430 
2431 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2432 
2433 	if (!pr_watch_active(p))
2434 		return (p);
2435 
2436 	/*
2437 	 * Pause the process before dealing with the watchpoints.
2438 	 */
2439 	if (p == curproc) {
2440 		prunlock(pnp);
2441 		while (holdwatch() != 0)
2442 			continue;
2443 		p = pr_p_lock(pnp);
2444 		mutex_exit(&pr_pidlock);
2445 		ASSERT(p == curproc);
2446 	} else {
2447 		pauselwps(p);
2448 		while (p != NULL && pr_allstopped(p, 0) > 0) {
2449 			/*
2450 			 * This cv/mutex pair is persistent even
2451 			 * if the process disappears after we
2452 			 * unmark it and drop p->p_lock.
2453 			 */
2454 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2455 			kmutex_t *mp = &p->p_lock;
2456 
2457 			prunmark(p);
2458 			(void) cv_wait(cv, mp);
2459 			mutex_exit(mp);
2460 			p = pr_p_lock(pnp);  /* NULL if process disappeared */
2461 			mutex_exit(&pr_pidlock);
2462 		}
2463 	}
2464 
2465 	if (p == NULL)		/* the process disappeared */
2466 		return (NULL);
2467 
2468 	ASSERT(p == pnp->pr_pcommon->prc_proc);
2469 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2470 
2471 	if (pr_watch_active(p)) {
2472 		pr_free_watchpoints(p);
2473 		if ((t = p->p_tlist) != NULL) {
2474 			do {
2475 				watch_disable(t);
2476 
2477 			} while ((t = t->t_forw) != p->p_tlist);
2478 		}
2479 	}
2480 
2481 	if ((as = p->p_as) != NULL) {
2482 		avl_tree_t *tree;
2483 		struct watched_page *pwp;
2484 
2485 		/*
2486 		 * If this is the parent of a vfork, the watched page
2487 		 * list has been moved temporarily to p->p_wpage.
2488 		 */
2489 		if (avl_numnodes(&p->p_wpage) != 0)
2490 			tree = &p->p_wpage;
2491 		else
2492 			tree = &as->a_wpage;
2493 
2494 		mutex_exit(&p->p_lock);
2495 		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2496 
2497 		for (pwp = avl_first(tree); pwp != NULL;
2498 		    pwp = AVL_NEXT(tree, pwp)) {
2499 			pwp->wp_read = 0;
2500 			pwp->wp_write = 0;
2501 			pwp->wp_exec = 0;
2502 			if ((pwp->wp_flags & WP_SETPROT) == 0) {
2503 				pwp->wp_flags |= WP_SETPROT;
2504 				pwp->wp_prot = pwp->wp_oprot;
2505 				pwp->wp_list = p->p_wprot;
2506 				p->p_wprot = pwp;
2507 			}
2508 		}
2509 
2510 		AS_LOCK_EXIT(as, &as->a_lock);
2511 		mutex_enter(&p->p_lock);
2512 	}
2513 
2514 	/*
2515 	 * Unpause the process now.
2516 	 */
2517 	if (p == curproc)
2518 		continuelwps(p);
2519 	else
2520 		unpauselwps(p);
2521 
2522 	return (p);
2523 }
2524