xref: /illumos-gate/usr/src/uts/common/fs/proc/prcontrol.c (revision 9a244c8ee0ee32d71c3e66c8a1c3e18a518d48c8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
29  * Copyright 2023 Oxide Computer Company
30  */
31 
32 #include <sys/types.h>
33 #include <sys/uio.h>
34 #include <sys/param.h>
35 #include <sys/cmn_err.h>
36 #include <sys/cred.h>
37 #include <sys/policy.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/file.h>
41 #include <sys/inline.h>
42 #include <sys/kmem.h>
43 #include <sys/proc.h>
44 #include <sys/brand.h>
45 #include <sys/regset.h>
46 #include <sys/sysmacros.h>
47 #include <sys/systm.h>
48 #include <sys/vfs.h>
49 #include <sys/vnode.h>
50 #include <sys/signal.h>
51 #include <sys/auxv.h>
52 #include <sys/user.h>
53 #include <sys/class.h>
54 #include <sys/fault.h>
55 #include <sys/syscall.h>
56 #include <sys/procfs.h>
57 #include <sys/zone.h>
58 #include <sys/copyops.h>
59 #include <sys/schedctl.h>
60 #include <vm/as.h>
61 #include <vm/seg.h>
62 #include <fs/proc/prdata.h>
63 #include <sys/contract/process_impl.h>
64 #include <sys/stdalign.h>
65 
/*
 * Forward declarations of file-local (static) helpers. Most of these are
 * invoked from the pr_control() command dispatcher later in this file.
 */
static	void	pr_settrace(proc_t *, sigset_t *);
static	int	pr_setfpregs(prnode_t *, prfpregset_t *);
static	int	pr_setxregs(prnode_t *, prxregset_t *);
static	int	pr_setvaddr(prnode_t *, caddr_t);
static	int	pr_clearsig(prnode_t *);
static	int	pr_clearflt(prnode_t *);
static	int	pr_watch(prnode_t *, prwatch_t *, int *);
static	int	pr_agent(prnode_t *, prgregset_t, int *);
static	int	pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
static	int	pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
static	int	pr_spriv(proc_t *, prpriv_t *, cred_t *);
static	int	pr_szoneid(proc_t *, zoneid_t, cred_t *);
static	void	pauselwps(proc_t *);
static	void	unpauselwps(proc_t *);
80 
81 /*
82  * This union represents the size of commands that are generally fixed size in
83  * /proc. There are some commands that are variable size because the actual data
84  * is structured. Of things in the latter category, some of these are the same
85  * across all architectures (e.g. prcred_t, prpriv_t) and some vary and are
86  * opaque (e.g. the prxregset_t).
87  */
/*
 * pr_control() casts the payload that follows the command word to this union
 * and reads the member that corresponds to the command being executed.
 */
typedef union {
	long		sig;		/* PCKILL, PCUNKILL */
	long		nice;		/* PCNICE */
	long		timeo;		/* PCTWSTOP */
	ulong_t		flags;		/* PCRUN, PCSET, PCUNSET */
	caddr_t		vaddr;		/* PCSVADDR */
	siginfo_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset_t	prfpregset;	/* PCSFPREG */
	prwatch_t	prwatch;	/* PCWATCH */
	priovec_t	priovec;	/* PCREAD, PCWRITE */
	prcred_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	long		przoneid;	/* PCSZONE */
} arg_t;
106 
107 static boolean_t
108 prwritectl_pcscredx_sizef(const void *datap, size_t *sizep)
109 {
110 	const prcred_t *cred = datap;
111 
112 	if (cred->pr_ngroups < 0 || cred->pr_ngroups > ngroups_max) {
113 		return (B_FALSE);
114 	}
115 
116 	if (cred->pr_ngroups == 0) {
117 		*sizep = 0;
118 	} else {
119 		*sizep = (cred->pr_ngroups - 1) * sizeof (gid_t);
120 	}
121 	return (B_TRUE);
122 }
123 
124 static boolean_t
125 prwritectl_pcspriv_sizef(const void *datap, size_t *sizep)
126 {
127 	const prpriv_t *priv = datap;
128 	*sizep = priv_prgetprivsize(priv) - sizeof (prpriv_t);
129 	return (B_TRUE);
130 }
131 
132 /*
133  * This structure represents a single /proc write command that we support and
134  * metadata about how to ensure we have sufficient data for it. To determine the
135  * data that we need to read, this combines information from three different
136  * sources for a given named command in 'pcs_cmd'. The main goal is to first
137  * make sure we have the right minimum amount of information so we can read and
138  * validate the data around variable length structures.
139  *
140  *   o Most commands have a fixed static size. This is represented in the
141  *     pcs_size member. This also is used to represent the base structure size
142  *     in the case of entries like PCSCREDX.
143  *
144  *   o Other commands have an unknown minimum size to determine how much data
145  *     there is and they use the pcs_minf() function to determine the right
146  *     value. This is often unknown at compile time because it is say a
147  *     machdep or ISA based feature (ala PCSXREGS) and we'd rather not #ifdef
148  *     this code to death. This may be skipped and is for most things. The value
149  *     it returns is added to the static value.
150  *
151  *   o The final piece is the pcs_sizef() function pointer which determines the
152  *     total required size for this. It is given a pointer that has at least
153  *     pcs_size and pcs_minf() bytes. This is used to determine the total
154  *     expected size of the structure. Callers must not dereference data beyond
155  *     what they've indicated previously. This should only return extra bytes
156  *     that are required beyond what was already indicated between the two
157  *     functions.
158  *
 * In all cases, the core prwritectl() logic will determine if there is
 * sufficient data at each step along the way for each of these to proceed.
161  */
typedef struct proc_control_info {
	/* Command value compared against the word read from the caller. */
	long	pcs_cmd;
	/* Fixed number of payload bytes that follow the command word. */
	size_t	pcs_size;
	/*
	 * Optional. Reports additional minimum bytes (added to pcs_size) that
	 * must be present before pcs_sizef() may look at the payload. A
	 * B_FALSE return fails the write with EINVAL.
	 */
	boolean_t (*pcs_minf)(size_t *);
	/*
	 * Optional. Given the payload read so far, reports the extra bytes
	 * required beyond pcs_size and pcs_minf(). A B_FALSE return fails the
	 * write with EINVAL.
	 */
	boolean_t (*pcs_sizef)(const void *, size_t *);
} proc_control_info_t;
168 
169 static const proc_control_info_t proc_ctl_info[] = {
170 	{ PCNULL,	0,			NULL,		NULL },
171 	{ PCSTOP,	0,			NULL,		NULL },
172 	{ PCDSTOP,	0,			NULL,		NULL },
173 	{ PCWSTOP,	0,			NULL,		NULL },
174 	{ PCCSIG,	0,			NULL,		NULL },
175 	{ PCCFAULT,	0,			NULL,		NULL },
176 	{ PCSSIG,	sizeof (siginfo_t),	NULL,		NULL },
177 	{ PCTWSTOP,	sizeof (long),		NULL,		NULL },
178 	{ PCKILL,	sizeof (long),		NULL,		NULL },
179 	{ PCUNKILL,	sizeof (long),		NULL,		NULL },
180 	{ PCNICE,	sizeof (long),		NULL,		NULL },
181 	{ PCRUN,	sizeof (ulong_t),	NULL,		NULL },
182 	{ PCSET,	sizeof (ulong_t),	NULL,		NULL },
183 	{ PCUNSET,	sizeof (ulong_t),	NULL,		NULL },
184 	{ PCSTRACE,	sizeof (sigset_t),	NULL,		NULL },
185 	{ PCSHOLD,	sizeof (sigset_t),	NULL,		NULL },
186 	{ PCSFAULT,	sizeof (fltset_t),	NULL,		NULL },
187 	{ PCSENTRY,	sizeof (sysset_t),	NULL,		NULL },
188 	{ PCSEXIT,	sizeof (sysset_t),	NULL,		NULL },
189 	{ PCSREG,	sizeof (prgregset_t),	NULL,		NULL },
190 	{ PCAGENT,	sizeof (prgregset_t),	NULL,		NULL },
191 	{ PCSFPREG,	sizeof (prfpregset_t),	NULL,		NULL },
192 	{ PCSXREG,	0,			prwriteminxreg,
193 	    prwritesizexreg },
194 	{ PCWATCH,	sizeof (prwatch_t),	NULL,		NULL },
195 	{ PCREAD,	sizeof (priovec_t),	NULL,		NULL },
196 	{ PCWRITE,	sizeof (priovec_t),	NULL,		NULL },
197 	{ PCSCRED,	sizeof (prcred_t),	NULL,		NULL },
198 	{ PCSCREDX,	sizeof (prcred_t),	NULL,
199 	    prwritectl_pcscredx_sizef },
200 	{ PCSPRIV,	sizeof (prpriv_t),	NULL,
201 	    prwritectl_pcspriv_sizef },
202 	{ PCSZONE,	sizeof (long),		NULL,		NULL },
203 };
204 
205 /*
206  * We need a default buffer that we're going to allocate when we need memory to
207  * read control operations. This is on average large enough to hold multiple
208  * control operations. We leave this as a smaller value on debug builds just
209  * to exercise our reallocation logic.
210  */
/*
 * PROC_CTL_DEFSIZE is the granule that prwritectl_readin() rounds buffer
 * allocations up to.
 */
#ifdef	DEBUG
#define	PROC_CTL_DEFSIZE	32
#else
#define	PROC_CTL_DEFSIZE	1024
#endif	/* DEBUG */
216 
217 /*
218  * This structure is used to track all of the information that we have around a
219  * prwritectl call. This is used to reduce function parameters and make state
220  * clear.
221  */
typedef struct {
	/* Buffer holding command data copied in from the uio_t. */
	void	*prwc_buf;
	/* Allocated size of prwc_buf (and of prwc_buf32 when it exists). */
	size_t	prwc_buflen;
	/* Number of bytes at the start of prwc_buf that hold valid data. */
	size_t	prwc_curvalid;
	/* The caller's uio_t that further data is read from. */
	uio_t	*prwc_uiop;
	/* The prnode_t being written to; target of prlock()/prunlock(). */
	prnode_t *prwc_pnp;
	/* B_TRUE while we hold the lock taken by prlock() on prwc_pnp. */
	boolean_t prwc_locked;
	/* B_TRUE when the command word is an int32_t (ILP32 caller). */
	boolean_t prwc_need32;
	/* Scratch buffer that misaligned ILP32 payloads are copied into. */
	void	*prwc_buf32;
} prwritectl_t;
232 
233 /*
234  * Ensure that we have at least "needed" data marked as valid and present. If we
235  * require additional data, then we will read that in from uio_t. When we read
236  * data, we try to buffer as much data as will fit in our internal buffers in
237  * one go.
238  */
239 static int
240 prwritectl_readin(prwritectl_t *prwc, size_t needed)
241 {
242 	int ret;
243 	size_t toread;
244 	void *start;
245 
246 	/*
247 	 * If we have as much data as we need then we're good to go.
248 	 */
249 	if (prwc->prwc_curvalid > needed) {
250 		ASSERT3U(prwc->prwc_buflen, >=, prwc->prwc_curvalid);
251 		ASSERT3U(prwc->prwc_buflen, >=, needed);
252 		return (0);
253 	}
254 
255 	/*
256 	 * We don't have all of our data. We must make sure of several things:
257 	 *
258 	 *   1. That there actually is enough data in the uio_t for what we
259 	 *	need, considering what we've already read.
260 	 *   2. If the process is locked, at this point, we want to unlock it
261 	 *	before we deal with any I/O or memory allocation. Otherwise we
262 	 *	can wreak havoc with p_lock / paging.
263 	 *   3. We need to make sure that our buffer is large enough to actually
264 	 *	fit it all.
265 	 *   4. Only at that point can we actually perform the read.
266 	 */
267 	if (needed - prwc->prwc_curvalid > prwc->prwc_uiop->uio_resid) {
268 		return (EINVAL);
269 	}
270 
271 	if (prwc->prwc_locked) {
272 		prunlock(prwc->prwc_pnp);
273 		prwc->prwc_locked = B_FALSE;
274 	}
275 
276 	if (needed > prwc->prwc_buflen) {
277 		size_t new_len = P2ROUNDUP(needed, PROC_CTL_DEFSIZE);
278 		prwc->prwc_buf = kmem_rezalloc(prwc->prwc_buf,
279 		    prwc->prwc_buflen, new_len, KM_SLEEP);
280 		if (prwc->prwc_need32) {
281 			prwc->prwc_buf32 = kmem_rezalloc(prwc->prwc_buf32,
282 			    prwc->prwc_buflen, new_len, KM_SLEEP);
283 		}
284 		prwc->prwc_buflen = new_len;
285 	}
286 
287 	toread = MIN(prwc->prwc_buflen - prwc->prwc_curvalid,
288 	    prwc->prwc_uiop->uio_resid);
289 	ASSERT3U(toread, >=, needed - prwc->prwc_curvalid);
290 	start = (void *)((uintptr_t)prwc->prwc_buf + prwc->prwc_curvalid);
291 	if ((ret = uiomove(start, toread, UIO_WRITE, prwc->prwc_uiop)) != 0) {
292 		return (ret);
293 	}
294 
295 	prwc->prwc_curvalid += toread;
296 	return (0);
297 }
298 
299 static const proc_control_info_t *
300 prwritectl_cmd_identify(const prwritectl_t *prwc,
301     const proc_control_info_t *info, size_t ninfo, size_t cmdsize)
302 {
303 	long cmd;
304 
305 	ASSERT(cmdsize == sizeof (int32_t) || cmdsize == sizeof (long));
306 	if (cmdsize == 4) {
307 		cmd = (long)*(int32_t *)prwc->prwc_buf;
308 	} else {
309 		cmd = *(long *)prwc->prwc_buf;
310 	}
311 
312 
313 	for (size_t i = 0; i < ninfo; i++) {
314 		if (info[i].pcs_cmd == cmd) {
315 			return (&info[i]);
316 		}
317 	}
318 
319 	return (NULL);
320 }
321 
322 /*
323  * Control operations (lots).
324  *
325  * Users can submit one or more commands to us in the uio_t. They are required
326  * to always be complete messages. The first one that fails will cause all
327  * subsequent things to fail. Processing this can be a little tricky as the
328  * actual data size that may be required is variable, not all structures are
 * fixed sizes and some vary based on the instruction set (e.g. x86 vs.
330  * something else).
331  *
332  * The way that we handle process locking deserves some consideration. Prior to
333  * the colonization of prwritectl and the support for dynamic sizing of data,
334  * the logic would try to read in a large chunk of data and keep a process
335  * locked throughout that period and then unlock it before reading more data. As
336  * such, we mimic that logically and basically lock it before executing the
337  * first (or any subsequent) command and then only unlock it either when we're
338  * done entirely or we need to allocate memory or read from the process.
339  *
340  * This function is a common implementation for both the ILP32 and LP64 entry
341  * points as they are mostly the same except for the sizing and control function
342  * we call.
343  */
/*
 * Arguments:
 *   vp          vnode being written; converted to its prnode_t with VTOP().
 *   uiop        the caller's data; may contain several commands back to back.
 *   cr          credentials passed through to pr_controlf.
 *   proc_info   table of ninfo supported commands and their sizing rules.
 *   cmdsize     size of a command word; must be sizeof (int32_t) or
 *               sizeof (long).
 *   pr_controlf function that executes a single command on the locked node.
 *
 * Returns 0 once every command has been processed, otherwise the first
 * positive error encountered. A negative return from pr_controlf is not
 * treated as a failure; processing continues with the next command.
 */
int
prwritectl_common(vnode_t *vp, uio_t *uiop, cred_t *cr,
    const proc_control_info_t *proc_info, size_t ninfo, size_t cmdsize,
    int (*pr_controlf)(long, void *, prnode_t *, cred_t *))
{
	int ret;
	prwritectl_t prwc;

	VERIFY(cmdsize == sizeof (int32_t) || cmdsize == sizeof (long));

	bzero(&prwc, sizeof (prwc));
	prwc.prwc_pnp = VTOP(vp);
	prwc.prwc_uiop = uiop;
	prwc.prwc_need32 = (cmdsize == sizeof (int32_t));

	/*
	 * We may have multiple commands to read and want to try to minimize the
	 * amount of reading that we do. Our callers expect us to have a
	 * contiguous buffer for a command's actual implementation. However, we
	 * must have at least a single long worth of data, otherwise it's not
	 * worth continuing.
	 */
	while (uiop->uio_resid > 0 || prwc.prwc_curvalid > 0) {
		const proc_control_info_t *proc_cmd;
		void *data;

		/*
		 * Check if we have enough data to identify a command. If not,
		 * we read as much as we can in one gulp.
		 */
		if ((ret = prwritectl_readin(&prwc, cmdsize)) != 0) {
			goto out;
		}

		/*
		 * Identify the command and figure out how much data we
		 * should have read in the kernel. Some commands have a variable
		 * length and we need to make sure the minimum is met before
		 * asking how much there is in general. Most things know what
		 * the minimum length is and this pcs_minf() is not implemented.
		 * However things that are ISA-specific require us to ask that
		 * first.
		 *
		 * We also must be aware that there may not actually be enough
		 * data present in the uio_t.
		 */
		if ((proc_cmd = prwritectl_cmd_identify(&prwc, proc_info,
		    ninfo, cmdsize)) == NULL) {
			ret = EINVAL;
			goto out;
		}

		/* Total bytes for this command, including the command word. */
		size_t needed_data = cmdsize + proc_cmd->pcs_size;
		if (proc_cmd->pcs_minf != NULL) {
			size_t min;

			if (!proc_cmd->pcs_minf(&min)) {
				ret = EINVAL;
				goto out;
			}

			needed_data += min;
		}

		if (proc_cmd->pcs_sizef != NULL) {
			size_t extra;

			/*
			 * Make sure we have the minimum amount of data that
			 * they asked us to between the static and minf
			 * function.
			 */
			if ((ret = prwritectl_readin(&prwc, needed_data)) !=
			    0) {
				goto out;
			}

			VERIFY3U(prwc.prwc_curvalid, >, cmdsize);
			data = (void *)((uintptr_t)prwc.prwc_buf + cmdsize);
			if (!proc_cmd->pcs_sizef(data, &extra)) {
				ret = EINVAL;
				goto out;
			}

			needed_data += extra;
		}

		/*
		 * Now that we know how much data we're supposed to have,
		 * finally ensure we have the total amount we need.
		 */
		if ((ret = prwritectl_readin(&prwc, needed_data)) != 0) {
			goto out;
		}

		/*
		 * /proc has traditionally assumed control writes come in
		 * multiples of a long. This is 4 bytes for ILP32 and 8 bytes
		 * for LP64. When calculating the required size for a structure,
		 * it would always round that up to the next long. However, the
		 * exact combination of circumstances changes with the
		 * introduction of the 64-bit kernel. For 64-bit processes we
		 * round up when the current command we're processing isn't the
		 * last one.
		 *
		 * Because of our tracking structures and caching we need to
		 * look beyond the uio_t to make this determination. In
		 * particular, the uio_t can have a zero resid, but we may still
		 * have additional data to read as indicated by prwc_curvalid
		 * exceeding the current command size. In the end, we must check
		 * both of these cases.
		 */
		if ((needed_data % cmdsize) != 0) {
			if (cmdsize == sizeof (int32_t) ||
			    prwc.prwc_curvalid > needed_data ||
			    prwc.prwc_uiop->uio_resid > 0) {
				needed_data = P2ROUNDUP(needed_data,
				    cmdsize);
				if ((ret = prwritectl_readin(&prwc,
				    needed_data)) != 0) {
					goto out;
				}
			}
		}

		/*
		 * Any of the reads above may have dropped the lock, so take it
		 * (again) before executing the command.
		 */
		if (!prwc.prwc_locked) {
			ret = prlock(prwc.prwc_pnp, ZNO);
			if (ret != 0) {
				goto out;
			}
			prwc.prwc_locked = B_TRUE;
		}

		/*
		 * Run our actual command. When there is an error, then the
		 * underlying pr_control call will have unlocked the prnode_t
		 * on our behalf. pr_control can return -1, which is a special
		 * error indicating a timeout occurred. In such a case the node
		 * is unlocked; however, we are supposed to continue
		 * processing commands regardless.
		 *
		 * Finally, we must deal with one actual wrinkle. The LP64
		 * based logic always guarantees that we have data that is
		 * 8-byte aligned. However, the ILP32 logic is 4-byte aligned
		 * and the rest of the /proc code assumes it can always
		 * dereference it. If we're not aligned, we have to bcopy it to
		 * a temporary buffer.
		 */
		data = (void *)((uintptr_t)prwc.prwc_buf + cmdsize);
#ifdef	DEBUG
		if (cmdsize == sizeof (long)) {
			ASSERT0((uintptr_t)data % alignof (long));
		}
#endif
		if (prwc.prwc_need32 && ((uintptr_t)data % alignof (long)) !=
		    0 && needed_data > cmdsize) {
			bcopy(data, prwc.prwc_buf32, needed_data - cmdsize);
			data = prwc.prwc_buf32;
		}
		ret = pr_controlf(proc_cmd->pcs_cmd, data, prwc.prwc_pnp, cr);
		if (ret != 0) {
			/* Any non-zero return means the node is now unlocked. */
			prwc.prwc_locked = B_FALSE;
			if (ret > 0) {
				goto out;
			}
		}

		/*
		 * Finally, now that we have processed this command, we need to
		 * move on. To make our life simple, we basically shift all the
		 * data in our buffer over to indicate it's been consumed. While
		 * a little wasteful, this simplifies buffer management and
		 * guarantees that command processing uses a semi-sanitized
		 * state. Visually, this is the following transformation:
		 *
		 *  0			20		prwc.prwc_curvalid
		 *   +------------------+----------------+
		 *   |   needed_data    | remaining_data |
		 *   +------------------+----------------+
		 *
		 * In the above example we are shifting all the data over by 20,
		 * so remaining data starts at 0. This leaves us needed_data
		 * bytes to clean up from what was valid.
		 */
		if (prwc.prwc_buf32 != NULL) {
			bzero(prwc.prwc_buf32, needed_data - cmdsize);
		}

		if (prwc.prwc_curvalid > needed_data) {
			size_t save_size = prwc.prwc_curvalid - needed_data;
			void *first_save = (void *)((uintptr_t)prwc.prwc_buf +
			    needed_data);
			memmove(prwc.prwc_buf, first_save, save_size);
			void *first_zero = (void *)((uintptr_t)prwc.prwc_buf +
			    save_size);
			bzero(first_zero, needed_data);
		} else {
			bzero(prwc.prwc_buf, prwc.prwc_curvalid);
		}
		prwc.prwc_curvalid -= needed_data;
	}

	/*
	 * We've managed to successfully process everything. We can actually say
	 * this was successful now.
	 */
	ret = 0;

	/* Common exit: drop the lock if still held and free both buffers. */
out:
	if (prwc.prwc_locked) {
		prunlock(prwc.prwc_pnp);
		prwc.prwc_locked = B_FALSE;
	}

	if (prwc.prwc_buf != NULL) {
		kmem_free(prwc.prwc_buf, prwc.prwc_buflen);
	}

	if (prwc.prwc_buf32 != NULL) {
		VERIFY(prwc.prwc_need32);
		kmem_free(prwc.prwc_buf32, prwc.prwc_buflen);
	}

	return (ret);
}
569 
/*
 * Execute a single control command for a native caller.
 *
 *   cmd      the command to execute (one of the PC* values).
 *   generic  the command's payload, interpreted through arg_t.
 *   pnp      the prnode_t being operated on; prwritectl_common() calls this
 *            with the node locked via prlock().
 *   cr       the caller's credentials.
 *
 * Lock contract, as implemented below: on a return of 0 the node is left
 * locked. On any non-zero return the node has been unlocked, either by the
 * prunlock() at the bottom of this function, by an explicit prunlock() on the
 * path taken, or (for the early returns after pr_wait_stop(), pr_watch() and
 * pr_agent()) presumably by the helper itself.
 *
 * Returns 0 on success, a positive errno value on failure, or -1 after a
 * successful SIGKILL delivery (PCSSIG/PCKILL) once pr_wait_die() has been
 * called.
 */
static int
pr_control(long cmd, void *generic, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;
	int error = 0;
	arg_t *argp = generic;

	/* PCNULL is a no-op and is accepted before any process checks. */
	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	/* System processes defy control. */
	if (p->p_flag & SSYS) {
		prunlock(pnp);
		return (EBUSY);
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
		{
			time_t timeo;

			/*
			 * Can't apply to a system process.
			 */
			if (p->p_as == &kas) {
				error = EBUSY;
				break;
			}

			if (cmd == PCSTOP || cmd == PCDSTOP)
				pr_stop(pnp);

			if (cmd == PCDSTOP)
				break;

			/*
			 * If an lwp is waiting for itself or its process,
			 * don't wait. The stopped lwp would never see the
			 * fact that it is stopped.
			 */
			if ((pcp->prc_flags & PRC_LWP)?
			    (pcp->prc_thread == curthread) : (p == curproc)) {
				if (cmd == PCWSTOP || cmd == PCTWSTOP)
					error = EBUSY;
				break;
			}

			/* Only PCTWSTOP carries a timeout; others pass 0. */
			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
			if ((error = pr_wait_stop(pnp, timeo)) != 0)
				return (error);

			break;
		}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p,  &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		error = pr_setsig(pnp, &argp->siginfo);
		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		if (error == 0 && argp->sig == SIGKILL) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, argp->flags);
		break;

	case PCSREG:	/* set general registers */
		{
			kthread_t *t = pr_thread(pnp);

			/* Registers may only be set on a stopped thread. */
			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
				thread_unlock(t);
				error = EBUSY;
			} else {
				thread_unlock(t);
				/* p_lock is dropped around prsetprregs(). */
				mutex_exit(&p->p_lock);
				prsetprregs(ttolwp(t), argp->prgregset, 0);
				mutex_enter(&p->p_lock);
			}
			break;
		}

	case PCSFPREG:	/* set floating-point registers */
		error = pr_setfpregs(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
		error = pr_setxregs(pnp, (prxregset_t *)argp);
		break;

	case PCSVADDR:	/* set virtual address at which to resume */
		error = pr_setvaddr(pnp, argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		error = pr_watch(pnp, &argp->prwatch, &unlocked);
		/* Skip the prunlock() below if the helper already unlocked. */
		if (error && unlocked)
			return (error);
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		error = pr_agent(pnp, argp->prgregset, &unlocked);
		/* Skip the prunlock() below if the helper already unlocked. */
		if (error && unlocked)
			return (error);
		break;

	case PCREAD:	/* read from the address space */
		error = pr_rdwr(p, UIO_READ, &argp->priovec);
		break;

	case PCWRITE:	/* write to the address space */
		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
		break;

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;
	case PCSZONE:	/* set the process's zoneid credentials */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* Errors that reach here still hold the lock; release it. */
	if (error)
		prunlock(pnp);
	return (error);
}
766 
767 int
768 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
769 {
770 	return (prwritectl_common(vp, uiop, cr, proc_ctl_info,
771 	    ARRAY_SIZE(proc_ctl_info), sizeof (long), pr_control));
772 }
773 
774 #ifdef _SYSCALL32_IMPL
775 
/*
 * ILP32 counterpart of arg_t: pr_control32() casts the payload that follows
 * the 32-bit command word to this union. Scalar members are 32 bits wide.
 */
typedef union {
	int32_t		sig;		/* PCKILL, PCUNKILL */
	int32_t		nice;		/* PCNICE */
	int32_t		timeo;		/* PCTWSTOP */
	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
	caddr32_t	vaddr;		/* PCSVADDR */
	siginfo32_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset32_t	prfpregset;	/* PCSFPREG */
	prwatch32_t	prwatch;	/* PCWATCH */
	priovec32_t	priovec;	/* PCREAD, PCWRITE */
	prcred32_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	int32_t		przoneid;	/* PCSZONE */
} arg32_t;
794 
795 static	int	pr_setfpregs32(prnode_t *, prfpregset32_t *);
796 
797 static boolean_t
798 prwritectl_pcscredx32_sizef(const void *datap, size_t *sizep)
799 {
800 	const prcred32_t *cred = datap;
801 
802 	if (cred->pr_ngroups < 0 || cred->pr_ngroups > ngroups_max) {
803 		return (B_FALSE);
804 	}
805 
806 	if (cred->pr_ngroups == 0) {
807 		*sizep = 0;
808 	} else {
809 		*sizep = (cred->pr_ngroups - 1) * sizeof (gid32_t);
810 	}
811 	return (B_TRUE);
812 }
813 
814 /*
815  * When dealing with ILP32 code, we are not at a point where we can assume
816  * 64-bit aligned data. Any functions that are operating here must be aware of
817  * that.
818  */
819 static const proc_control_info_t proc_ctl_info32[] = {
820 	{ PCNULL,	0,			NULL,		NULL },
821 	{ PCSTOP,	0,			NULL,		NULL },
822 	{ PCDSTOP,	0,			NULL,		NULL },
823 	{ PCWSTOP,	0,			NULL,		NULL },
824 	{ PCCSIG,	0,			NULL,		NULL },
825 	{ PCCFAULT,	0,			NULL,		NULL },
826 	{ PCSSIG,	sizeof (siginfo32_t),	NULL,		NULL },
827 	{ PCTWSTOP,	sizeof (int32_t),	NULL,		NULL },
828 	{ PCKILL,	sizeof (int32_t),	NULL,		NULL },
829 	{ PCUNKILL,	sizeof (int32_t),	NULL,		NULL },
830 	{ PCNICE,	sizeof (int32_t),	NULL,		NULL },
831 	{ PCRUN,	sizeof (uint32_t),	NULL,		NULL },
832 	{ PCSET,	sizeof (uint32_t),	NULL,		NULL },
833 	{ PCUNSET,	sizeof (uint32_t),	NULL,		NULL },
834 	{ PCSVADDR,	sizeof (caddr32_t),	NULL,		NULL },
835 	{ PCSTRACE,	sizeof (sigset_t),	NULL,		NULL },
836 	{ PCSHOLD,	sizeof (sigset_t),	NULL,		NULL },
837 	{ PCSFAULT,	sizeof (fltset_t),	NULL,		NULL },
838 	{ PCSENTRY,	sizeof (sysset_t),	NULL,		NULL },
839 	{ PCSEXIT,	sizeof (sysset_t),	NULL,		NULL },
840 	{ PCSREG,	sizeof (prgregset32_t),	NULL,		NULL },
841 	{ PCAGENT,	sizeof (prgregset32_t),	NULL,		NULL },
842 	{ PCSFPREG,	sizeof (prfpregset32_t), NULL,		NULL },
843 	{ PCSXREG,	0,			prwriteminxreg,
844 	    prwritesizexreg },
845 	{ PCWATCH,	sizeof (prwatch32_t),	NULL,		NULL },
846 	{ PCREAD,	sizeof (priovec32_t),	NULL,		NULL },
847 	{ PCWRITE,	sizeof (priovec32_t),	NULL,		NULL },
848 	{ PCSCRED,	sizeof (prcred32_t),	NULL,		NULL },
849 	{ PCSCREDX,	sizeof (prcred32_t),	NULL,
850 	    prwritectl_pcscredx32_sizef },
851 	{ PCSPRIV,	sizeof (prpriv_t),	NULL,
852 	    prwritectl_pcspriv_sizef },
853 	{ PCSZONE,	sizeof (long),		NULL,		NULL },
854 };
855 
856 static int
857 pr_control32(long cmd, void *generic, prnode_t *pnp, cred_t *cr)
858 {
859 	prcommon_t *pcp;
860 	proc_t *p;
861 	int unlocked;
862 	int error = 0;
863 	arg32_t *argp = generic;
864 
865 	if (cmd == PCNULL)
866 		return (0);
867 
868 	pcp = pnp->pr_common;
869 	p = pcp->prc_proc;
870 	ASSERT(p != NULL);
871 
872 	if (p->p_flag & SSYS) {
873 		prunlock(pnp);
874 		return (EBUSY);
875 	}
876 
877 	switch (cmd) {
878 
879 	default:
880 		error = EINVAL;
881 		break;
882 
883 	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
884 	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
885 	case PCWSTOP:	/* wait for process or lwp to stop */
886 	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
887 		{
888 			time_t timeo;
889 
890 			/*
891 			 * Can't apply to a system process.
892 			 */
893 			if (p->p_as == &kas) {
894 				error = EBUSY;
895 				break;
896 			}
897 
898 			if (cmd == PCSTOP || cmd == PCDSTOP)
899 				pr_stop(pnp);
900 
901 			if (cmd == PCDSTOP)
902 				break;
903 
904 			/*
905 			 * If an lwp is waiting for itself or its process,
906 			 * don't wait. The lwp will never see the fact that
907 			 * itself is stopped.
908 			 */
909 			if ((pcp->prc_flags & PRC_LWP)?
910 			    (pcp->prc_thread == curthread) : (p == curproc)) {
911 				if (cmd == PCWSTOP || cmd == PCTWSTOP)
912 					error = EBUSY;
913 				break;
914 			}
915 
916 			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
917 			if ((error = pr_wait_stop(pnp, timeo)) != 0)
918 				return (error);
919 
920 			break;
921 		}
922 
923 	case PCRUN:	/* make lwp or process runnable */
924 		error = pr_setrun(pnp, (ulong_t)argp->flags);
925 		break;
926 
927 	case PCSTRACE:	/* set signal trace mask */
928 		pr_settrace(p,  &argp->sigset);
929 		break;
930 
931 	case PCSSIG:	/* set current signal */
932 		if (PROCESS_NOT_32BIT(p))
933 			error = EOVERFLOW;
934 		else {
935 			int sig = (int)argp->siginfo.si_signo;
936 			siginfo_t siginfo;
937 
938 			bzero(&siginfo, sizeof (siginfo));
939 			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
940 			error = pr_setsig(pnp, &siginfo);
941 			if (sig == SIGKILL && error == 0) {
942 				prunlock(pnp);
943 				pr_wait_die(pnp);
944 				return (-1);
945 			}
946 		}
947 		break;
948 
949 	case PCKILL:	/* send signal */
950 		error = pr_kill(pnp, (int)argp->sig, cr);
951 		if (error == 0 && argp->sig == SIGKILL) {
952 			prunlock(pnp);
953 			pr_wait_die(pnp);
954 			return (-1);
955 		}
956 		break;
957 
958 	case PCUNKILL:	/* delete a pending signal */
959 		error = pr_unkill(pnp, (int)argp->sig);
960 		break;
961 
962 	case PCNICE:	/* set nice priority */
963 		error = pr_nice(p, (int)argp->nice, cr);
964 		break;
965 
966 	case PCSENTRY:	/* set syscall entry bit mask */
967 	case PCSEXIT:	/* set syscall exit bit mask */
968 		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
969 		break;
970 
971 	case PCSET:	/* set process flags */
972 		error = pr_set(p, (long)argp->flags);
973 		break;
974 
975 	case PCUNSET:	/* unset process flags */
976 		error = pr_unset(p, (long)argp->flags);
977 		break;
978 
979 	case PCSREG:	/* set general registers */
980 		if (PROCESS_NOT_32BIT(p))
981 			error = EOVERFLOW;
982 		else {
983 			kthread_t *t = pr_thread(pnp);
984 
985 			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
986 				thread_unlock(t);
987 				error = EBUSY;
988 			} else {
989 				prgregset_t prgregset;
990 				klwp_t *lwp = ttolwp(t);
991 
992 				thread_unlock(t);
993 				mutex_exit(&p->p_lock);
994 				prgregset_32ton(lwp, argp->prgregset,
995 				    prgregset);
996 				prsetprregs(lwp, prgregset, 0);
997 				mutex_enter(&p->p_lock);
998 			}
999 		}
1000 		break;
1001 
1002 	case PCSFPREG:	/* set floating-point registers */
1003 		if (PROCESS_NOT_32BIT(p))
1004 			error = EOVERFLOW;
1005 		else
1006 			error = pr_setfpregs32(pnp, &argp->prfpregset);
1007 		break;
1008 
1009 	case PCSXREG:	/* set extra registers */
1010 		if (PROCESS_NOT_32BIT(p))
1011 			error = EOVERFLOW;
1012 		else
1013 			error = pr_setxregs(pnp, (prxregset_t *)argp);
1014 		break;
1015 
1016 	case PCSVADDR:	/* set virtual address at which to resume */
1017 		if (PROCESS_NOT_32BIT(p))
1018 			error = EOVERFLOW;
1019 		else
1020 			error = pr_setvaddr(pnp,
1021 			    (caddr_t)(uintptr_t)argp->vaddr);
1022 		break;
1023 
1024 	case PCSHOLD:	/* set signal-hold mask */
1025 		pr_sethold(pnp, &argp->sigset);
1026 		break;
1027 
1028 	case PCSFAULT:	/* set mask of traced faults */
1029 		pr_setfault(p, &argp->fltset);
1030 		break;
1031 
1032 	case PCCSIG:	/* clear current signal */
1033 		error = pr_clearsig(pnp);
1034 		break;
1035 
1036 	case PCCFAULT:	/* clear current fault */
1037 		error = pr_clearflt(pnp);
1038 		break;
1039 
1040 	case PCWATCH:	/* set or clear watched areas */
1041 		if (PROCESS_NOT_32BIT(p))
1042 			error = EOVERFLOW;
1043 		else {
1044 			prwatch_t prwatch;
1045 
1046 			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
1047 			prwatch.pr_size = argp->prwatch.pr_size;
1048 			prwatch.pr_wflags = argp->prwatch.pr_wflags;
1049 			prwatch.pr_pad = argp->prwatch.pr_pad;
1050 			error = pr_watch(pnp, &prwatch, &unlocked);
1051 			if (error && unlocked)
1052 				return (error);
1053 		}
1054 		break;
1055 
1056 	case PCAGENT:	/* create the /proc agent lwp in the target process */
1057 		if (PROCESS_NOT_32BIT(p))
1058 			error = EOVERFLOW;
1059 		else {
1060 			prgregset_t prgregset;
1061 			kthread_t *t = pr_thread(pnp);
1062 			klwp_t *lwp = ttolwp(t);
1063 			thread_unlock(t);
1064 			mutex_exit(&p->p_lock);
1065 			prgregset_32ton(lwp, argp->prgregset, prgregset);
1066 			mutex_enter(&p->p_lock);
1067 			error = pr_agent(pnp, prgregset, &unlocked);
1068 			if (error && unlocked)
1069 				return (error);
1070 		}
1071 		break;
1072 
1073 	case PCREAD:	/* read from the address space */
1074 	case PCWRITE:	/* write to the address space */
1075 		if (PROCESS_NOT_32BIT(p) || (pnp->pr_flags & PR_OFFMAX))
1076 			error = EOVERFLOW;
1077 		else {
1078 			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
1079 			priovec_t priovec;
1080 
1081 			priovec.pio_base =
1082 			    (void *)(uintptr_t)argp->priovec.pio_base;
1083 			priovec.pio_len = (size_t)argp->priovec.pio_len;
1084 			priovec.pio_offset = (off_t)
1085 			    (uint32_t)argp->priovec.pio_offset;
1086 			error = pr_rdwr(p, rw, &priovec);
1087 		}
1088 		break;
1089 
1090 	case PCSCRED:	/* set the process credentials */
1091 	case PCSCREDX:
1092 		{
1093 			/*
1094 			 * All the fields in these structures are exactly the
1095 			 * same and so the structures are compatible.  In case
1096 			 * this ever changes, we catch this with the ASSERT
1097 			 * below.
1098 			 */
1099 			prcred_t *prcred = (prcred_t *)&argp->prcred;
1100 
1101 #ifndef __lint
1102 			ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
1103 #endif
1104 
1105 			error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
1106 			break;
1107 		}
1108 
1109 	case PCSPRIV:	/* set the process privileges */
1110 		error = pr_spriv(p, &argp->prpriv, cr);
1111 		break;
1112 
1113 	case PCSZONE:	/* set the process's zoneid */
1114 		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
1115 		break;
1116 	}
1117 
1118 	if (error)
1119 		prunlock(pnp);
1120 	return (error);
1121 }
1122 
1123 int
1124 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
1125 {
1126 	return (prwritectl_common(vp, uiop, cr, proc_ctl_info32,
1127 	    ARRAY_SIZE(proc_ctl_info32), sizeof (int32_t), pr_control32));
1128 }
1129 #endif	/* _SYSCALL32_IMPL */
1130 
1131 /*
1132  * Return the specific or chosen thread/lwp for a control operation.
1133  * Returns with the thread locked via thread_lock(t).
1134  */
1135 kthread_t *
1136 pr_thread(prnode_t *pnp)
1137 {
1138 	prcommon_t *pcp = pnp->pr_common;
1139 	kthread_t *t;
1140 
1141 	if (pcp->prc_flags & PRC_LWP) {
1142 		t = pcp->prc_thread;
1143 		ASSERT(t != NULL);
1144 		thread_lock(t);
1145 	} else {
1146 		proc_t *p = pcp->prc_proc;
1147 		t = prchoose(p);	/* returns locked thread */
1148 		ASSERT(t != NULL);
1149 	}
1150 
1151 	return (t);
1152 }
1153 
/*
 * Direct the process or lwp to stop.
 *
 * For an lwp node (PRC_LWP set) only prc_thread is visited; for a
 * process node every thread on p->p_tlist is visited.  This routine
 * only posts the stop request (and wakes or pokes the thread as
 * needed); it does not itself wait for the stop to take effect.
 */
void
pr_stop(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	kthread_t *t;
	vnode_t *vp;

	/*
	 * If already stopped, do nothing; otherwise flag
	 * it to be stopped the next time it tries to run.
	 * If sleeping at interruptible priority, set it
	 * running so it will stop within cv_wait_sig().
	 *
	 * Take care to cooperate with jobcontrol: if an lwp
	 * is stopped due to the default action of a jobcontrol
	 * stop signal, flag it to be stopped the next time it
	 * starts due to a SIGCONT signal.
	 */
	if (pcp->prc_flags & PRC_LWP)
		t = pcp->prc_thread;
	else
		t = p->p_tlist;
	ASSERT(t != NULL);

	do {
		/* set when this thread becomes newly virtually stopped */
		int notify;

		notify = 0;
		thread_lock(t);
		if (!ISTOPPED(t)) {
			t->t_proc_flag |= TP_PRSTOP;
			t->t_sig_check = 1;	/* do ISSIG */
		}

		/* Move the thread from wait queue to run queue */
		if (ISWAITING(t))
			setrun_locked(t);

		if (ISWAKEABLE(t)) {
			if (t->t_wchan0 == NULL)
				setrun_locked(t);
			else if (!VSTOPPED(t)) {
				/*
				 * Mark it virtually stopped.
				 */
				t->t_proc_flag |= TP_PRVSTOP;
				notify = 1;
			}
		}
		/*
		 * force the thread into the kernel
		 * if it is not already there.
		 */
		prpokethread(t);
		thread_unlock(t);
		/*
		 * The notification is issued only after the thread lock
		 * has been dropped, and only if the lwp directory entry
		 * has a trace vnode attached.
		 */
		if (notify &&
		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
			prnotify(vp);
		/* An lwp node affects exactly one thread. */
		if (pcp->prc_flags & PRC_LWP)
			break;
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * We do this just in case the thread we asked
	 * to stop is in holdlwps() (called from cfork()).
	 */
	cv_broadcast(&p->p_holdlwps);
}
1226 
/*
 * Sleep until the lwp stops, but cooperate with
 * jobcontrol:  Don't wake up if the lwp is stopped
 * due to the default action of a jobcontrol stop signal.
 * If this is the process file descriptor, sleep
 * until all of the process's lwps stop.
 *
 * timeo is a timeout in milliseconds; a value <= 0 means wait with no
 * timeout (a NULL timestruc is passed to pr_wait()).
 *
 * Returns 0 once the stop condition is observed, with the node still
 * locked as on entry.  Any non-zero value is the error from pr_wait()
 * (-1 for a timeout) or from prlock(); on those paths prunlock() has
 * already been called here and the lock has not been reacquired.
 */
int
pr_wait_stop(prnode_t *pnp, time_t timeo)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	timestruc_t rqtime;
	timestruc_t *rqtp = NULL;
	int timecheck = 0;
	kthread_t *t;
	int error;

	if (timeo > 0) {	/* millisecond timeout */
		/*
		 * Determine the precise future time of the requested timeout.
		 */
		timestruc_t now;

		timecheck = timechanged;
		gethrestime(&now);
		rqtp = &rqtime;
		/* split milliseconds into whole seconds + nanoseconds */
		rqtp->tv_sec = timeo / MILLISEC;
		rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
		timespecadd(rqtp, &now);
	}

	if (pcp->prc_flags & PRC_LWP) {	/* lwp file descriptor */
		t = pcp->prc_thread;
		ASSERT(t != NULL);
		thread_lock(t);
		while (!ISTOPPED(t) && !VSTOPPED(t)) {
			thread_unlock(t);
			/*
			 * Take prc_mutex before dropping the /proc lock;
			 * pr_wait() expects it held and releases it.
			 */
			mutex_enter(&pcp->prc_mutex);
			prunlock(pnp);
			error = pr_wait(pcp, rqtp, timecheck);
			if (error)	/* -1 is timeout */
				return (error);
			if ((error = prlock(pnp, ZNO)) != 0)
				return (error);
			ASSERT(p == pcp->prc_proc);
			ASSERT(t == pcp->prc_thread);
			thread_lock(t);
		}
		thread_unlock(t);
	} else {			/* process file descriptor */
		t = prchoose(p);	/* returns locked thread */
		ASSERT(t != NULL);
		ASSERT(MUTEX_HELD(&p->p_lock));
		/*
		 * Keep waiting while the representative thread is not
		 * stopped/virtually stopped/suspended, or while the
		 * process has SEXITLWPS set.
		 */
		while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
		    (p->p_flag & SEXITLWPS)) {
			thread_unlock(t);
			mutex_enter(&pcp->prc_mutex);
			prunlock(pnp);
			error = pr_wait(pcp, rqtp, timecheck);
			if (error)	/* -1 is timeout */
				return (error);
			if ((error = prlock(pnp, ZNO)) != 0)
				return (error);
			ASSERT(p == pcp->prc_proc);
			/* the representative may differ after relocking */
			t = prchoose(p);	/* returns locked t */
			ASSERT(t != NULL);
		}
		thread_unlock(t);
	}

	ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
	    t != NULL && t->t_state != TS_ZOMB);

	return (0);
}
1303 
1304 int
1305 pr_setrun(prnode_t *pnp, ulong_t flags)
1306 {
1307 	prcommon_t *pcp = pnp->pr_common;
1308 	proc_t *p = pcp->prc_proc;
1309 	kthread_t *t;
1310 	klwp_t *lwp;
1311 
1312 	/*
1313 	 * Cannot set an lwp running if it is not stopped.
1314 	 * Also, no lwp other than the /proc agent lwp can
1315 	 * be set running so long as the /proc agent lwp exists.
1316 	 */
1317 	t = pr_thread(pnp);	/* returns locked thread */
1318 	if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1319 	    !(t->t_proc_flag & TP_PRSTOP)) ||
1320 	    (p->p_agenttp != NULL &&
1321 	    (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1322 		thread_unlock(t);
1323 		return (EBUSY);
1324 	}
1325 	thread_unlock(t);
1326 	if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1327 		return (EINVAL);
1328 	lwp = ttolwp(t);
1329 	if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1330 		/*
1331 		 * Discard current siginfo_t, if any.
1332 		 */
1333 		lwp->lwp_cursig = 0;
1334 		lwp->lwp_extsig = 0;
1335 		if (lwp->lwp_curinfo) {
1336 			siginfofree(lwp->lwp_curinfo);
1337 			lwp->lwp_curinfo = NULL;
1338 		}
1339 	}
1340 	if (flags & PRCFAULT)
1341 		lwp->lwp_curflt = 0;
1342 	/*
1343 	 * We can't hold p->p_lock when we touch the lwp's registers.
1344 	 * It may be swapped out and we will get a page fault.
1345 	 */
1346 	if (flags & PRSTEP) {
1347 		mutex_exit(&p->p_lock);
1348 		prstep(lwp, 0);
1349 		mutex_enter(&p->p_lock);
1350 	}
1351 	if (flags & PRSTOP) {
1352 		t->t_proc_flag |= TP_PRSTOP;
1353 		t->t_sig_check = 1;	/* do ISSIG */
1354 	}
1355 	if (flags & PRSABORT)
1356 		lwp->lwp_sysabort = 1;
1357 	thread_lock(t);
1358 	if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1359 		/*
1360 		 * Here, we are dealing with a single lwp.
1361 		 */
1362 		if (ISTOPPED(t)) {
1363 			t->t_schedflag |= TS_PSTART;
1364 			t->t_dtrace_stop = 0;
1365 			setrun_locked(t);
1366 		} else if (flags & PRSABORT) {
1367 			t->t_proc_flag &=
1368 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1369 			setrun_locked(t);
1370 		} else if (!(flags & PRSTOP)) {
1371 			t->t_proc_flag &=
1372 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1373 		}
1374 		thread_unlock(t);
1375 	} else {
1376 		/*
1377 		 * Here, we are dealing with the whole process.
1378 		 */
1379 		if (ISTOPPED(t)) {
1380 			/*
1381 			 * The representative lwp is stopped on an event
1382 			 * of interest.  We demote it to PR_REQUESTED and
1383 			 * choose another representative lwp.  If the new
1384 			 * representative lwp is not stopped on an event of
1385 			 * interest (other than PR_REQUESTED), we set the
1386 			 * whole process running, else we leave the process
1387 			 * stopped showing the next event of interest.
1388 			 */
1389 			kthread_t *tx = NULL;
1390 
1391 			if (!(flags & PRSABORT) &&
1392 			    t->t_whystop == PR_SYSENTRY &&
1393 			    t->t_whatstop == SYS_lwp_exit)
1394 				tx = t;		/* remember the exiting lwp */
1395 			t->t_whystop = PR_REQUESTED;
1396 			t->t_whatstop = 0;
1397 			thread_unlock(t);
1398 			t = prchoose(p);	/* returns locked t */
1399 			ASSERT(ISTOPPED(t) || VSTOPPED(t));
1400 			if (VSTOPPED(t) ||
1401 			    t->t_whystop == PR_REQUESTED) {
1402 				thread_unlock(t);
1403 				allsetrun(p);
1404 			} else {
1405 				thread_unlock(t);
1406 				/*
1407 				 * As a special case, if the old representative
1408 				 * lwp was stopped on entry to _lwp_exit()
1409 				 * (and we are not aborting the system call),
1410 				 * we set the old representative lwp running.
1411 				 * We do this so that the next process stop
1412 				 * will find the exiting lwp gone.
1413 				 */
1414 				if (tx != NULL) {
1415 					thread_lock(tx);
1416 					tx->t_schedflag |= TS_PSTART;
1417 					t->t_dtrace_stop = 0;
1418 					setrun_locked(tx);
1419 					thread_unlock(tx);
1420 				}
1421 			}
1422 		} else {
1423 			/*
1424 			 * No event of interest; set all of the lwps running.
1425 			 */
1426 			if (flags & PRSABORT) {
1427 				t->t_proc_flag &=
1428 				    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1429 				setrun_locked(t);
1430 			}
1431 			thread_unlock(t);
1432 			allsetrun(p);
1433 		}
1434 	}
1435 	return (0);
1436 }
1437 
1438 /*
1439  * Wait until process/lwp stops or until timer expires.
1440  * Return EINTR for an interruption, -1 for timeout, else 0.
1441  */
1442 int
1443 pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
1444     timestruc_t *ts,		/* absolute time of timeout, if any */
1445     int timecheck)
1446 {
1447 	int rval;
1448 
1449 	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1450 	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1451 	mutex_exit(&pcp->prc_mutex);
1452 	switch (rval) {
1453 	case 0:
1454 		return (EINTR);
1455 	case -1:
1456 		return (-1);
1457 	default:
1458 		return (0);
1459 	}
1460 }
1461 
1462 /*
1463  * Make all threads in the process runnable.
1464  */
1465 void
1466 allsetrun(proc_t *p)
1467 {
1468 	kthread_t *t;
1469 
1470 	ASSERT(MUTEX_HELD(&p->p_lock));
1471 
1472 	if ((t = p->p_tlist) != NULL) {
1473 		do {
1474 			thread_lock(t);
1475 			ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1476 			t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1477 			if (ISTOPPED(t)) {
1478 				t->t_schedflag |= TS_PSTART;
1479 				t->t_dtrace_stop = 0;
1480 				setrun_locked(t);
1481 			}
1482 			thread_unlock(t);
1483 		} while ((t = t->t_forw) != p->p_tlist);
1484 	}
1485 }
1486 
1487 /*
1488  * Wait for the process to die.
1489  * We do this after sending SIGKILL because we know it will
1490  * die soon and we want subsequent operations to return ENOENT.
1491  */
1492 void
1493 pr_wait_die(prnode_t *pnp)
1494 {
1495 	proc_t *p;
1496 
1497 	mutex_enter(&pidlock);
1498 	while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1499 		if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1500 			break;
1501 	}
1502 	mutex_exit(&pidlock);
1503 }
1504 
1505 static void
1506 pr_settrace(proc_t *p, sigset_t *sp)
1507 {
1508 	prdelset(sp, SIGKILL);
1509 	prassignset(&p->p_sigmask, sp);
1510 	if (!sigisempty(&p->p_sigmask))
1511 		p->p_proc_flag |= P_PR_TRACE;
1512 	else if (prisempty(&p->p_fltmask)) {
1513 		user_t *up = PTOU(p);
1514 		if (up->u_systrap == 0)
1515 			p->p_proc_flag &= ~P_PR_TRACE;
1516 	}
1517 }
1518 
/*
 * Set the current signal (and siginfo) of the lwp chosen by pr_thread().
 *
 * A signal number of zero clears the current signal and discards any
 * current siginfo.  Returns EINVAL if si_signo is negative or not
 * below the signal limit, EBUSY if the lwp's current signal is already
 * SIGKILL, and 0 otherwise.
 *
 * p->p_lock is dropped and reacquired around the KM_SLEEP allocation
 * in the non-zero signal case.
 */
int
pr_setsig(prnode_t *pnp, siginfo_t *sip)
{
	/* signal limit comes from the brand of the calling process */
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
	int sig = sip->si_signo;
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	kthread_t *t;
	klwp_t *lwp;
	int error = 0;

	t = pr_thread(pnp);	/* returns locked thread */
	thread_unlock(t);
	lwp = ttolwp(t);
	if (sig < 0 || sig >= nsig)
		/* Zero allowed here */
		error = EINVAL;
	else if (lwp->lwp_cursig == SIGKILL)
		/* "can't happen", but just in case */
		error = EBUSY;
	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
		/* note: the test above also stores sig as the current signal */
		lwp->lwp_extsig = 0;
		/*
		 * Discard current siginfo_t, if any.
		 */
		if (lwp->lwp_curinfo) {
			siginfofree(lwp->lwp_curinfo);
			lwp->lwp_curinfo = NULL;
		}
	} else {
		kthread_t *tx;
		sigqueue_t *sqp;

		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
		mutex_exit(&p->p_lock);
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
		mutex_enter(&p->p_lock);

		/* reuse an existing siginfo buffer if there is one */
		if (lwp->lwp_curinfo == NULL)
			lwp->lwp_curinfo = sqp;
		else
			kmem_free(sqp, sizeof (sigqueue_t));
		/*
		 * Copy contents of info to current siginfo_t.
		 */
		bcopy(sip, &lwp->lwp_curinfo->sq_info,
		    sizeof (lwp->lwp_curinfo->sq_info));
		/*
		 * Prevent contents published by si_zoneid-unaware /proc
		 * consumers from being incorrectly filtered.  Because
		 * an uninitialized si_zoneid is the same as
		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
		 * process in a non-global zone with a siginfo which
		 * appears to come from the global zone.
		 */
		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
			lwp->lwp_curinfo->sq_info.si_zoneid =
			    p->p_zone->zone_id;
		/*
		 * Side-effects for SIGKILL and jobcontrol signals.
		 */
		if (sig == SIGKILL) {
			p->p_flag |= SKILLED;
			p->p_flag &= ~SEXTKILLED;
		} else if (sig == SIGCONT) {
			/*
			 * SIGCONT cancels pending stop signals, both
			 * process-wide and on every thread.
			 */
			p->p_flag |= SSCONT;
			sigdelq(p, NULL, SIGSTOP);
			sigdelq(p, NULL, SIGTSTP);
			sigdelq(p, NULL, SIGTTOU);
			sigdelq(p, NULL, SIGTTIN);
			sigdiffset(&p->p_sig, &stopdefault);
			sigdiffset(&p->p_extsig, &stopdefault);
			if ((tx = p->p_tlist) != NULL) {
				do {
					sigdelq(p, tx, SIGSTOP);
					sigdelq(p, tx, SIGTSTP);
					sigdelq(p, tx, SIGTTOU);
					sigdelq(p, tx, SIGTTIN);
					sigdiffset(&tx->t_sig, &stopdefault);
					sigdiffset(&tx->t_extsig, &stopdefault);
				} while ((tx = tx->t_forw) != p->p_tlist);
			}
		} else if (sigismember(&stopdefault, sig)) {
			/*
			 * A stop signal cancels any pending SIGCONT,
			 * both process-wide and on every thread.
			 */
			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
				p->p_flag &= ~SSCONT;
			sigdelq(p, NULL, SIGCONT);
			sigdelset(&p->p_sig, SIGCONT);
			sigdelset(&p->p_extsig, SIGCONT);
			if ((tx = p->p_tlist) != NULL) {
				do {
					sigdelq(p, tx, SIGCONT);
					sigdelset(&tx->t_sig, SIGCONT);
					sigdelset(&tx->t_extsig, SIGCONT);
				} while ((tx = tx->t_forw) != p->p_tlist);
			}
		}
		thread_lock(t);
		if (ISWAKEABLE(t) || ISWAITING(t)) {
			/* Set signaled sleeping/waiting lwp running */
			setrun_locked(t);
		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
			/* If SIGKILL, set stopped lwp running */
			p->p_stopsig = 0;
			t->t_schedflag |= TS_XSTART | TS_PSTART;
			t->t_dtrace_stop = 0;
			setrun_locked(t);
		}
		t->t_sig_check = 1;	/* so ISSIG will be done */
		thread_unlock(t);
		/*
		 * More jobcontrol side-effects.
		 */
		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
			p->p_stopsig = 0;
			/* restart every thread stopped by jobcontrol */
			do {
				thread_lock(tx);
				if (tx->t_state == TS_STOPPED &&
				    tx->t_whystop == PR_JOBCONTROL) {
					tx->t_schedflag |= TS_XSTART;
					setrun_locked(tx);
				}
				thread_unlock(tx);
			} while ((tx = tx->t_forw) != p->p_tlist);
		}
	}
	return (error);
}
1647 
1648 int
1649 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1650 {
1651 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1652 	prcommon_t *pcp = pnp->pr_common;
1653 	proc_t *p = pcp->prc_proc;
1654 	k_siginfo_t info;
1655 
1656 	if (sig <= 0 || sig >= nsig)
1657 		return (EINVAL);
1658 
1659 	bzero(&info, sizeof (info));
1660 	info.si_signo = sig;
1661 	info.si_code = SI_USER;
1662 	info.si_pid = curproc->p_pid;
1663 	info.si_ctid = PRCTID(curproc);
1664 	info.si_zoneid = getzoneid();
1665 	info.si_uid = crgetruid(cr);
1666 	sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1667 	    pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1668 
1669 	return (0);
1670 }
1671 
1672 int
1673 pr_unkill(prnode_t *pnp, int sig)
1674 {
1675 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1676 	prcommon_t *pcp = pnp->pr_common;
1677 	proc_t *p = pcp->prc_proc;
1678 	sigqueue_t *infop = NULL;
1679 
1680 	if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1681 		return (EINVAL);
1682 
1683 	if (pcp->prc_flags & PRC_LWP)
1684 		sigdeq(p, pcp->prc_thread, sig, &infop);
1685 	else
1686 		sigdeq(p, NULL, sig, &infop);
1687 
1688 	if (infop)
1689 		siginfofree(infop);
1690 
1691 	return (0);
1692 }
1693 
1694 int
1695 pr_nice(proc_t *p, int nice, cred_t *cr)
1696 {
1697 	kthread_t *t;
1698 	int err;
1699 	int error = 0;
1700 
1701 	t = p->p_tlist;
1702 	do {
1703 		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1704 		err = CL_DONICE(t, cr, nice, (int *)NULL);
1705 		schedctl_set_cidpri(t);
1706 		if (error == 0)
1707 			error = err;
1708 	} while ((t = t->t_forw) != p->p_tlist);
1709 
1710 	return (error);
1711 }
1712 
1713 void
1714 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1715 {
1716 	user_t *up = PTOU(p);
1717 
1718 	if (entry) {
1719 		prassignset(&up->u_entrymask, sysset);
1720 	} else {
1721 		prassignset(&up->u_exitmask, sysset);
1722 	}
1723 	if (!prisempty(&up->u_entrymask) ||
1724 	    !prisempty(&up->u_exitmask)) {
1725 		up->u_systrap = 1;
1726 		p->p_proc_flag |= P_PR_TRACE;
1727 		set_proc_sys(p);	/* set pre and post-sys flags */
1728 	} else {
1729 		up->u_systrap = 0;
1730 		if (sigisempty(&p->p_sigmask) &&
1731 		    prisempty(&p->p_fltmask))
1732 			p->p_proc_flag &= ~P_PR_TRACE;
1733 	}
1734 }
1735 
/*
 * The complete set of mode flags accepted by pr_set() and pr_unset();
 * a request containing any bit outside this set is rejected with EINVAL.
 */
#define	ALLFLAGS	\
	(PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1738 
1739 int
1740 pr_set(proc_t *p, long flags)
1741 {
1742 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1743 		return (EBUSY);
1744 
1745 	if (flags & ~ALLFLAGS)
1746 		return (EINVAL);
1747 
1748 	if (flags & PR_FORK)
1749 		p->p_proc_flag |= P_PR_FORK;
1750 	if (flags & PR_RLC)
1751 		p->p_proc_flag |= P_PR_RUNLCL;
1752 	if (flags & PR_KLC)
1753 		p->p_proc_flag |= P_PR_KILLCL;
1754 	if (flags & PR_ASYNC)
1755 		p->p_proc_flag |= P_PR_ASYNC;
1756 	if (flags & PR_BPTADJ)
1757 		p->p_proc_flag |= P_PR_BPTADJ;
1758 	if (flags & PR_MSACCT)
1759 		if ((p->p_flag & SMSACCT) == 0)
1760 			estimate_msacct(p->p_tlist, gethrtime());
1761 	if (flags & PR_MSFORK)
1762 		p->p_flag |= SMSFORK;
1763 	if (flags & PR_PTRACE) {
1764 		p->p_proc_flag |= P_PR_PTRACE;
1765 		/* ptraced process must die if parent dead */
1766 		if (p->p_ppid == 1)
1767 			sigtoproc(p, NULL, SIGKILL);
1768 	}
1769 
1770 	return (0);
1771 }
1772 
1773 int
1774 pr_unset(proc_t *p, long flags)
1775 {
1776 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1777 		return (EBUSY);
1778 
1779 	if (flags & ~ALLFLAGS)
1780 		return (EINVAL);
1781 
1782 	if (flags & PR_FORK)
1783 		p->p_proc_flag &= ~P_PR_FORK;
1784 	if (flags & PR_RLC)
1785 		p->p_proc_flag &= ~P_PR_RUNLCL;
1786 	if (flags & PR_KLC)
1787 		p->p_proc_flag &= ~P_PR_KILLCL;
1788 	if (flags & PR_ASYNC)
1789 		p->p_proc_flag &= ~P_PR_ASYNC;
1790 	if (flags & PR_BPTADJ)
1791 		p->p_proc_flag &= ~P_PR_BPTADJ;
1792 	if (flags & PR_MSACCT)
1793 		disable_msacct(p);
1794 	if (flags & PR_MSFORK)
1795 		p->p_flag &= ~SMSFORK;
1796 	if (flags & PR_PTRACE)
1797 		p->p_proc_flag &= ~P_PR_PTRACE;
1798 
1799 	return (0);
1800 }
1801 
1802 static int
1803 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1804 {
1805 	proc_t *p = pnp->pr_common->prc_proc;
1806 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1807 
1808 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1809 		thread_unlock(t);
1810 		return (EBUSY);
1811 	}
1812 	if (!prhasfp()) {
1813 		thread_unlock(t);
1814 		return (EINVAL);	/* No FP support */
1815 	}
1816 
1817 	/* drop p_lock while touching the lwp's stack */
1818 	thread_unlock(t);
1819 	mutex_exit(&p->p_lock);
1820 	prsetprfpregs(ttolwp(t), prfpregset);
1821 	mutex_enter(&p->p_lock);
1822 
1823 	return (0);
1824 }
1825 
#ifdef	_SYSCALL32_IMPL
/*
 * Set the floating-point registers of the chosen lwp from a 32-bit
 * register set.
 *
 * Returns EBUSY unless the lwp is stopped in one of the ISTOPPED,
 * VSTOPPED or DSTOPPED senses, EINVAL when prhasfp() reports no
 * floating-point support, and 0 on success.  p->p_lock is dropped
 * and reacquired around the register update.
 */
static int
pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *tp = pr_thread(pnp);	/* returns locked thread */
	int error = 0;

	if (!(ISTOPPED(tp) || VSTOPPED(tp) || DSTOPPED(tp)))
		error = EBUSY;
	else if (!prhasfp())
		error = EINVAL;	/* No FP support */

	thread_unlock(tp);
	if (error != 0)
		return (error);

	/* drop p_lock while touching the lwp's stack */
	mutex_exit(&p->p_lock);
	prsetprfpregs32(ttolwp(tp), prfpregset);
	mutex_enter(&p->p_lock);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */
1851 
1852 /* ARGSUSED */
1853 static int
1854 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1855 {
1856 	int error;
1857 	proc_t *p = pnp->pr_common->prc_proc;
1858 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1859 
1860 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1861 		thread_unlock(t);
1862 		return (EBUSY);
1863 	}
1864 	thread_unlock(t);
1865 
1866 	if (!prhasx(p))
1867 		return (EINVAL);	/* No extra register support */
1868 
1869 	/* drop p_lock while touching the lwp's stack */
1870 	mutex_exit(&p->p_lock);
1871 	error = prsetprxregs(ttolwp(t), prxregset);
1872 	mutex_enter(&p->p_lock);
1873 
1874 	return (error);
1875 }
1876 
1877 static int
1878 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1879 {
1880 	proc_t *p = pnp->pr_common->prc_proc;
1881 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1882 
1883 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1884 		thread_unlock(t);
1885 		return (EBUSY);
1886 	}
1887 
1888 	/* drop p_lock while touching the lwp's stack */
1889 	thread_unlock(t);
1890 	mutex_exit(&p->p_lock);
1891 	prsvaddr(ttolwp(t), vaddr);
1892 	mutex_enter(&p->p_lock);
1893 
1894 	return (0);
1895 }
1896 
/*
 * Replace the held-signal mask (t_hold) of the chosen lwp with *sp.
 *
 * Everything between pr_thread() and thread_unlock() runs with the
 * thread lock held.
 */
void
pr_sethold(prnode_t *pnp, sigset_t *sp)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */

	schedctl_finish_sigblock(t);
	sigutok(sp, &t->t_hold);
	/*
	 * If the lwp is in a wakeable sleep and fsig() finds a signal
	 * in either the process or the thread pending set under the
	 * new mask, set the lwp running.
	 */
	if (ISWAKEABLE(t) &&
	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
		setrun_locked(t);
	t->t_sig_check = 1;	/* so thread will see new holdmask */
	thread_unlock(t);
}
1911 
1912 void
1913 pr_setfault(proc_t *p, fltset_t *fltp)
1914 {
1915 	prassignset(&p->p_fltmask, fltp);
1916 	if (!prisempty(&p->p_fltmask))
1917 		p->p_proc_flag |= P_PR_TRACE;
1918 	else if (sigisempty(&p->p_sigmask)) {
1919 		user_t *up = PTOU(p);
1920 		if (up->u_systrap == 0)
1921 			p->p_proc_flag &= ~P_PR_TRACE;
1922 	}
1923 }
1924 
1925 static int
1926 pr_clearsig(prnode_t *pnp)
1927 {
1928 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1929 	klwp_t *lwp = ttolwp(t);
1930 
1931 	thread_unlock(t);
1932 	if (lwp->lwp_cursig == SIGKILL)
1933 		return (EBUSY);
1934 
1935 	/*
1936 	 * Discard current siginfo_t, if any.
1937 	 */
1938 	lwp->lwp_cursig = 0;
1939 	lwp->lwp_extsig = 0;
1940 	if (lwp->lwp_curinfo) {
1941 		siginfofree(lwp->lwp_curinfo);
1942 		lwp->lwp_curinfo = NULL;
1943 	}
1944 
1945 	return (0);
1946 }
1947 
1948 static int
1949 pr_clearflt(prnode_t *pnp)
1950 {
1951 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1952 
1953 	thread_unlock(t);
1954 	ttolwp(t)->lwp_curflt = 0;
1955 
1956 	return (0);
1957 }
1958 
/*
 * Set or clear a watched area in the target process.
 *
 * pwp describes the area: pr_vaddr/pr_size give the range and
 * pr_wflags the access types.  If no flag other than WA_TRAPAFTER is
 * set, the request clears the area and the size is ignored.
 *
 * *unlocked is set to 0 on entry and to 1 only on the error paths
 * where prlock() failed after the node had been unlocked here; it
 * tells the caller that the node is no longer locked.
 *
 * Returns EBUSY for a system process, EINVAL for a wrapping range,
 * unknown flags or a zero-length set request, E2BIG when the request
 * spans more than 2 * prnwatch pages, an error from prlock(), or the
 * result of set_watched_area()/clear_watched_area().  A range that
 * starts at or above as->a_userlimit is silently accepted (returns 0).
 */
static int
pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
{
	proc_t *p = pnp->pr_common->prc_proc;
	struct as *as = p->p_as;
	uintptr_t vaddr = pwp->pr_vaddr;
	size_t size = pwp->pr_size;
	int wflags = pwp->pr_wflags;
	ulong_t newpage = 0;
	struct watched_area *pwa;
	int error;

	*unlocked = 0;

	/*
	 * Can't apply to a system process.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		return (EBUSY);

	/*
	 * Verify that the address range does not wrap
	 * and that only the proper flags were specified.
	 */
	if ((wflags & ~WA_TRAPAFTER) == 0)
		size = 0;
	if (vaddr + size < vaddr ||
	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
		return (EINVAL);

	/*
	 * Don't let the address range go above as->a_userlimit.
	 * There is no error here, just a limitation.
	 */
	if (vaddr >= (uintptr_t)as->a_userlimit)
		return (0);
	if (vaddr + size > (uintptr_t)as->a_userlimit)
		size = (uintptr_t)as->a_userlimit - vaddr;

	/*
	 * Compute maximum number of pages this will add.
	 */
	if ((wflags & ~WA_TRAPAFTER) != 0) {
		/* span measured from the start of the first page touched */
		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
		newpage = btopr(pagespan);
		if (newpage > 2 * prnwatch)
			return (E2BIG);
	}

	/*
	 * Force the process to be fully stopped.
	 */
	if (p == curproc) {
		/* operating on ourselves: unlock, then hold the other lwps */
		prunlock(pnp);
		while (holdwatch() != 0)
			continue;
		if ((error = prlock(pnp, ZNO)) != 0) {
			continuelwps(p);
			*unlocked = 1;
			return (error);
		}
	} else {
		pauselwps(p);
		while (pr_allstopped(p, 0) > 0) {
			/*
			 * This cv/mutex pair is persistent even
			 * if the process disappears after we
			 * unmark it and drop p->p_lock.
			 */
			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
			kmutex_t *mp = &p->p_lock;

			prunmark(p);
			(void) cv_wait(cv, mp);
			mutex_exit(mp);
			if ((error = prlock(pnp, ZNO)) != 0) {
				/*
				 * Unpause the process if it exists.
				 */
				p = pr_p_lock(pnp);
				mutex_exit(&pr_pidlock);
				if (p != NULL) {
					unpauselwps(p);
					prunlock(pnp);
				}
				*unlocked = 1;
				return (error);
			}
		}
	}

	/*
	 * Drop p->p_lock in order to perform the rest of this.
	 * The process is still locked with the P_PR_LOCK flag.
	 */
	mutex_exit(&p->p_lock);

	/*
	 * NOTE(review): pwa is not freed in this function; presumably
	 * set_watched_area()/clear_watched_area() take ownership of it.
	 * Confirm against their definitions.
	 */
	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
	pwa->wa_vaddr = (caddr_t)vaddr;
	pwa->wa_eaddr = (caddr_t)vaddr + size;
	pwa->wa_flags = (ulong_t)wflags;

	/* no access flags means "clear", anything else means "set" */
	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);

	/* undo the hold/pause taken above, reacquiring p->p_lock */
	if (p == curproc) {
		setallwatch();
		mutex_enter(&p->p_lock);
		continuelwps(p);
	} else {
		mutex_enter(&p->p_lock);
		unpauselwps(p);
	}

	return (error);
}
2076 
/*
 * jobcontrol stopped, but with a /proc directed stop in effect:
 * true when the thread is in TS_STOPPED state because of
 * PR_JOBCONTROL and also has TP_PRSTOP set in t_proc_flag.
 * The argument is evaluated more than once.
 */
#define	JDSTOPPED(t)	\
	((t)->t_state == TS_STOPPED && \
	(t)->t_whystop == PR_JOBCONTROL && \
	((t)->t_proc_flag & TP_PRSTOP))
2082 
2083 /*
2084  * pr_agent() creates the agent lwp. If the process is exiting while
2085  * we are creating an agent lwp, then exitlwps() waits until the
2086  * agent has been created using prbarrier().
2087  */
2088 static int
2089 pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
2090 {
2091 	proc_t *p = pnp->pr_common->prc_proc;
2092 	prcommon_t *pcp;
2093 	kthread_t *t;
2094 	kthread_t *ct;
2095 	klwp_t *clwp;
2096 	k_sigset_t smask;
2097 	int cid;
2098 	void *bufp = NULL;
2099 	int error;
2100 
2101 	*unlocked = 0;
2102 
2103 	/*
2104 	 * Cannot create the /proc agent lwp if :-
2105 	 * - the process is not fully stopped or directed to stop.
2106 	 * - there is an agent lwp already.
2107 	 * - the process has been killed.
2108 	 * - the process is exiting.
2109 	 * - it's a vfork(2) parent.
2110 	 */
2111 	t = prchoose(p);	/* returns locked thread */
2112 	ASSERT(t != NULL);
2113 
2114 	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
2115 	    p->p_agenttp != NULL ||
2116 	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
2117 		thread_unlock(t);
2118 		return (EBUSY);
2119 	}
2120 
2121 	thread_unlock(t);
2122 	mutex_exit(&p->p_lock);
2123 
2124 	sigfillset(&smask);
2125 	sigdiffset(&smask, &cantmask);
2126 	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
2127 	    t->t_pri, &smask, NOCLASS, 0);
2128 	if (clwp == NULL) {
2129 		mutex_enter(&p->p_lock);
2130 		return (ENOMEM);
2131 	}
2132 	prsetprregs(clwp, prgregset, 1);
2133 
2134 	/*
2135 	 * Because abandoning the agent inside the target process leads to
2136 	 * a state that is essentially undebuggable, we record the psinfo of
2137 	 * the process creating the agent and hang that off of the lwp.
2138 	 */
2139 	clwp->lwp_spymaster = kmem_zalloc(sizeof (psinfo_t), KM_SLEEP);
2140 	mutex_enter(&curproc->p_lock);
2141 	prgetpsinfo(curproc, clwp->lwp_spymaster);
2142 	mutex_exit(&curproc->p_lock);
2143 
2144 	/*
2145 	 * We overload pr_time in the spymaster to denote the time at which the
2146 	 * agent was created.
2147 	 */
2148 	gethrestime(&clwp->lwp_spymaster->pr_time);
2149 
2150 retry:
2151 	cid = t->t_cid;
2152 	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
2153 	mutex_enter(&p->p_lock);
2154 	if (cid != t->t_cid) {
2155 		/*
2156 		 * Someone just changed this thread's scheduling class,
2157 		 * so try pre-allocating the buffer again.  Hopefully we
2158 		 * don't hit this often.
2159 		 */
2160 		mutex_exit(&p->p_lock);
2161 		CL_FREE(cid, bufp);
2162 		goto retry;
2163 	}
2164 
2165 	clwp->lwp_ap = clwp->lwp_arg;
2166 	clwp->lwp_eosys = NORMALRETURN;
2167 	ct = lwptot(clwp);
2168 	ct->t_clfuncs = t->t_clfuncs;
2169 	CL_FORK(t, ct, bufp);
2170 	ct->t_cid = t->t_cid;
2171 	ct->t_proc_flag |= TP_PRSTOP;
2172 	/*
2173 	 * Setting t_sysnum to zero causes post_syscall()
2174 	 * to bypass all syscall checks and go directly to
2175 	 *	if (issig()) psig();
2176 	 * so that the agent lwp will stop in issig_forreal()
2177 	 * showing PR_REQUESTED.
2178 	 */
2179 	ct->t_sysnum = 0;
2180 	ct->t_post_sys = 1;
2181 	ct->t_sig_check = 1;
2182 	p->p_agenttp = ct;
2183 	ct->t_proc_flag &= ~TP_HOLDLWP;
2184 
2185 	pcp = pnp->pr_pcommon;
2186 	mutex_enter(&pcp->prc_mutex);
2187 
2188 	lwp_create_done(ct);
2189 
2190 	/*
2191 	 * Don't return until the agent is stopped on PR_REQUESTED.
2192 	 */
2193 
2194 	for (;;) {
2195 		prunlock(pnp);
2196 		*unlocked = 1;
2197 
2198 		/*
2199 		 * Wait for the agent to stop and notify us.
2200 		 * If we've been interrupted, return that information.
2201 		 */
2202 		error = pr_wait(pcp, NULL, 0);
2203 		if (error == EINTR) {
2204 			error = 0;
2205 			break;
2206 		}
2207 
2208 		/*
2209 		 * Confirm that the agent LWP has stopped.
2210 		 */
2211 
2212 		if ((error = prlock(pnp, ZNO)) != 0)
2213 			break;
2214 		*unlocked = 0;
2215 
2216 		/*
2217 		 * Since we dropped the lock on the process, the agent
2218 		 * may have disappeared or changed. Grab the current
2219 		 * agent and check fail if it has disappeared.
2220 		 */
2221 		if ((ct = p->p_agenttp) == NULL) {
2222 			error = ENOENT;
2223 			break;
2224 		}
2225 
2226 		mutex_enter(&pcp->prc_mutex);
2227 		thread_lock(ct);
2228 
2229 		if (ISTOPPED(ct)) {
2230 			thread_unlock(ct);
2231 			mutex_exit(&pcp->prc_mutex);
2232 			break;
2233 		}
2234 
2235 		thread_unlock(ct);
2236 	}
2237 
2238 	return (error ? error : -1);
2239 }
2240 
2241 static int
2242 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2243 {
2244 	caddr_t base = (caddr_t)pio->pio_base;
2245 	size_t cnt = pio->pio_len;
2246 	uintptr_t offset = (uintptr_t)pio->pio_offset;
2247 	struct uio auio;
2248 	struct iovec aiov;
2249 	int error = 0;
2250 
2251 	if ((p->p_flag & SSYS) || p->p_as == &kas)
2252 		error = EIO;
2253 	else if ((base + cnt) < base || (offset + cnt) < offset)
2254 		error = EINVAL;
2255 	else if (cnt != 0) {
2256 		aiov.iov_base = base;
2257 		aiov.iov_len = cnt;
2258 
2259 		auio.uio_loffset = offset;
2260 		auio.uio_iov = &aiov;
2261 		auio.uio_iovcnt = 1;
2262 		auio.uio_resid = cnt;
2263 		auio.uio_segflg = UIO_USERSPACE;
2264 		auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2265 		auio.uio_fmode = FREAD|FWRITE;
2266 		auio.uio_extflg = UIO_COPY_DEFAULT;
2267 
2268 		mutex_exit(&p->p_lock);
2269 		error = prusrio(p, rw, &auio, 0);
2270 		mutex_enter(&p->p_lock);
2271 
2272 		/*
2273 		 * We have no way to return the i/o count,
2274 		 * like read() or write() would do, so we
2275 		 * return an error if the i/o was truncated.
2276 		 */
2277 		if (auio.uio_resid != 0 && error == 0)
2278 			error = EIO;
2279 	}
2280 
2281 	return (error);
2282 }
2283 
2284 static int
2285 pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2286 {
2287 	kthread_t *t;
2288 	cred_t *oldcred;
2289 	cred_t *newcred;
2290 	uid_t oldruid;
2291 	int error;
2292 	zone_t *zone = crgetzone(cr);
2293 
2294 	if (!VALID_UID(prcred->pr_euid, zone) ||
2295 	    !VALID_UID(prcred->pr_ruid, zone) ||
2296 	    !VALID_UID(prcred->pr_suid, zone) ||
2297 	    !VALID_GID(prcred->pr_egid, zone) ||
2298 	    !VALID_GID(prcred->pr_rgid, zone) ||
2299 	    !VALID_GID(prcred->pr_sgid, zone))
2300 		return (EINVAL);
2301 
2302 	if (dogrps) {
2303 		int ngrp = prcred->pr_ngroups;
2304 		int i;
2305 
2306 		if (ngrp < 0 || ngrp > ngroups_max)
2307 			return (EINVAL);
2308 
2309 		for (i = 0; i < ngrp; i++) {
2310 			if (!VALID_GID(prcred->pr_groups[i], zone))
2311 				return (EINVAL);
2312 		}
2313 	}
2314 
2315 	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2316 
2317 	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2318 		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2319 
2320 	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2321 	    prcred->pr_suid != prcred->pr_ruid)
2322 		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2323 
2324 	if (error)
2325 		return (error);
2326 
2327 	mutex_exit(&p->p_lock);
2328 
2329 	/* hold old cred so it doesn't disappear while we dup it */
2330 	mutex_enter(&p->p_crlock);
2331 	crhold(oldcred = p->p_cred);
2332 	mutex_exit(&p->p_crlock);
2333 	newcred = crdup(oldcred);
2334 	oldruid = crgetruid(oldcred);
2335 	crfree(oldcred);
2336 
2337 	/* Error checking done above */
2338 	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2339 	    prcred->pr_suid);
2340 	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2341 	    prcred->pr_sgid);
2342 
2343 	if (dogrps) {
2344 		(void) crsetgroups(newcred, prcred->pr_ngroups,
2345 		    prcred->pr_groups);
2346 
2347 	}
2348 
2349 	mutex_enter(&p->p_crlock);
2350 	oldcred = p->p_cred;
2351 	p->p_cred = newcred;
2352 	mutex_exit(&p->p_crlock);
2353 	crfree(oldcred);
2354 
2355 	/*
2356 	 * Keep count of processes per uid consistent.
2357 	 */
2358 	if (oldruid != prcred->pr_ruid) {
2359 		zoneid_t zoneid = crgetzoneid(newcred);
2360 
2361 		mutex_enter(&pidlock);
2362 		upcount_dec(oldruid, zoneid);
2363 		upcount_inc(prcred->pr_ruid, zoneid);
2364 		mutex_exit(&pidlock);
2365 	}
2366 
2367 	/*
2368 	 * Broadcast the cred change to the threads.
2369 	 */
2370 	mutex_enter(&p->p_lock);
2371 	t = p->p_tlist;
2372 	do {
2373 		t->t_pre_sys = 1; /* so syscall will get new cred */
2374 	} while ((t = t->t_forw) != p->p_tlist);
2375 
2376 	return (0);
2377 }
2378 
2379 /*
2380  * Change process credentials to specified zone.  Used to temporarily
2381  * set a process to run in the global zone; only transitions between
2382  * the process's actual zone and the global zone are allowed.
2383  */
2384 static int
2385 pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2386 {
2387 	kthread_t *t;
2388 	cred_t *oldcred;
2389 	cred_t *newcred;
2390 	zone_t *zptr;
2391 	zoneid_t oldzoneid;
2392 
2393 	if (secpolicy_zone_config(cr) != 0)
2394 		return (EPERM);
2395 	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2396 		return (EINVAL);
2397 	if ((zptr = zone_find_by_id(zoneid)) == NULL)
2398 		return (EINVAL);
2399 	mutex_exit(&p->p_lock);
2400 	mutex_enter(&p->p_crlock);
2401 	oldcred = p->p_cred;
2402 	crhold(oldcred);
2403 	mutex_exit(&p->p_crlock);
2404 	newcred = crdup(oldcred);
2405 	oldzoneid = crgetzoneid(oldcred);
2406 	crfree(oldcred);
2407 
2408 	crsetzone(newcred, zptr);
2409 	zone_rele(zptr);
2410 
2411 	mutex_enter(&p->p_crlock);
2412 	oldcred = p->p_cred;
2413 	p->p_cred = newcred;
2414 	mutex_exit(&p->p_crlock);
2415 	crfree(oldcred);
2416 
2417 	/*
2418 	 * The target process is changing zones (according to its cred), so
2419 	 * update the per-zone upcounts, which are based on process creds.
2420 	 */
2421 	if (oldzoneid != zoneid) {
2422 		uid_t ruid = crgetruid(newcred);
2423 
2424 		mutex_enter(&pidlock);
2425 		upcount_dec(ruid, oldzoneid);
2426 		upcount_inc(ruid, zoneid);
2427 		mutex_exit(&pidlock);
2428 	}
2429 	/*
2430 	 * Broadcast the cred change to the threads.
2431 	 */
2432 	mutex_enter(&p->p_lock);
2433 	t = p->p_tlist;
2434 	do {
2435 		t->t_pre_sys = 1;	/* so syscall will get new cred */
2436 	} while ((t = t->t_forw) != p->p_tlist);
2437 
2438 	return (0);
2439 }
2440 
2441 static int
2442 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2443 {
2444 	kthread_t *t;
2445 	int err;
2446 
2447 	ASSERT(MUTEX_HELD(&p->p_lock));
2448 
2449 	if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2450 		/*
2451 		 * Broadcast the cred change to the threads.
2452 		 */
2453 		t = p->p_tlist;
2454 		do {
2455 			t->t_pre_sys = 1; /* so syscall will get new cred */
2456 		} while ((t = t->t_forw) != p->p_tlist);
2457 	}
2458 
2459 	return (err);
2460 }
2461 
2462 /*
2463  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2464  * terminate or perform an exec(2).
2465  *
2466  * Returns 0 if the process is fully stopped except for the current thread (if
2467  * we are operating on our own process), 1 otherwise.
2468  *
2469  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2470  * See holdwatch() for details.
2471  */
2472 int
2473 pr_allstopped(proc_t *p, int watchstop)
2474 {
2475 	kthread_t *t;
2476 	int rv = 0;
2477 
2478 	ASSERT(MUTEX_HELD(&p->p_lock));
2479 
2480 	if (p->p_flag & SVFWAIT)	/* waiting for vfork'd child to exec */
2481 		return (-1);
2482 
2483 	if ((t = p->p_tlist) != NULL) {
2484 		do {
2485 			if (t == curthread || VSTOPPED(t) ||
2486 			    (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2487 				continue;
2488 			thread_lock(t);
2489 			switch (t->t_state) {
2490 			case TS_ZOMB:
2491 			case TS_STOPPED:
2492 				break;
2493 			case TS_SLEEP:
2494 				if (!(t->t_flag & T_WAKEABLE) ||
2495 				    t->t_wchan0 == NULL)
2496 					rv = 1;
2497 				break;
2498 			default:
2499 				rv = 1;
2500 				break;
2501 			}
2502 			thread_unlock(t);
2503 		} while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2504 	}
2505 
2506 	return (rv);
2507 }
2508 
2509 /*
2510  * Cause all lwps in the process to pause (for watchpoint operations).
2511  */
2512 static void
2513 pauselwps(proc_t *p)
2514 {
2515 	kthread_t *t;
2516 
2517 	ASSERT(MUTEX_HELD(&p->p_lock));
2518 	ASSERT(p != curproc);
2519 
2520 	if ((t = p->p_tlist) != NULL) {
2521 		do {
2522 			thread_lock(t);
2523 			t->t_proc_flag |= TP_PAUSE;
2524 			aston(t);
2525 			if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2526 			    ISWAITING(t)) {
2527 				setrun_locked(t);
2528 			}
2529 			prpokethread(t);
2530 			thread_unlock(t);
2531 		} while ((t = t->t_forw) != p->p_tlist);
2532 	}
2533 }
2534 
2535 /*
2536  * undo the effects of pauselwps()
2537  */
2538 static void
2539 unpauselwps(proc_t *p)
2540 {
2541 	kthread_t *t;
2542 
2543 	ASSERT(MUTEX_HELD(&p->p_lock));
2544 	ASSERT(p != curproc);
2545 
2546 	if ((t = p->p_tlist) != NULL) {
2547 		do {
2548 			thread_lock(t);
2549 			t->t_proc_flag &= ~TP_PAUSE;
2550 			if (t->t_state == TS_STOPPED) {
2551 				t->t_schedflag |= TS_UNPAUSE;
2552 				t->t_dtrace_stop = 0;
2553 				setrun_locked(t);
2554 			}
2555 			thread_unlock(t);
2556 		} while ((t = t->t_forw) != p->p_tlist);
2557 	}
2558 }
2559 
2560 /*
2561  * Cancel all watched areas.  Called from prclose().
2562  */
2563 proc_t *
2564 pr_cancel_watch(prnode_t *pnp)
2565 {
2566 	proc_t *p = pnp->pr_pcommon->prc_proc;
2567 	struct as *as;
2568 	kthread_t *t;
2569 
2570 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2571 
2572 	if (!pr_watch_active(p))
2573 		return (p);
2574 
2575 	/*
2576 	 * Pause the process before dealing with the watchpoints.
2577 	 */
2578 	if (p == curproc) {
2579 		prunlock(pnp);
2580 		while (holdwatch() != 0)
2581 			continue;
2582 		p = pr_p_lock(pnp);
2583 		mutex_exit(&pr_pidlock);
2584 		ASSERT(p == curproc);
2585 	} else {
2586 		pauselwps(p);
2587 		while (p != NULL && pr_allstopped(p, 0) > 0) {
2588 			/*
2589 			 * This cv/mutex pair is persistent even
2590 			 * if the process disappears after we
2591 			 * unmark it and drop p->p_lock.
2592 			 */
2593 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2594 			kmutex_t *mp = &p->p_lock;
2595 
2596 			prunmark(p);
2597 			(void) cv_wait(cv, mp);
2598 			mutex_exit(mp);
2599 			p = pr_p_lock(pnp);  /* NULL if process disappeared */
2600 			mutex_exit(&pr_pidlock);
2601 		}
2602 	}
2603 
2604 	if (p == NULL)		/* the process disappeared */
2605 		return (NULL);
2606 
2607 	ASSERT(p == pnp->pr_pcommon->prc_proc);
2608 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2609 
2610 	if (pr_watch_active(p)) {
2611 		pr_free_watchpoints(p);
2612 		if ((t = p->p_tlist) != NULL) {
2613 			do {
2614 				watch_disable(t);
2615 
2616 			} while ((t = t->t_forw) != p->p_tlist);
2617 		}
2618 	}
2619 
2620 	if ((as = p->p_as) != NULL) {
2621 		avl_tree_t *tree;
2622 		struct watched_page *pwp;
2623 
2624 		/*
2625 		 * If this is the parent of a vfork, the watched page
2626 		 * list has been moved temporarily to p->p_wpage.
2627 		 */
2628 		if (avl_numnodes(&p->p_wpage) != 0)
2629 			tree = &p->p_wpage;
2630 		else
2631 			tree = &as->a_wpage;
2632 
2633 		mutex_exit(&p->p_lock);
2634 		AS_LOCK_ENTER(as, RW_WRITER);
2635 
2636 		for (pwp = avl_first(tree); pwp != NULL;
2637 		    pwp = AVL_NEXT(tree, pwp)) {
2638 			pwp->wp_read = 0;
2639 			pwp->wp_write = 0;
2640 			pwp->wp_exec = 0;
2641 			if ((pwp->wp_flags & WP_SETPROT) == 0) {
2642 				pwp->wp_flags |= WP_SETPROT;
2643 				pwp->wp_prot = pwp->wp_oprot;
2644 				pwp->wp_list = p->p_wprot;
2645 				p->p_wprot = pwp;
2646 			}
2647 		}
2648 
2649 		AS_LOCK_EXIT(as);
2650 		mutex_enter(&p->p_lock);
2651 	}
2652 
2653 	/*
2654 	 * Unpause the process now.
2655 	 */
2656 	if (p == curproc)
2657 		continuelwps(p);
2658 	else
2659 		unpauselwps(p);
2660 
2661 	return (p);
2662 }
2663