xref: /illumos-gate/usr/src/uts/common/fs/proc/prsubr.c (revision 5328fc53d11d7151861fa272e4fb0248b8f0e145)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #include <sys/types.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/cmn_err.h>
34 #include <sys/cred.h>
35 #include <sys/priv.h>
36 #include <sys/debug.h>
37 #include <sys/errno.h>
38 #include <sys/inline.h>
39 #include <sys/kmem.h>
40 #include <sys/mman.h>
41 #include <sys/proc.h>
42 #include <sys/brand.h>
43 #include <sys/sobject.h>
44 #include <sys/sysmacros.h>
45 #include <sys/systm.h>
46 #include <sys/uio.h>
47 #include <sys/var.h>
48 #include <sys/vfs.h>
49 #include <sys/vnode.h>
50 #include <sys/session.h>
51 #include <sys/pcb.h>
52 #include <sys/signal.h>
53 #include <sys/user.h>
54 #include <sys/disp.h>
55 #include <sys/class.h>
56 #include <sys/ts.h>
57 #include <sys/bitmap.h>
58 #include <sys/poll.h>
59 #include <sys/shm_impl.h>
60 #include <sys/fault.h>
61 #include <sys/syscall.h>
62 #include <sys/procfs.h>
63 #include <sys/processor.h>
64 #include <sys/cpuvar.h>
65 #include <sys/copyops.h>
66 #include <sys/time.h>
67 #include <sys/msacct.h>
68 #include <vm/as.h>
69 #include <vm/rm.h>
70 #include <vm/seg.h>
71 #include <vm/seg_vn.h>
72 #include <vm/seg_dev.h>
73 #include <vm/seg_spt.h>
74 #include <vm/page.h>
75 #include <sys/vmparam.h>
76 #include <sys/swap.h>
77 #include <fs/proc/prdata.h>
78 #include <sys/task.h>
79 #include <sys/project.h>
80 #include <sys/contract_impl.h>
81 #include <sys/contract/process.h>
82 #include <sys/contract/process_impl.h>
83 #include <sys/schedctl.h>
84 #include <sys/pool.h>
85 #include <sys/zone.h>
86 #include <sys/atomic.h>
87 #include <sys/sdt.h>
88 
#define	MAX_ITERS_SPIN	5	/* spin-retry bound -- presumably used by */
				/* the stop/wait loops below; confirm */

/*
 * Scratch vectors used when collecting per-page protection and
 * in-core residency information for an address-space segment.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/* Forward declarations for the watchpoint page-table helpers. */
static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
105 
106 /*
107  * Choose an lwp from the complete set of lwps for the process.
108  * This is called for any operation applied to the process
109  * file descriptor that requires an lwp to operate upon.
110  *
111  * Returns a pointer to the thread for the selected LWP,
112  * and with the dispatcher lock held for the thread.
113  *
114  * The algorithm for choosing an lwp is critical for /proc semantics;
115  * don't touch this code unless you know all of the implications.
116  */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/*
			 * this is filthy: an lwp sleeping on p_holdlwps
			 * with a NULL t_wchan0 is performing a hold;
			 * distinguish it from ordinary sleepers.
			 */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * Selection priority, highest first: on-processor, runnable,
	 * sleeping, job-control stops, event-of-interest stops, DTrace
	 * stops, requested stops, hold sleeps, suspensions, and finally
	 * (only zombies remain) the head of the thread list.
	 */
	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}
252 
253 /*
254  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
255  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
256  * on the /proc file descriptor.  Called from stop() when a traced
257  * process stops on an event of interest.  Also called from exit()
258  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
259  */
260 void
261 prnotify(struct vnode *vp)
262 {
263 	prcommon_t *pcp = VTOP(vp)->pr_common;
264 
265 	mutex_enter(&pcp->prc_mutex);
266 	cv_broadcast(&pcp->prc_wait);
267 	mutex_exit(&pcp->prc_mutex);
268 	if (pcp->prc_flags & PRC_POLL) {
269 		/*
270 		 * We call pollwakeup() with POLLHUP to ensure that
271 		 * the pollers are awakened even if they are polling
272 		 * for nothing (i.e., waiting for the process to exit).
273 		 * This enables the use of the PRC_POLL flag for optimization
274 		 * (we can turn off PRC_POLL only if we know no pollers remain).
275 		 */
276 		pcp->prc_flags &= ~PRC_POLL;
277 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
278 	}
279 }
280 
281 /* called immediately below, in prfree() */
282 static void
283 prfreenotify(vnode_t *vp)
284 {
285 	prnode_t *pnp;
286 	prcommon_t *pcp;
287 
288 	while (vp != NULL) {
289 		pnp = VTOP(vp);
290 		pcp = pnp->pr_common;
291 		ASSERT(pcp->prc_thread == NULL);
292 		pcp->prc_proc = NULL;
293 		/*
294 		 * We can't call prnotify() here because we are holding
295 		 * pidlock.  We assert that there is no need to.
296 		 */
297 		mutex_enter(&pcp->prc_mutex);
298 		cv_broadcast(&pcp->prc_wait);
299 		mutex_exit(&pcp->prc_mutex);
300 		ASSERT(!(pcp->prc_flags & PRC_POLL));
301 
302 		vp = pnp->pr_next;
303 		pnp->pr_next = NULL;
304 	}
305 }
306 
307 /*
308  * Called from a hook in freeproc() when a traced process is removed
309  * from the process table.  The proc-table pointers of all associated
310  * /proc vnodes are cleared to indicate that the process has gone away.
311  */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * Drop pr_pidlock (but keep p_lock for the cv_wait)
		 * while a controller holds the process, then reacquire
		 * both locks in the proper order before re-testing.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	/* Sever every /proc vnode from the now-defunct process. */
	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}
352 
353 /*
354  * Called from a hook in exit() when a traced process is becoming a zombie.
355  */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		/* Tear down watchpoints before the process goes away. */
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		/* Mark the process /proc common defunct and wake waiters. */
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}
372 
373 /*
374  * Called when a thread calls lwp_exit().
375  */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	/*
	 * Disassociate this lwp from any process-list /proc vnode
	 * whose common structure still refers to it.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	/*
	 * Mark every lwp-specific /proc vnode for this lwp defunct
	 * and wake anyone waiting on it.
	 */
	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	/* Also wake anyone waiting on the process-wide vnode. */
	if (p->p_trace)
		prnotify(p->p_trace);
}
415 
416 /*
417  * Called when a zombie thread is joined or when a
418  * detached lwp exits.  Called from lwp_hash_out().
419  */
420 void
421 prlwpfree(proc_t *p, lwpent_t *lep)
422 {
423 	vnode_t *vp;
424 	prnode_t *pnp;
425 	prcommon_t *pcp;
426 
427 	ASSERT(MUTEX_HELD(&p->p_lock));
428 
429 	/*
430 	 * The process must be blocked against /proc to do this safely.
431 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
432 	 * It is the caller's responsibility to have called prbarrier(p).
433 	 */
434 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
435 
436 	vp = lep->le_trace;
437 	lep->le_trace = NULL;
438 	while (vp) {
439 		prnotify(vp);
440 		pnp = VTOP(vp);
441 		pcp = pnp->pr_common;
442 		ASSERT(pcp->prc_thread == NULL &&
443 		    (pcp->prc_flags & PRC_DESTROY));
444 		pcp->prc_tslot = -1;
445 		vp = pnp->pr_next;
446 		pnp->pr_next = NULL;
447 	}
448 
449 	if (p->p_trace)
450 		prnotify(p->p_trace);
451 }
452 
453 /*
454  * Called from a hook in exec() when a thread starts exec().
455  */
456 void
457 prexecstart(void)
458 {
459 	proc_t *p = ttoproc(curthread);
460 	klwp_t *lwp = ttolwp(curthread);
461 
462 	/*
463 	 * The P_PR_EXEC flag blocks /proc operations for
464 	 * the duration of the exec().
465 	 * We can't start exec() while the process is
466 	 * locked by /proc, so we call prbarrier().
467 	 * lwp_nostop keeps the process from being stopped
468 	 * via job control for the duration of the exec().
469 	 */
470 
471 	ASSERT(MUTEX_HELD(&p->p_lock));
472 	prbarrier(p);
473 	lwp->lwp_nostop++;
474 	p->p_proc_flag |= P_PR_EXEC;
475 }
476 
477 /*
478  * Called from a hook in exec() when a thread finishes exec().
479  * The thread may or may not have succeeded.  Some other thread
480  * may have beat it to the punch.
481  */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Undo the lwp_nostop increment made in prexecstart(). */
	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		/* Record the (possibly new) data model on every vnode. */
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		/*
		 * The exec'ing lwp survives as the (possibly renumbered)
		 * sole lwp; update model, tid and slot on its vnodes.
		 */
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}
538 
539 /*
540  * Called from a hook in relvm() just before freeing the address space.
541  * We free all the watched areas now.
542  */
543 void
544 prrelvm(void)
545 {
546 	proc_t *p = ttoproc(curthread);
547 
548 	mutex_enter(&p->p_lock);
549 	prbarrier(p);	/* block all other /proc operations */
550 	if (pr_watch_active(p)) {
551 		pr_free_watchpoints(p);
552 		watch_disable(curthread);
553 	}
554 	mutex_exit(&p->p_lock);
555 	pr_free_watched_pages(p);
556 }
557 
558 /*
559  * Called from hooks in exec-related code when a traced process
560  * attempts to exec(2) a setuid/setgid program or an unreadable
561  * file.  Rather than fail the exec we invalidate the associated
562  * /proc vnodes so that subsequent attempts to use them will fail.
563  *
564  * All /proc vnodes, except directory vnodes, are retained on a linked
565  * list (rooted at p_plist in the process structure) until last close.
566  *
567  * A controlling process must re-open the /proc files in order to
568  * regain control.
569  */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			/* everything else requires privilege: invalidate */
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);		/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}
669 
670 /*
671  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
672  * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
674  * Return value is the process pointer if the process still exists, else NULL.
675  * If we lock the process, give ourself kernel priority to avoid deadlocks;
676  * this is undone in prunlock().
677  */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	/* pr_pidlock stabilizes prc_proc; NULL means the process is gone */
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		/* drop pr_pidlock across the wait to preserve lock order */
		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		/* the process may have vanished while we slept */
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}
709 
710 /*
711  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
712  * This prevents any lwp of the process from disappearing and
713  * blocks most operations that a process can perform on itself.
714  * Returns 0 on success, a non-zero error number on failure.
715  *
716  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
717  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
718  *
719  * error returns:
720  *	ENOENT: process or lwp has disappeared or process is exiting
721  *		(or has become a zombie and zdisp == ZNO).
722  *	EAGAIN: procfs vnode has become invalid.
723  *	EINTR:  signal arrived while waiting for exec to complete.
724  */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);	/* sets P_PR_LOCK and returns with p_lock */
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		/* cv_wait_sig() returns 0 if interrupted by a signal */
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
795 
796 /*
797  * Undo prlock() and pr_p_lock().
798  * p->p_lock is still held; pr_pidlock is no longer held.
799  *
800  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
801  * if any, waiting for the flag to be dropped; it retains p->p_lock.
802  *
803  * prunlock() calls prunmark() and then drops p->p_lock.
804  */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* wake one waiter (prbarrier()/pr_p_lock()) and drop the mark */
	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	THREAD_KPRI_RELEASE();
}
815 
816 void
817 prunlock(prnode_t *pnp)
818 {
819 	prcommon_t *pcp = pnp->pr_common;
820 	proc_t *p = pcp->prc_proc;
821 
822 	/*
823 	 * If we (or someone) gave it a SIGKILL, and it is not
824 	 * already a zombie, set it running unconditionally.
825 	 */
826 	if ((p->p_flag & SKILLED) &&
827 	    !(p->p_flag & SEXITING) &&
828 	    !(pcp->prc_flags & PRC_DESTROY) &&
829 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
830 		(void) pr_setrun(pnp, 0);
831 	prunmark(p);
832 	mutex_exit(&p->p_lock);
833 }
834 
835 /*
836  * Called while holding p->p_lock to delay until the process is unlocked.
837  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
838  * The process cannot become locked again until p->p_lock is dropped.
839  */
840 void
841 prbarrier(proc_t *p)
842 {
843 	ASSERT(MUTEX_HELD(&p->p_lock));
844 
845 	if (p->p_proc_flag & P_PR_LOCK) {
846 		/* The process is locked; delay until not locked */
847 		uint_t slot = p->p_slot;
848 
849 		while (p->p_proc_flag & P_PR_LOCK)
850 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
851 		cv_signal(&pr_pid_cv[slot]);
852 	}
853 }
854 
855 /*
856  * Return process/lwp status.
857  * The u-block is mapped in by this routine and unmapped at the end.
858  */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid  = p->p_pgrp;
	sp->pr_sid   = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	/* microstate-accounting aggregates across all lwps */
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
921 
922 #ifdef _SYSCALL32_IMPL
/*
 * Fill in an ILP32 lwpstatus32_t for lwp 't' on behalf of a 32-bit
 * /proc consumer.  Pointer-sized fields are truncated to caddr32_t.
 * Called and returns with p->p_lock held, but drops and reacquires
 * it while touching the lwp's stack and registers.
 */
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	/* translate thread and process state into PR_* flag bits */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	/* a virtually-stopped lwp reports as a requested stop */
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	/* fault siginfo takes precedence over any queued signal info */
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	/*
	 * Inside a non-global zone, substitute zsched's identity for
	 * siginfo sent from outside the zone.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	/* scale raw microstate times into wall-clock values */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			/*
			 * Reconstruct the exec() arguments; the path
			 * comes from the AT_SUN_EXECNAME aux vector
			 * entry when present.
			 */
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}
1081 
/*
 * Fill in an ILP32 process status structure (pstatus32_t) for process p.
 * Caller must hold p->p_lock.  zp identifies the observer's zone; it is
 * passed through to prgetlwpstatus32() for cross-zone siginfo translation.
 */
void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	/* break and stack bounds, truncated to 32 bits for ILP32 consumers */
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid   = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid  = p->p_pgrp;
	sp->pr_sid   = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	/* CPU times aggregated across lwps via mstate_aggr_state() */
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	/* report the target's data model (no default: other values unmapped) */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
1144 #endif	/* _SYSCALL32_IMPL */
1145 
1146 /*
1147  * Return lwp status.
1148  */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	/*
	 * Caller must hold p->p_lock.  Note that this routine temporarily
	 * drops and re-acquires p_lock below, around operations that touch
	 * the lwp's stack.
	 */
	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));

	/*
	 * Accumulate the PR_* state flags from thread, lwp and process
	 * state; published in sp->pr_flags below.
	 */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		/* virtually stopped (e.g. stopped while sleeping) */
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	/*
	 * For user-generated signals originating outside the observer's
	 * (non-global) zone, hide the sender's identity behind zsched.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	/* pr_clname was zeroed above, so the result is NUL-terminated */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* per-lwp microstate CPU times; TRAP time is charged to system */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	/*
	 * Syscall return values are valid when stopped at system-call exit
	 * or when waiting in vfork().
	 */
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			/*
			 * Reconstruct exec()'s arguments; the pathname is
			 * recovered from the AT_SUN_EXECNAME aux vector
			 * entry, if present.
			 */
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}
1305 
1306 /*
1307  * Get the sigaction structure for the specified signal.  The u-block
1308  * must already have been mapped in by the caller.
1309  */
1310 void
1311 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1312 {
1313 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1314 
1315 	bzero(sp, sizeof (*sp));
1316 
1317 	if (sig != 0 && (unsigned)sig < nsig) {
1318 		sp->sa_handler = up->u_signal[sig-1];
1319 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1320 		if (sigismember(&up->u_sigonstack, sig))
1321 			sp->sa_flags |= SA_ONSTACK;
1322 		if (sigismember(&up->u_sigresethand, sig))
1323 			sp->sa_flags |= SA_RESETHAND;
1324 		if (sigismember(&up->u_sigrestart, sig))
1325 			sp->sa_flags |= SA_RESTART;
1326 		if (sigismember(&p->p_siginfo, sig))
1327 			sp->sa_flags |= SA_SIGINFO;
1328 		if (sigismember(&up->u_signodefer, sig))
1329 			sp->sa_flags |= SA_NODEFER;
1330 		if (sig == SIGCLD) {
1331 			if (p->p_flag & SNOWAIT)
1332 				sp->sa_flags |= SA_NOCLDWAIT;
1333 			if ((p->p_flag & SJCTL) == 0)
1334 				sp->sa_flags |= SA_NOCLDSTOP;
1335 		}
1336 	}
1337 }
1338 
1339 #ifdef _SYSCALL32_IMPL
1340 void
1341 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1342 {
1343 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1344 
1345 	bzero(sp, sizeof (*sp));
1346 
1347 	if (sig != 0 && (unsigned)sig < nsig) {
1348 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1349 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1350 		if (sigismember(&up->u_sigonstack, sig))
1351 			sp->sa_flags |= SA_ONSTACK;
1352 		if (sigismember(&up->u_sigresethand, sig))
1353 			sp->sa_flags |= SA_RESETHAND;
1354 		if (sigismember(&up->u_sigrestart, sig))
1355 			sp->sa_flags |= SA_RESTART;
1356 		if (sigismember(&p->p_siginfo, sig))
1357 			sp->sa_flags |= SA_SIGINFO;
1358 		if (sigismember(&up->u_signodefer, sig))
1359 			sp->sa_flags |= SA_NODEFER;
1360 		if (sig == SIGCLD) {
1361 			if (p->p_flag & SNOWAIT)
1362 				sp->sa_flags |= SA_NOCLDWAIT;
1363 			if ((p->p_flag & SJCTL) == 0)
1364 				sp->sa_flags |= SA_NOCLDSTOP;
1365 		}
1366 	}
1367 }
1368 #endif	/* _SYSCALL32_IMPL */
1369 
1370 /*
1371  * Count the number of segments in this process's address space.
1372  */
1373 int
1374 prnsegs(struct as *as, int reserved)
1375 {
1376 	int n = 0;
1377 	struct seg *seg;
1378 
1379 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1380 
1381 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1382 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1383 		caddr_t saddr, naddr;
1384 		void *tmp = NULL;
1385 
1386 		if ((seg->s_flags & S_HOLE) != 0) {
1387 			continue;
1388 		}
1389 
1390 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1391 			(void) pr_getprot(seg, reserved, &tmp,
1392 			    &saddr, &naddr, eaddr);
1393 			if (saddr != naddr)
1394 				n++;
1395 		}
1396 
1397 		ASSERT(tmp == NULL);
1398 	}
1399 
1400 	return (n);
1401 }
1402 
1403 /*
1404  * Convert uint32_t to decimal string w/o leading zeros.
1405  * Add trailing null characters if 'len' is greater than string length.
1406  * Return the string length.
1407  */
/*
 * Render the unsigned 32-bit value n as a decimal string at s, with no
 * leading zeros.  If len exceeds the digit count, the remainder of the
 * caller's field is filled with null bytes.  Returns the number of
 * digits written (excluding any null padding).
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* 10 decimal digits suffice for 2^32-1 */
	char *field_end = s + len;
	int ndig = 0;
	int i;

	/* extract digits, least-significant first */
	do {
		digits[ndig++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them in conventional most-significant-first order */
	for (i = ndig; --i >= 0; )
		*s++ = digits[i];

	/* null-fill whatever remains of the caller's field, if any */
	while (s < field_end)
		*s++ = '\0';

	return (ndig);
}
1431 
1432 /*
1433  * Convert uint64_t to decimal string w/o leading zeros.
1434  * Return the string length.
1435  */
/*
 * Render the unsigned 64-bit value n as a decimal string at s, with no
 * leading zeros and no null terminator.  Returns the digit count.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* 20 decimal digits suffice for 2^64-1 */
	int ndig = 0;
	int i;

	/* extract digits, least-significant first */
	do {
		digits[ndig++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first; no terminator is added */
	for (i = ndig; --i >= 0; )
		*s++ = digits[i];

	return (ndig);
}
1456 
1457 void
1458 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1459 {
1460 	char *s = name;
1461 	struct vfs *vfsp;
1462 	struct vfssw *vfsswp;
1463 
1464 	if ((vfsp = vp->v_vfsp) != NULL &&
1465 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1466 	    *vfsswp->vsw_name) {
1467 		(void) strcpy(s, vfsswp->vsw_name);
1468 		s += strlen(s);
1469 		*s++ = '.';
1470 	}
1471 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1472 	*s++ = '.';
1473 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1474 	*s++ = '.';
1475 	s += pr_u64tos(vattr->va_nodeid, s);
1476 	*s++ = '\0';
1477 }
1478 
1479 struct seg *
1480 break_seg(proc_t *p)
1481 {
1482 	caddr_t addr = p->p_brkbase;
1483 	struct seg *seg;
1484 	struct vnode *vp;
1485 
1486 	if (p->p_brksize != 0)
1487 		addr += p->p_brksize - 1;
1488 	seg = as_segat(p->p_as, addr);
1489 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1490 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1491 		return (seg);
1492 	return (NULL);
1493 }
1494 
1495 /*
1496  * Implementation of service functions to handle procfs generic chained
1497  * copyout buffers.
1498  */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)	/* max/growth size of one chained buffer */
/* the data area begins immediately after the piol_t header */
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1507 
1508 void
1509 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1510 {
1511 	piol_t	*iol;
1512 	size_t	initial_size = MIN(1, n) * itemsize;
1513 
1514 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1515 
1516 	ASSERT(list_head(iolhead) == NULL);
1517 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1518 	ASSERT(initial_size > 0);
1519 
1520 	/*
1521 	 * Someone creating chained copyout buffers may ask for less than
1522 	 * MAPSIZE if the amount of data to be buffered is known to be
1523 	 * smaller than that.
1524 	 * But in order to prevent involuntary self-denial of service,
1525 	 * the requested input size is clamped at MAPSIZE.
1526 	 */
1527 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1528 	iol = kmem_alloc(initial_size, KM_SLEEP);
1529 	list_insert_head(iolhead, iol);
1530 	iol->piol_usedsize = 0;
1531 	iol->piol_size = initial_size;
1532 }
1533 
/*
 * Return a zeroed, itemsize-byte slot from the tail buffer of a chained
 * copyout list, growing the chain with a fresh MAPSIZE buffer when the
 * tail is full.  pr_iol_initlist() must have been called on iolhead
 * first.  Never fails (KM_SLEEP allocation).
 */
void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t	*iol;
	char	*new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	/* piol_size counts the header too, hence the sizeof (*iol) term */
	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer. Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		/* the new buffer becomes the tail we allocate from */
		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}
1565 
1566 int
1567 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1568 {
1569 	int error = errin;
1570 	piol_t	*iol;
1571 
1572 	while ((iol = list_head(iolhead)) != NULL) {
1573 		list_remove(iolhead, iol);
1574 		if (!error) {
1575 			if (copyout(PIOL_DATABUF(iol), *tgt,
1576 			    iol->piol_usedsize))
1577 				error = EFAULT;
1578 			*tgt += iol->piol_usedsize;
1579 		}
1580 		kmem_free(iol, iol->piol_size);
1581 	}
1582 	list_destroy(iolhead);
1583 
1584 	return (error);
1585 }
1586 
1587 int
1588 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1589 {
1590 	offset_t	off = uiop->uio_offset;
1591 	char		*base;
1592 	size_t		size;
1593 	piol_t		*iol;
1594 	int		error = errin;
1595 
1596 	while ((iol = list_head(iolhead)) != NULL) {
1597 		list_remove(iolhead, iol);
1598 		base = PIOL_DATABUF(iol);
1599 		size = iol->piol_usedsize;
1600 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1601 			error = uiomove(base + off, size - off,
1602 			    UIO_READ, uiop);
1603 		off = MAX(0, off - (offset_t)size);
1604 		kmem_free(iol, iol->piol_size);
1605 	}
1606 	list_destroy(iolhead);
1607 
1608 	return (error);
1609 }
1610 
1611 /*
1612  * Return an array of structures with memory map information.
1613  * We allocate here; the caller must deallocate.
1614  */
/*
 * Build the prmap_t array for process p into the chained buffer list
 * iolhead (caller copies out and frees via the pr_iol_* routines).
 * 'reserved' selects whether reserved (unmapped-but-reserved) space is
 * included -- it is passed through to pr_getsegsize()/pr_getprot().
 * Caller must hold the address space lock as writer.  Always returns 0.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* identify the break and stack segments so they can be flagged */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/* one prmap_t per contiguous same-protection range */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/* ISM or vnode-less segvn mappings are anonymous */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				/*
				 * When reporting reserved space, extend the
				 * stack mapping down to the RLIMIT_STACK
				 * ceiling (p_stk_ctl), page-rounded.
				 */
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1728 
1729 #ifdef _SYSCALL32_IMPL
/*
 * ILP32 variant of prgetmap(): build the prmap32_t array for process p
 * into the chained buffer list iolhead.  Same contract as prgetmap();
 * addresses and sizes are truncated to 32 bits.  Always returns 0.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* identify the break and stack segments so they can be flagged */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/* one prmap32_t per contiguous same-protection range */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/* ISM or vnode-less segvn mappings are anonymous */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				/*
				 * When reporting reserved space, extend the
				 * stack mapping down to the RLIMIT_STACK
				 * ceiling (p_stk_ctl), page-rounded.
				 */
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1844 #endif	/* _SYSCALL32_IMPL */
1845 
1846 /*
1847  * Return the size of the /proc page data file.
1848  */
1849 size_t
1850 prpdsize(struct as *as)
1851 {
1852 	struct seg *seg;
1853 	size_t size;
1854 
1855 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1856 
1857 	if ((seg = AS_SEGFIRST(as)) == NULL)
1858 		return (0);
1859 
1860 	size = sizeof (prpageheader_t);
1861 	do {
1862 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1863 		caddr_t saddr, naddr;
1864 		void *tmp = NULL;
1865 		size_t npage;
1866 
1867 		if ((seg->s_flags & S_HOLE) != 0) {
1868 			continue;
1869 		}
1870 
1871 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1872 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1873 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1874 				size += sizeof (prasmap_t) + round8(npage);
1875 		}
1876 		ASSERT(tmp == NULL);
1877 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1878 
1879 	return (size);
1880 }
1881 
1882 #ifdef _SYSCALL32_IMPL
1883 size_t
1884 prpdsize32(struct as *as)
1885 {
1886 	struct seg *seg;
1887 	size_t size;
1888 
1889 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1890 
1891 	if ((seg = AS_SEGFIRST(as)) == NULL)
1892 		return (0);
1893 
1894 	size = sizeof (prpageheader32_t);
1895 	do {
1896 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1897 		caddr_t saddr, naddr;
1898 		void *tmp = NULL;
1899 		size_t npage;
1900 
1901 		if ((seg->s_flags & S_HOLE) != 0) {
1902 			continue;
1903 		}
1904 
1905 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1906 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1907 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1908 				size += sizeof (prasmap32_t) + round8(npage);
1909 		}
1910 		ASSERT(tmp == NULL);
1911 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1912 
1913 	return (size);
1914 }
1915 #endif	/* _SYSCALL32_IMPL */
1916 
1917 /*
1918  * Read page data information.
1919  */
/*
 * Read page data information for process p into the caller's uio.
 * hatid is passed through to hat_getstat() (selects the stat collection;
 * HAT_SYNC_ZERORM clears reference/modify bits as they are sampled).
 * Returns 0 on success, E2BIG if the caller's buffer is too small,
 * EINTR if interrupted while retrying, or a uiomove() error.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	/* size the output buffer from a first pass over the segments */
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtlely even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asychronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/* ISM or vnode-less segvn mappings are anonymous */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* per-page ref/mod data follows the prasmap_t */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2069 
2070 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit variant of prpdread(): copy /proc page data for process p into
 * the caller's buffer as a prpageheader32_t followed, for each mapping,
 * by a prasmap32_t plus its per-page hat statistics (8-byte aligned via
 * round8()).  Returns 0 on success, E2BIG if the caller's buffer is too
 * small for the computed size, EINTR if a signal is pending after losing
 * the buffer-sizing race, or an error from uiomove().
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	/* Writer lock: keep the segment list stable while we size and fill. */
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/* Reserved address-space holes carry no page data. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/* Walk the segment in runs of identical protections. */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* Per-page referenced/modified bits follow the map. */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2220 #endif	/* _SYSCALL32_IMPL */
2221 
2222 ushort_t
2223 prgetpctcpu(uint64_t pct)
2224 {
2225 	/*
2226 	 * The value returned will be relevant in the zone of the examiner,
2227 	 * which may not be the same as the zone which performed the procfs
2228 	 * mount.
2229 	 */
2230 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2231 
2232 	/*
2233 	 * Prorate over online cpus so we don't exceed 100%
2234 	 */
2235 	if (nonline > 1)
2236 		pct /= nonline;
2237 	pct >>= 16;		/* convert to 16-bit scaled integer */
2238 	if (pct > 0x8000)	/* might happen, due to rounding */
2239 		pct = 0x8000;
2240 	return ((ushort_t)pct);
2241 }
2242 
2243 /*
2244  * Return information used by ps(1).
2245  */
2246 void
2247 prgetpsinfo(proc_t *p, psinfo_t *psp)
2248 {
2249 	kthread_t *t;
2250 	struct cred *cred;
2251 	hrtime_t hrutime, hrstime;
2252 
2253 	ASSERT(MUTEX_HELD(&p->p_lock));
2254 
2255 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2256 		bzero(psp, sizeof (*psp));
2257 	else {
2258 		thread_unlock(t);
2259 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2260 	}
2261 
2262 	/*
2263 	 * only export SSYS and SMSACCT; everything else is off-limits to
2264 	 * userland apps.
2265 	 */
2266 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2267 	psp->pr_nlwp = p->p_lwpcnt;
2268 	psp->pr_nzomb = p->p_zombcnt;
2269 	mutex_enter(&p->p_crlock);
2270 	cred = p->p_cred;
2271 	psp->pr_uid = crgetruid(cred);
2272 	psp->pr_euid = crgetuid(cred);
2273 	psp->pr_gid = crgetrgid(cred);
2274 	psp->pr_egid = crgetgid(cred);
2275 	mutex_exit(&p->p_crlock);
2276 	psp->pr_pid = p->p_pid;
2277 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2278 	    (p->p_flag & SZONETOP)) {
2279 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2280 		/*
2281 		 * Inside local zones, fake zsched's pid as parent pids for
2282 		 * processes which reference processes outside of the zone.
2283 		 */
2284 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2285 	} else {
2286 		psp->pr_ppid = p->p_ppid;
2287 	}
2288 	psp->pr_pgid = p->p_pgrp;
2289 	psp->pr_sid = p->p_sessp->s_sid;
2290 	psp->pr_taskid = p->p_task->tk_tkid;
2291 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2292 	psp->pr_poolid = p->p_pool->pool_id;
2293 	psp->pr_zoneid = p->p_zone->zone_id;
2294 	if ((psp->pr_contract = PRCTID(p)) == 0)
2295 		psp->pr_contract = -1;
2296 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2297 	switch (p->p_model) {
2298 	case DATAMODEL_ILP32:
2299 		psp->pr_dmodel = PR_MODEL_ILP32;
2300 		break;
2301 	case DATAMODEL_LP64:
2302 		psp->pr_dmodel = PR_MODEL_LP64;
2303 		break;
2304 	}
2305 	hrutime = mstate_aggr_state(p, LMS_USER);
2306 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2307 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2308 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2309 
2310 	if (t == NULL) {
2311 		int wcode = p->p_wcode;		/* must be atomic read */
2312 
2313 		if (wcode)
2314 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2315 		psp->pr_ttydev = PRNODEV;
2316 		psp->pr_lwp.pr_state = SZOMB;
2317 		psp->pr_lwp.pr_sname = 'Z';
2318 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2319 		psp->pr_lwp.pr_bindpset = PS_NONE;
2320 	} else {
2321 		user_t *up = PTOU(p);
2322 		struct as *as;
2323 		dev_t d;
2324 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2325 
2326 		d = cttydev(p);
2327 		/*
2328 		 * If the controlling terminal is the real
2329 		 * or workstation console device, map to what the
2330 		 * user thinks is the console device. Handle case when
2331 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2332 		 */
2333 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2334 			d = uconsdev;
2335 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2336 		psp->pr_start = up->u_start;
2337 		bcopy(up->u_comm, psp->pr_fname,
2338 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2339 		bcopy(up->u_psargs, psp->pr_psargs,
2340 		    MIN(PRARGSZ-1, PSARGSZ));
2341 		psp->pr_argc = up->u_argc;
2342 		psp->pr_argv = up->u_argv;
2343 		psp->pr_envp = up->u_envp;
2344 
2345 		/* get the chosen lwp's lwpsinfo */
2346 		prgetlwpsinfo(t, &psp->pr_lwp);
2347 
2348 		/* compute %cpu for the process */
2349 		if (p->p_lwpcnt == 1)
2350 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2351 		else {
2352 			uint64_t pct = 0;
2353 			hrtime_t cur_time = gethrtime_unscaled();
2354 
2355 			t = p->p_tlist;
2356 			do {
2357 				pct += cpu_update_pct(t, cur_time);
2358 			} while ((t = t->t_forw) != p->p_tlist);
2359 
2360 			psp->pr_pctcpu = prgetpctcpu(pct);
2361 		}
2362 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2363 			psp->pr_size = 0;
2364 			psp->pr_rssize = 0;
2365 		} else {
2366 			mutex_exit(&p->p_lock);
2367 			AS_LOCK_ENTER(as, RW_READER);
2368 			psp->pr_size = btopr(as->a_resvsize) *
2369 			    (PAGESIZE / 1024);
2370 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2371 			psp->pr_pctmem = rm_pctmemory(as);
2372 			AS_LOCK_EXIT(as);
2373 			mutex_enter(&p->p_lock);
2374 		}
2375 	}
2376 }
2377 
2378 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit (ILP32 consumer) variant of prgetpsinfo().  Caller must hold
 * p->p_lock (asserted); it is briefly dropped around the address-space
 * statistics.  Fields that cannot be represented in 32 bits are zeroed
 * at the end for LP64 targets.
 */
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		/* preserve pr_lwp; it is filled in by prgetlwpsinfo32() */
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* Zombie process: no representative lwp. */
		extern int wstat(int, int);	/* needs a header file */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV32;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		(void) cmpldev(&psp->pr_ttydev, d);
		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = (caddr32_t)up->u_argv;
		psp->pr_envp = (caddr32_t)up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo32(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time;

			t = p->p_tlist;
			cur_time = gethrtime_unscaled();
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/* sizes reported in kilobytes */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			psp->pr_size = (size32_t)
			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
			psp->pr_rssize = (size32_t)
			    (rm_asrss(as) * (PAGESIZE / 1024));
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}

	/*
	 * If we are looking at an LP64 process, zero out
	 * the fields that cannot be represented in ILP32.
	 */
	if (p->p_model != DATAMODEL_ILP32) {
		psp->pr_size = 0;
		psp->pr_rssize = 0;
		psp->pr_argv = 0;
		psp->pr_envp = 0;
	}
}
2522 
2523 #endif	/* _SYSCALL32_IMPL */
2524 
/*
 * Fill in an lwpsinfo_t for thread t.  Caller must hold the owning
 * process's p_lock (asserted); the thread itself is not thread_lock()ed,
 * so the state fields are a point-in-time snapshot.
 */
void
prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
{
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *sobj;
	char c, state;
	uint64_t pct;
	int retval, niceval;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = (uintptr_t)t;
	psp->pr_wchan = (uintptr_t)t->t_wchan;

	/* map the thread state enum into a process state enum */
	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (state) {
	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
	case TS_RUN:		state = SRUN;		c = 'R';	break;
	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
	default:		state = 0;		c = '?';	break;
	}
	psp->pr_state = state;
	psp->pr_sname = c;
	if ((sobj = t->t_sobj_ops) != NULL)
		psp->pr_stype = SOBJ_TYPE(sobj);
	/* priority/nice come from the scheduling class; skip on failure */
	retval = CL_DONICE(t, NULL, 0, &niceval);
	if (retval == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = niceval + NZERO;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = t->t_start;
	psp->pr_start.tv_nsec = 0L;
	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&hrutime);
	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&hrstime);
	hrt2ts(hrutime + hrstime, &psp->pr_time);
	/* compute %cpu for the lwp */
	pct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(pct);
	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* pr_name unused */
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
2589 
2590 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit (ILP32 consumer) variant of prgetlwpsinfo().  Caller must hold
 * the owning process's p_lock (asserted).  pr_addr and pr_wchan cannot
 * be represented in 32 bits and are reported as zero.
 */
void
prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *sobj;
	char c, state;
	uint64_t pct;
	int retval, niceval;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */

	/* map the thread state enum into a process state enum */
	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (state) {
	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
	case TS_RUN:		state = SRUN;		c = 'R';	break;
	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
	default:		state = 0;		c = '?';	break;
	}
	psp->pr_state = state;
	psp->pr_sname = c;
	if ((sobj = t->t_sobj_ops) != NULL)
		psp->pr_stype = SOBJ_TYPE(sobj);
	/* priority/nice come from the scheduling class; zero on failure */
	retval = CL_DONICE(t, NULL, 0, &niceval);
	if (retval == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = niceval + NZERO;
	} else {
		psp->pr_oldpri = 0;
		psp->pr_nice = 0;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = (time32_t)t->t_start;
	psp->pr_start.tv_nsec = 0L;
	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&hrutime);
	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&hrstime);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	/* compute %cpu for the lwp */
	pct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(pct);
	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* pr_name unused */
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
2659 #endif	/* _SYSCALL32_IMPL */
2660 
2661 #ifdef _SYSCALL32_IMPL
2662 
2663 #define	PR_COPY_FIELD(s, d, field)	 d->field = s->field
2664 
2665 #define	PR_COPY_FIELD_ILP32(s, d, field)				\
2666 	if (s->pr_dmodel == PR_MODEL_ILP32) {			\
2667 		d->field = s->field;				\
2668 	}
2669 
2670 #define	PR_COPY_TIMESPEC(s, d, field)				\
2671 	TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);
2672 
2673 #define	PR_COPY_BUF(s, d, field)	 			\
2674 	bcopy(s->field, d->field, sizeof (d->field));
2675 
2676 #define	PR_IGNORE_FIELD(s, d, field)
2677 
/*
 * Convert a native lwpsinfo_t to its 32-bit (ILP32) representation.
 * pr_addr and pr_wchan cannot be represented in 32 bits; the initial
 * bzero() leaves them zero.
 */
void
lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
{
	bzero(dest, sizeof (*dest));

	PR_COPY_FIELD(src, dest, pr_flag);
	PR_COPY_FIELD(src, dest, pr_lwpid);
	PR_IGNORE_FIELD(src, dest, pr_addr);
	PR_IGNORE_FIELD(src, dest, pr_wchan);
	PR_COPY_FIELD(src, dest, pr_stype);
	PR_COPY_FIELD(src, dest, pr_state);
	PR_COPY_FIELD(src, dest, pr_sname);
	PR_COPY_FIELD(src, dest, pr_nice);
	PR_COPY_FIELD(src, dest, pr_syscall);
	PR_COPY_FIELD(src, dest, pr_oldpri);
	PR_COPY_FIELD(src, dest, pr_cpu);
	PR_COPY_FIELD(src, dest, pr_pri);
	PR_COPY_FIELD(src, dest, pr_pctcpu);
	PR_COPY_TIMESPEC(src, dest, pr_start);
	PR_COPY_BUF(src, dest, pr_clname);
	PR_COPY_BUF(src, dest, pr_name);
	PR_COPY_FIELD(src, dest, pr_onpro);
	PR_COPY_FIELD(src, dest, pr_bindpro);
	PR_COPY_FIELD(src, dest, pr_bindpset);
	PR_COPY_FIELD(src, dest, pr_lgrp);
}
2704 
2705 void
2706 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
2707 {
2708 	bzero(dest, sizeof (*dest));
2709 
2710 	PR_COPY_FIELD(src, dest, pr_flag);
2711 	PR_COPY_FIELD(src, dest, pr_nlwp);
2712 	PR_COPY_FIELD(src, dest, pr_pid);
2713 	PR_COPY_FIELD(src, dest, pr_ppid);
2714 	PR_COPY_FIELD(src, dest, pr_pgid);
2715 	PR_COPY_FIELD(src, dest, pr_sid);
2716 	PR_COPY_FIELD(src, dest, pr_uid);
2717 	PR_COPY_FIELD(src, dest, pr_euid);
2718 	PR_COPY_FIELD(src, dest, pr_gid);
2719 	PR_COPY_FIELD(src, dest, pr_egid);
2720 	PR_IGNORE_FIELD(src, dest, pr_addr);
2721 	PR_COPY_FIELD_ILP32(src, dest, pr_size);
2722 	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
2723 	PR_COPY_FIELD(src, dest, pr_ttydev);
2724 	PR_COPY_FIELD(src, dest, pr_pctcpu);
2725 	PR_COPY_FIELD(src, dest, pr_pctmem);
2726 	PR_COPY_TIMESPEC(src, dest, pr_start);
2727 	PR_COPY_TIMESPEC(src, dest, pr_time);
2728 	PR_COPY_TIMESPEC(src, dest, pr_ctime);
2729 	PR_COPY_BUF(src, dest, pr_fname);
2730 	PR_COPY_BUF(src, dest, pr_psargs);
2731 	PR_COPY_FIELD(src, dest, pr_wstat);
2732 	PR_COPY_FIELD(src, dest, pr_argc);
2733 	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
2734 	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
2735 	PR_COPY_FIELD(src, dest, pr_dmodel);
2736 	PR_COPY_FIELD(src, dest, pr_taskid);
2737 	PR_COPY_FIELD(src, dest, pr_projid);
2738 	PR_COPY_FIELD(src, dest, pr_nzomb);
2739 	PR_COPY_FIELD(src, dest, pr_poolid);
2740 	PR_COPY_FIELD(src, dest, pr_contract);
2741 	PR_COPY_FIELD(src, dest, pr_poolid);
2742 	PR_COPY_FIELD(src, dest, pr_poolid);
2743 
2744 	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
2745 }
2746 
2747 #undef	PR_COPY_FIELD
2748 #undef	PR_COPY_FIELD_ILP32
2749 #undef	PR_COPY_TIMESPEC
2750 #undef	PR_COPY_BUF
2751 #undef	PR_IGNORE_FIELD
2752 
2753 #endif	/* _SYSCALL32_IMPL */
2754 
2755 /*
2756  * This used to get called when microstate accounting was disabled but
2757  * microstate information was requested.  Since Microstate accounting is on
2758  * regardless of the proc flags, this simply makes it appear to procfs that
2759  * microstate accounting is on.  This is relatively meaningless since you
2760  * can't turn it off, but this is here for the sake of appearances.
2761  */
2762 
2763 /*ARGSUSED*/
2764 void
2765 estimate_msacct(kthread_t *t, hrtime_t curtime)
2766 {
2767 	proc_t *p;
2768 
2769 	if (t == NULL)
2770 		return;
2771 
2772 	p = ttoproc(t);
2773 	ASSERT(MUTEX_HELD(&p->p_lock));
2774 
2775 	/*
2776 	 * A system process (p0) could be referenced if the thread is
2777 	 * in the process of exiting.  Don't turn on microstate accounting
2778 	 * in that case.
2779 	 */
2780 	if (p->p_flag & SSYS)
2781 		return;
2782 
2783 	/*
2784 	 * Loop through all the LWPs (kernel threads) in the process.
2785 	 */
2786 	t = p->p_tlist;
2787 	do {
2788 		t->t_proc_flag |= TP_MSACCT;
2789 	} while ((t = t->t_forw) != p->p_tlist);
2790 
2791 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
2792 }
2793 
2794 /*
2795  * It's not really possible to disable microstate accounting anymore.
2796  * However, this routine simply turns off the ms accounting flags in a process
2797  * This way procfs can still pretend to turn microstate accounting on and
2798  * off for a process, but it actually doesn't do anything.  This is
2799  * a neutered form of preemptive idiot-proofing.
2800  */
2801 void
2802 disable_msacct(proc_t *p)
2803 {
2804 	kthread_t *t;
2805 
2806 	ASSERT(MUTEX_HELD(&p->p_lock));
2807 
2808 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
2809 	/*
2810 	 * Loop through all the LWPs (kernel threads) in the process.
2811 	 */
2812 	if ((t = p->p_tlist) != NULL) {
2813 		do {
2814 			/* clear per-thread flag */
2815 			t->t_proc_flag &= ~TP_MSACCT;
2816 		} while ((t = t->t_forw) != p->p_tlist);
2817 	}
2818 }
2819 
2820 /*
2821  * Return resource usage information.
2822  */
/*
 * Fill in *pup with thread t's resource usage: lifetime, scaled
 * per-microstate times (adjusted for dispatch-queue wait and the time
 * spent so far in the current microstate), and rusage counters.
 */
void
prgetusage(kthread_t *t, prhusage_t *pup)
{
	klwp_t *lwp = ttolwp(t);
	hrtime_t *mstimep;
	struct mstate *ms = &lwp->lwp_mstate;
	int state;
	int i;
	hrtime_t curtime;
	hrtime_t waitrq;
	hrtime_t tmp1;

	curtime = gethrtime_unscaled();

	pup->pr_lwpid	= t->t_tid;
	pup->pr_count	= 1;
	pup->pr_create	= ms->ms_start;
	pup->pr_term    = ms->ms_term;
	scalehrtime(&pup->pr_create);
	scalehrtime(&pup->pr_term);
	/* ms_term == 0 means the lwp has not terminated yet */
	if (ms->ms_term == 0) {
		pup->pr_rtime = curtime - ms->ms_start;
		scalehrtime(&pup->pr_rtime);
	} else {
		pup->pr_rtime = ms->ms_term - ms->ms_start;
		scalehrtime(&pup->pr_rtime);
	}


	pup->pr_utime    = ms->ms_acct[LMS_USER];
	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];

	prscaleusage(pup);

	/*
	 * Adjust for time waiting in the dispatcher queue.
	 */
	waitrq = t->t_waitrq;	/* hopefully atomic */
	if (waitrq != 0) {
		if (waitrq > curtime) {
			curtime = gethrtime_unscaled();
		}
		tmp1 = curtime - waitrq;
		scalehrtime(&tmp1);
		pup->pr_wtime += tmp1;
		curtime = waitrq;
	}

	/*
	 * Adjust for time spent in current microstate.
	 */
	if (ms->ms_state_start > curtime) {
		curtime = gethrtime_unscaled();
	}

	/*
	 * The unscaled clock may appear to run backwards relative to a
	 * stale timestamp; retry (up to MAX_ITERS_SPIN times) with a
	 * fresh reading until the delta is non-negative.
	 */
	i = 0;
	do {
		switch (state = t->t_mstate) {
		case LMS_SLEEP:
			/*
			 * Update the timer for the current sleep state.
			 */
			switch (state = ms->ms_prev) {
			case LMS_TFAULT:
			case LMS_DFAULT:
			case LMS_KFAULT:
			case LMS_USER_LOCK:
				break;
			default:
				state = LMS_SLEEP;
				break;
			}
			break;
		case LMS_TFAULT:
		case LMS_DFAULT:
		case LMS_KFAULT:
		case LMS_USER_LOCK:
			state = LMS_SYSTEM;
			break;
		}
		switch (state) {
		case LMS_USER:		mstimep = &pup->pr_utime;	break;
		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
		default:		panic("prgetusage: unknown microstate");
		}
		tmp1 = curtime - ms->ms_state_start;
		if (tmp1 < 0) {
			curtime = gethrtime_unscaled();
			i++;
			continue;
		}
		scalehrtime(&tmp1);
	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);

	/*
	 * NOTE(review): if the retry loop exhausts MAX_ITERS_SPIN with
	 * tmp1 still negative, a negative delta is added here — confirm
	 * this is acceptable (bounded by the spin limit).
	 */
	*mstimep += tmp1;

	/* update pup timestamp */
	pup->pr_tstamp = curtime;
	scalehrtime(&pup->pr_tstamp);

	/*
	 * Resource usage counters.
	 */
	pup->pr_minf  = lwp->lwp_ru.minflt;
	pup->pr_majf  = lwp->lwp_ru.majflt;
	pup->pr_nswap = lwp->lwp_ru.nswap;
	pup->pr_inblk = lwp->lwp_ru.inblock;
	pup->pr_oublk = lwp->lwp_ru.oublock;
	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
	pup->pr_sigs  = lwp->lwp_ru.nsignals;
	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
	pup->pr_sysc  = lwp->lwp_ru.sysc;
	pup->pr_ioch  = lwp->lwp_ru.ioch;
}
2955 
2956 /*
2957  * Convert ms_acct stats from unscaled high-res time to nanoseconds
2958  */
2959 void
2960 prscaleusage(prhusage_t *usg)
2961 {
2962 	scalehrtime(&usg->pr_utime);
2963 	scalehrtime(&usg->pr_stime);
2964 	scalehrtime(&usg->pr_ttime);
2965 	scalehrtime(&usg->pr_tftime);
2966 	scalehrtime(&usg->pr_dftime);
2967 	scalehrtime(&usg->pr_kftime);
2968 	scalehrtime(&usg->pr_ltime);
2969 	scalehrtime(&usg->pr_slptime);
2970 	scalehrtime(&usg->pr_wtime);
2971 	scalehrtime(&usg->pr_stoptime);
2972 }
2973 
2974 
2975 /*
2976  * Sum resource usage information.
2977  */
/*
 * Accumulate thread t's resource usage into *pup (additive counterpart
 * of prgetusage(); see that function for details of the microstate
 * adjustments).  pr_lwpid/pr_count/pr_create/pr_term are left to the
 * caller.
 */
void
praddusage(kthread_t *t, prhusage_t *pup)
{
	klwp_t *lwp = ttolwp(t);
	hrtime_t *mstimep;
	struct mstate *ms = &lwp->lwp_mstate;
	int state;
	int i;
	hrtime_t curtime;
	hrtime_t waitrq;
	hrtime_t tmp;
	prhusage_t conv;

	curtime = gethrtime_unscaled();

	/* ms_term == 0 means the lwp has not terminated yet */
	if (ms->ms_term == 0) {
		tmp = curtime - ms->ms_start;
		scalehrtime(&tmp);
		pup->pr_rtime += tmp;
	} else {
		tmp = ms->ms_term - ms->ms_start;
		scalehrtime(&tmp);
		pup->pr_rtime += tmp;
	}

	/* scale this lwp's times in a scratch copy, then accumulate */
	conv.pr_utime = ms->ms_acct[LMS_USER];
	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];

	prscaleusage(&conv);

	pup->pr_utime	+= conv.pr_utime;
	pup->pr_stime	+= conv.pr_stime;
	pup->pr_ttime	+= conv.pr_ttime;
	pup->pr_tftime	+= conv.pr_tftime;
	pup->pr_dftime	+= conv.pr_dftime;
	pup->pr_kftime	+= conv.pr_kftime;
	pup->pr_ltime	+= conv.pr_ltime;
	pup->pr_slptime	+= conv.pr_slptime;
	pup->pr_wtime	+= conv.pr_wtime;
	pup->pr_stoptime += conv.pr_stoptime;

	/*
	 * Adjust for time waiting in the dispatcher queue.
	 */
	waitrq = t->t_waitrq;	/* hopefully atomic */
	if (waitrq != 0) {
		if (waitrq > curtime) {
			curtime = gethrtime_unscaled();
		}
		tmp = curtime - waitrq;
		scalehrtime(&tmp);
		pup->pr_wtime += tmp;
		curtime = waitrq;
	}

	/*
	 * Adjust for time spent in current microstate.
	 */
	if (ms->ms_state_start > curtime) {
		curtime = gethrtime_unscaled();
	}

	/*
	 * The unscaled clock may appear to run backwards relative to a
	 * stale timestamp; retry (up to MAX_ITERS_SPIN times) with a
	 * fresh reading until the delta is non-negative.
	 */
	i = 0;
	do {
		switch (state = t->t_mstate) {
		case LMS_SLEEP:
			/*
			 * Update the timer for the current sleep state.
			 */
			switch (state = ms->ms_prev) {
			case LMS_TFAULT:
			case LMS_DFAULT:
			case LMS_KFAULT:
			case LMS_USER_LOCK:
				break;
			default:
				state = LMS_SLEEP;
				break;
			}
			break;
		case LMS_TFAULT:
		case LMS_DFAULT:
		case LMS_KFAULT:
		case LMS_USER_LOCK:
			state = LMS_SYSTEM;
			break;
		}
		switch (state) {
		case LMS_USER:		mstimep = &pup->pr_utime;	break;
		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
		default:		panic("praddusage: unknown microstate");
		}
		tmp = curtime - ms->ms_state_start;
		if (tmp < 0) {
			curtime = gethrtime_unscaled();
			i++;
			continue;
		}
		scalehrtime(&tmp);
	} while (tmp < 0 && i < MAX_ITERS_SPIN);

	/*
	 * NOTE(review): if the retry loop exhausts MAX_ITERS_SPIN with
	 * tmp still negative, a negative delta is added here — confirm
	 * this is acceptable (bounded by the spin limit).
	 */
	*mstimep += tmp;

	/* update pup timestamp */
	pup->pr_tstamp = curtime;
	scalehrtime(&pup->pr_tstamp);

	/*
	 * Resource usage counters.
	 */
	pup->pr_minf  += lwp->lwp_ru.minflt;
	pup->pr_majf  += lwp->lwp_ru.majflt;
	pup->pr_nswap += lwp->lwp_ru.nswap;
	pup->pr_inblk += lwp->lwp_ru.inblock;
	pup->pr_oublk += lwp->lwp_ru.oublock;
	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
	pup->pr_sigs  += lwp->lwp_ru.nsignals;
	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
	pup->pr_sysc  += lwp->lwp_ru.sysc;
	pup->pr_ioch  += lwp->lwp_ru.ioch;
}
3117 
3118 /*
3119  * Convert a prhusage_t to a prusage_t.
3120  * This means convert each hrtime_t to a timestruc_t
3121  * and copy the count fields uint64_t => ulong_t.
3122  */
3123 void
3124 prcvtusage(prhusage_t *pup, prusage_t *upup)
3125 {
3126 	uint64_t *ullp;
3127 	ulong_t *ulp;
3128 	int i;
3129 
3130 	upup->pr_lwpid = pup->pr_lwpid;
3131 	upup->pr_count = pup->pr_count;
3132 
3133 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3134 	hrt2ts(pup->pr_create,	&upup->pr_create);
3135 	hrt2ts(pup->pr_term,	&upup->pr_term);
3136 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3137 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3138 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3139 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3140 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3141 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3142 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3143 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3144 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3145 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3146 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3147 	bzero(upup->filltime, sizeof (upup->filltime));
3148 
3149 	ullp = &pup->pr_minf;
3150 	ulp = &upup->pr_minf;
3151 	for (i = 0; i < 22; i++)
3152 		*ulp++ = (ulong_t)*ullp++;
3153 }
3154 
3155 #ifdef _SYSCALL32_IMPL
3156 void
3157 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3158 {
3159 	uint64_t *ullp;
3160 	uint32_t *ulp;
3161 	int i;
3162 
3163 	upup->pr_lwpid = pup->pr_lwpid;
3164 	upup->pr_count = pup->pr_count;
3165 
3166 	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3167 	hrt2ts32(pup->pr_create,	&upup->pr_create);
3168 	hrt2ts32(pup->pr_term,		&upup->pr_term);
3169 	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3170 	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3171 	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3172 	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3173 	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3174 	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3175 	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3176 	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3177 	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3178 	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3179 	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3180 	bzero(upup->filltime, sizeof (upup->filltime));
3181 
3182 	ullp = &pup->pr_minf;
3183 	ulp = &upup->pr_minf;
3184 	for (i = 0; i < 22; i++)
3185 		*ulp++ = (uint32_t)*ullp++;
3186 }
3187 #endif	/* _SYSCALL32_IMPL */
3188 
3189 /*
3190  * Determine whether a set is empty.
3191  */
3192 int
3193 setisempty(uint32_t *sp, uint_t n)
3194 {
3195 	while (n--)
3196 		if (*sp++)
3197 			return (0);
3198 	return (1);
3199 }
3200 
3201 /*
3202  * Utility routine for establishing a watched area in the process.
3203  * Keep the list of watched areas sorted by virtual address.
3204  */
3205 int
3206 set_watched_area(proc_t *p, struct watched_area *pwa)
3207 {
3208 	caddr_t vaddr = pwa->wa_vaddr;
3209 	caddr_t eaddr = pwa->wa_eaddr;
3210 	ulong_t flags = pwa->wa_flags;
3211 	struct watched_area *target;
3212 	avl_index_t where;
3213 	int error = 0;
3214 
3215 	/* we must not be holding p->p_lock, but the process must be locked */
3216 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3217 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3218 
3219 	/*
3220 	 * If this is our first watchpoint, enable watchpoints for the process.
3221 	 */
3222 	if (!pr_watch_active(p)) {
3223 		kthread_t *t;
3224 
3225 		mutex_enter(&p->p_lock);
3226 		if ((t = p->p_tlist) != NULL) {
3227 			do {
3228 				watch_enable(t);
3229 			} while ((t = t->t_forw) != p->p_tlist);
3230 		}
3231 		mutex_exit(&p->p_lock);
3232 	}
3233 
3234 	target = pr_find_watched_area(p, pwa, &where);
3235 	if (target != NULL) {
3236 		/*
3237 		 * We discovered an existing, overlapping watched area.
3238 		 * Allow it only if it is an exact match.
3239 		 */
3240 		if (target->wa_vaddr != vaddr ||
3241 		    target->wa_eaddr != eaddr)
3242 			error = EINVAL;
3243 		else if (target->wa_flags != flags) {
3244 			error = set_watched_page(p, vaddr, eaddr,
3245 			    flags, target->wa_flags);
3246 			target->wa_flags = flags;
3247 		}
3248 		kmem_free(pwa, sizeof (struct watched_area));
3249 	} else {
3250 		avl_insert(&p->p_warea, pwa, where);
3251 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3252 	}
3253 
3254 	return (error);
3255 }
3256 
3257 /*
3258  * Utility routine for clearing a watched area in the process.
3259  * Must be an exact match of the virtual address.
3260  * size and flags don't matter.
3261  */
3262 int
3263 clear_watched_area(proc_t *p, struct watched_area *pwa)
3264 {
3265 	struct watched_area *found;
3266 
3267 	/* we must not be holding p->p_lock, but the process must be locked */
3268 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3269 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3270 
3271 
3272 	if (!pr_watch_active(p)) {
3273 		kmem_free(pwa, sizeof (struct watched_area));
3274 		return (0);
3275 	}
3276 
3277 	/*
3278 	 * Look for a matching address in the watched areas.  If a match is
3279 	 * found, clear the old watched area and adjust the watched page(s).  It
3280 	 * is not an error if there is no match.
3281 	 */
3282 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3283 	    found->wa_vaddr == pwa->wa_vaddr) {
3284 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3285 		    found->wa_flags);
3286 		avl_remove(&p->p_warea, found);
3287 		kmem_free(found, sizeof (struct watched_area));
3288 	}
3289 
3290 	kmem_free(pwa, sizeof (struct watched_area));
3291 
3292 	/*
3293 	 * If we removed the last watched area from the process, disable
3294 	 * watchpoints.
3295 	 */
3296 	if (!pr_watch_active(p)) {
3297 		kthread_t *t;
3298 
3299 		mutex_enter(&p->p_lock);
3300 		if ((t = p->p_tlist) != NULL) {
3301 			do {
3302 				watch_disable(t);
3303 			} while ((t = t->t_forw) != p->p_tlist);
3304 		}
3305 		mutex_exit(&p->p_lock);
3306 	}
3307 
3308 	return (0);
3309 }
3310 
3311 /*
3312  * Frees all the watched_area structures
3313  */
3314 void
3315 pr_free_watchpoints(proc_t *p)
3316 {
3317 	struct watched_area *delp;
3318 	void *cookie;
3319 
3320 	cookie = NULL;
3321 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3322 		kmem_free(delp, sizeof (struct watched_area));
3323 
3324 	avl_destroy(&p->p_warea);
3325 }
3326 
3327 /*
3328  * This one is called by the traced process to unwatch all the
3329  * pages while deallocating the list of watched_page structs.
3330  */
3331 void
3332 pr_free_watched_pages(proc_t *p)
3333 {
3334 	struct as *as = p->p_as;
3335 	struct watched_page *pwp;
3336 	uint_t prot;
3337 	int    retrycnt, err;
3338 	void *cookie;
3339 
3340 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3341 		return;
3342 
3343 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3344 	AS_LOCK_ENTER(as, RW_WRITER);
3345 
3346 	pwp = avl_first(&as->a_wpage);
3347 
3348 	cookie = NULL;
3349 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3350 		retrycnt = 0;
3351 		if ((prot = pwp->wp_oprot) != 0) {
3352 			caddr_t addr = pwp->wp_vaddr;
3353 			struct seg *seg;
3354 		retry:
3355 
3356 			if ((pwp->wp_prot != prot ||
3357 			    (pwp->wp_flags & WP_NOWATCH)) &&
3358 			    (seg = as_segat(as, addr)) != NULL) {
3359 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3360 				if (err == IE_RETRY) {
3361 					ASSERT(retrycnt == 0);
3362 					retrycnt++;
3363 					goto retry;
3364 				}
3365 			}
3366 		}
3367 		kmem_free(pwp, sizeof (struct watched_page));
3368 	}
3369 
3370 	avl_destroy(&as->a_wpage);
3371 	p->p_wprot = NULL;
3372 
3373 	AS_LOCK_EXIT(as);
3374 }
3375 
3376 /*
3377  * Insert a watched area into the list of watched pages.
3378  * If oflags is zero then we are adding a new watched area.
3379  * Otherwise we are changing the flags of an existing watched area.
3380  */
3381 static int
3382 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3383     ulong_t flags, ulong_t oflags)
3384 {
3385 	struct as *as = p->p_as;
3386 	avl_tree_t *pwp_tree;
3387 	struct watched_page *pwp, *newpwp;
3388 	struct watched_page tpw;
3389 	avl_index_t where;
3390 	struct seg *seg;
3391 	uint_t prot;
3392 	caddr_t addr;
3393 
3394 	/*
3395 	 * We need to pre-allocate a list of structures before we grab the
3396 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3397 	 * held.
3398 	 */
3399 	newpwp = NULL;
3400 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3401 	    addr < eaddr; addr += PAGESIZE) {
3402 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3403 		pwp->wp_list = newpwp;
3404 		newpwp = pwp;
3405 	}
3406 
3407 	AS_LOCK_ENTER(as, RW_WRITER);
3408 
3409 	/*
3410 	 * Search for an existing watched page to contain the watched area.
3411 	 * If none is found, grab a new one from the available list
3412 	 * and insert it in the active list, keeping the list sorted
3413 	 * by user-level virtual address.
3414 	 */
3415 	if (p->p_flag & SVFWAIT)
3416 		pwp_tree = &p->p_wpage;
3417 	else
3418 		pwp_tree = &as->a_wpage;
3419 
3420 again:
3421 	if (avl_numnodes(pwp_tree) > prnwatch) {
3422 		AS_LOCK_EXIT(as);
3423 		while (newpwp != NULL) {
3424 			pwp = newpwp->wp_list;
3425 			kmem_free(newpwp, sizeof (struct watched_page));
3426 			newpwp = pwp;
3427 		}
3428 		return (E2BIG);
3429 	}
3430 
3431 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3432 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3433 		pwp = newpwp;
3434 		newpwp = newpwp->wp_list;
3435 		pwp->wp_list = NULL;
3436 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3437 		    (uintptr_t)PAGEMASK);
3438 		avl_insert(pwp_tree, pwp, where);
3439 	}
3440 
3441 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3442 
3443 	if (oflags & WA_READ)
3444 		pwp->wp_read--;
3445 	if (oflags & WA_WRITE)
3446 		pwp->wp_write--;
3447 	if (oflags & WA_EXEC)
3448 		pwp->wp_exec--;
3449 
3450 	ASSERT(pwp->wp_read >= 0);
3451 	ASSERT(pwp->wp_write >= 0);
3452 	ASSERT(pwp->wp_exec >= 0);
3453 
3454 	if (flags & WA_READ)
3455 		pwp->wp_read++;
3456 	if (flags & WA_WRITE)
3457 		pwp->wp_write++;
3458 	if (flags & WA_EXEC)
3459 		pwp->wp_exec++;
3460 
3461 	if (!(p->p_flag & SVFWAIT)) {
3462 		vaddr = pwp->wp_vaddr;
3463 		if (pwp->wp_oprot == 0 &&
3464 		    (seg = as_segat(as, vaddr)) != NULL) {
3465 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
3466 			pwp->wp_oprot = (uchar_t)prot;
3467 			pwp->wp_prot = (uchar_t)prot;
3468 		}
3469 		if (pwp->wp_oprot != 0) {
3470 			prot = pwp->wp_oprot;
3471 			if (pwp->wp_read)
3472 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3473 			if (pwp->wp_write)
3474 				prot &= ~PROT_WRITE;
3475 			if (pwp->wp_exec)
3476 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3477 			if (!(pwp->wp_flags & WP_NOWATCH) &&
3478 			    pwp->wp_prot != prot &&
3479 			    (pwp->wp_flags & WP_SETPROT) == 0) {
3480 				pwp->wp_flags |= WP_SETPROT;
3481 				pwp->wp_list = p->p_wprot;
3482 				p->p_wprot = pwp;
3483 			}
3484 			pwp->wp_prot = (uchar_t)prot;
3485 		}
3486 	}
3487 
3488 	/*
3489 	 * If the watched area extends into the next page then do
3490 	 * it over again with the virtual address of the next page.
3491 	 */
3492 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3493 		goto again;
3494 
3495 	AS_LOCK_EXIT(as);
3496 
3497 	/*
3498 	 * Free any pages we may have over-allocated
3499 	 */
3500 	while (newpwp != NULL) {
3501 		pwp = newpwp->wp_list;
3502 		kmem_free(newpwp, sizeof (struct watched_page));
3503 		newpwp = pwp;
3504 	}
3505 
3506 	return (0);
3507 }
3508 
3509 /*
3510  * Remove a watched area from the list of watched pages.
3511  * A watched area may extend over more than one page.
3512  */
3513 static void
3514 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3515 {
3516 	struct as *as = p->p_as;
3517 	struct watched_page *pwp;
3518 	struct watched_page tpw;
3519 	avl_tree_t *tree;
3520 	avl_index_t where;
3521 
3522 	AS_LOCK_ENTER(as, RW_WRITER);
3523 
3524 	if (p->p_flag & SVFWAIT)
3525 		tree = &p->p_wpage;
3526 	else
3527 		tree = &as->a_wpage;
3528 
3529 	tpw.wp_vaddr = vaddr =
3530 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3531 	pwp = avl_find(tree, &tpw, &where);
3532 	if (pwp == NULL)
3533 		pwp = avl_nearest(tree, where, AVL_AFTER);
3534 
3535 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3536 		ASSERT(vaddr <=  pwp->wp_vaddr);
3537 
3538 		if (flags & WA_READ)
3539 			pwp->wp_read--;
3540 		if (flags & WA_WRITE)
3541 			pwp->wp_write--;
3542 		if (flags & WA_EXEC)
3543 			pwp->wp_exec--;
3544 
3545 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3546 			/*
3547 			 * Reset the hat layer's protections on this page.
3548 			 */
3549 			if (pwp->wp_oprot != 0) {
3550 				uint_t prot = pwp->wp_oprot;
3551 
3552 				if (pwp->wp_read)
3553 					prot &=
3554 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3555 				if (pwp->wp_write)
3556 					prot &= ~PROT_WRITE;
3557 				if (pwp->wp_exec)
3558 					prot &=
3559 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3560 				if (!(pwp->wp_flags & WP_NOWATCH) &&
3561 				    pwp->wp_prot != prot &&
3562 				    (pwp->wp_flags & WP_SETPROT) == 0) {
3563 					pwp->wp_flags |= WP_SETPROT;
3564 					pwp->wp_list = p->p_wprot;
3565 					p->p_wprot = pwp;
3566 				}
3567 				pwp->wp_prot = (uchar_t)prot;
3568 			}
3569 		} else {
3570 			/*
3571 			 * No watched areas remain in this page.
3572 			 * Reset everything to normal.
3573 			 */
3574 			if (pwp->wp_oprot != 0) {
3575 				pwp->wp_prot = pwp->wp_oprot;
3576 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
3577 					pwp->wp_flags |= WP_SETPROT;
3578 					pwp->wp_list = p->p_wprot;
3579 					p->p_wprot = pwp;
3580 				}
3581 			}
3582 		}
3583 
3584 		pwp = AVL_NEXT(tree, pwp);
3585 	}
3586 
3587 	AS_LOCK_EXIT(as);
3588 }
3589 
3590 /*
3591  * Return the original protections for the specified page.
3592  */
3593 static void
3594 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3595 {
3596 	struct watched_page *pwp;
3597 	struct watched_page tpw;
3598 
3599 	ASSERT(AS_LOCK_HELD(as));
3600 
3601 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3602 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3603 		*prot = pwp->wp_oprot;
3604 }
3605 
3606 static prpagev_t *
3607 pr_pagev_create(struct seg *seg, int check_noreserve)
3608 {
3609 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3610 	size_t total_pages = seg_pages(seg);
3611 
3612 	/*
3613 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
3614 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
3615 	 * to about a megabyte of kernel heap by default.
3616 	 */
3617 	pagev->pg_npages = MIN(total_pages, pagev_lim);
3618 	pagev->pg_pnbase = 0;
3619 
3620 	pagev->pg_protv =
3621 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3622 
3623 	if (check_noreserve)
3624 		pagev->pg_incore =
3625 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3626 	else
3627 		pagev->pg_incore = NULL;
3628 
3629 	return (pagev);
3630 }
3631 
3632 static void
3633 pr_pagev_destroy(prpagev_t *pagev)
3634 {
3635 	if (pagev->pg_incore != NULL)
3636 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3637 
3638 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3639 	kmem_free(pagev, sizeof (prpagev_t));
3640 }
3641 
/*
 * Fill the page vector with protection (and optionally incore)
 * information for pages starting at addr in seg.  When noreserve
 * checking is enabled, skips runs of pages with no backing store.
 * Returns the address of the first page covered by the vector that
 * has backing store, or eaddr if none was found.
 */
static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;

	/* Clamp the query length to the end of the requested range. */
	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here:  We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory.  If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}
3702 
/*
 * Scan forward from *saddrp looking for the end of the current run of
 * backed pages that share the same protections.  On return, *saddrp is
 * the start of the run (possibly advanced past unbacked pages), *protp
 * is its protections (0 if no backed page remains), and the returned
 * address is the first byte past the run (at most eaddr).
 */
static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			if ((addr += PAGESIZE) == eaddr) {
				/* Ran off the end: no backed page found. */
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				/* Exhausted the vector; refill and retry. */
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	*protp = prot;
	return (addr);
}
3795 
/*
 * Return the size of seg in bytes.  When 'reserved' is nonzero this is
 * simply the segment size; otherwise special-case segment types whose
 * actual underlying size is smaller than their reservation.
 */
size_t
pr_getsegsize(struct seg *seg, int reserved)
{
	size_t size = seg->s_size;

	/*
	 * If we're interested in the reserved space, return the size of the
	 * segment itself.  Everything else in this function is a special case
	 * to determine the actual underlying size of various segment types.
	 */
	if (reserved)
		return (size);

	/*
	 * If this is a segvn mapping of a regular file, return the smaller
	 * of the segment size and the remaining size of the file beyond
	 * the file offset corresponding to seg->s_base.
	 */
	if (seg->s_ops == &segvn_ops) {
		vattr_t vattr;
		vnode_t *vp;

		vattr.va_mask = AT_SIZE;

		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
		    vp != NULL && vp->v_type == VREG &&
		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {

			u_offset_t fsize = vattr.va_size;
			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);

			/* Bytes of the file from 'offset' onward, if any. */
			if (fsize < offset)
				fsize = 0;
			else
				fsize -= offset;

			fsize = roundup(fsize, (u_offset_t)PAGESIZE);

			if (fsize < (u_offset_t)size)
				size = (size_t)fsize;
		}

		return (size);
	}

	/*
	 * If this is an ISM shared segment, don't include pages that are
	 * beyond the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops)
		return (MIN(spt_realsize(seg), size));

	/*
	 * If this segment is a mapping from /dev/null, then this is a
	 * reservation of virtual address space and has no actual size.
	 * Such segments are backed by segdev and have type set to neither
	 * MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, seg->s_base) &
	    (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * If this segment doesn't match one of the special types we handle,
	 * just return the size of the segment itself.
	 */
	return (size);
}
3865 
/*
 * Return the protections of the mapping beginning at *saddrp in seg,
 * and set *naddrp to the end of that uniformly-protected range (at
 * most eaddr).  *tmp carries a prpagev_t across successive calls on
 * the same segment; it is created on the first call (saddr at the
 * segment base) and destroyed automatically when naddr reaches eaddr
 * (callers handling early exit use pr_getprot_done()).
 */
uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	/* Interpret s_data as segvn or segdev private data as needed. */
	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort.  We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			/* Nothing backed in the whole range. */
			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	/* Tear down the page vector once the whole range is consumed. */
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}
3960 
3961 void
3962 pr_getprot_done(void **tmp)
3963 {
3964 	if (*tmp != NULL) {
3965 		pr_pagev_destroy((prpagev_t *)*tmp);
3966 		*tmp = NULL;
3967 	}
3968 }
3969 
3970 /*
3971  * Return true iff the vnode is a /proc file from the object directory.
3972  */
3973 int
3974 pr_isobject(vnode_t *vp)
3975 {
3976 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3977 }
3978 
3979 /*
3980  * Return true iff the vnode is a /proc file opened by the process itself.
3981  */
3982 int
3983 pr_isself(vnode_t *vp)
3984 {
3985 	/*
3986 	 * XXX: To retain binary compatibility with the old
3987 	 * ioctl()-based version of /proc, we exempt self-opens
3988 	 * of /proc/<pid> from being marked close-on-exec.
3989 	 */
3990 	return (vn_matchops(vp, prvnodeops) &&
3991 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
3992 	    VTOP(vp)->pr_type != PR_PIDDIR);
3993 }
3994 
/*
 * Determine the hat page size for the range starting at saddr and set
 * *naddrp to the end of the run of pages sharing that size (at most
 * eaddr).  Returns the hat page size, or -1 when the hat has no
 * mapping at saddr (in which case the scan advances in PAGESIZE units).
 */
static ssize_t
pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
{
	ssize_t pagesize, hatsize;

	ASSERT(AS_WRITE_HELD(seg->s_as));
	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
	ASSERT(saddr < eaddr);

	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
	ASSERT(pagesize != 0);

	/* No translation: step through the range in base pages. */
	if (pagesize == -1)
		pagesize = PAGESIZE;

	/* Align the scan cursor up to the next pagesize boundary. */
	saddr += P2NPHASE((uintptr_t)saddr, pagesize);

	/* Walk forward while the hat page size stays the same. */
	while (saddr < eaddr) {
		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
			break;
		ASSERT(IS_P2ALIGNED(saddr, pagesize));
		saddr += pagesize;
	}

	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
	return (hatsize);
}
4024 
4025 /*
4026  * Return an array of structures with extended memory map information.
4027  * We allocate here; the caller must deallocate.
4028  */
4029 int
4030 prgetxmap(proc_t *p, list_t *iolhead)
4031 {
4032 	struct as *as = p->p_as;
4033 	prxmap_t *mp;
4034 	struct seg *seg;
4035 	struct seg *brkseg, *stkseg;
4036 	struct vnode *vp;
4037 	struct vattr vattr;
4038 	uint_t prot;
4039 
4040 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4041 
4042 	/*
4043 	 * Request an initial buffer size that doesn't waste memory
4044 	 * if the address space has only a small number of segments.
4045 	 */
4046 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4047 
4048 	if ((seg = AS_SEGFIRST(as)) == NULL)
4049 		return (0);
4050 
4051 	brkseg = break_seg(p);
4052 	stkseg = as_segat(as, prgetstackbase(p));
4053 
4054 	do {
4055 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4056 		caddr_t saddr, naddr, baddr;
4057 		void *tmp = NULL;
4058 		ssize_t psz;
4059 		char *parr;
4060 		uint64_t npages;
4061 		uint64_t pagenum;
4062 
4063 		if ((seg->s_flags & S_HOLE) != 0) {
4064 			continue;
4065 		}
4066 		/*
4067 		 * Segment loop part one: iterate from the base of the segment
4068 		 * to its end, pausing at each address boundary (baddr) between
4069 		 * ranges that have different virtual memory protections.
4070 		 */
4071 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4072 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4073 			ASSERT(baddr >= saddr && baddr <= eaddr);
4074 
4075 			/*
4076 			 * Segment loop part two: iterate from the current
4077 			 * position to the end of the protection boundary,
4078 			 * pausing at each address boundary (naddr) between
4079 			 * ranges that have different underlying page sizes.
4080 			 */
4081 			for (; saddr < baddr; saddr = naddr) {
4082 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4083 				ASSERT(naddr >= saddr && naddr <= baddr);
4084 
4085 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4086 
4087 				mp->pr_vaddr = (uintptr_t)saddr;
4088 				mp->pr_size = naddr - saddr;
4089 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4090 				mp->pr_mflags = 0;
4091 				if (prot & PROT_READ)
4092 					mp->pr_mflags |= MA_READ;
4093 				if (prot & PROT_WRITE)
4094 					mp->pr_mflags |= MA_WRITE;
4095 				if (prot & PROT_EXEC)
4096 					mp->pr_mflags |= MA_EXEC;
4097 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4098 					mp->pr_mflags |= MA_SHARED;
4099 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4100 					mp->pr_mflags |= MA_NORESERVE;
4101 				if (seg->s_ops == &segspt_shmops ||
4102 				    (seg->s_ops == &segvn_ops &&
4103 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4104 				    vp == NULL)))
4105 					mp->pr_mflags |= MA_ANON;
4106 				if (seg == brkseg)
4107 					mp->pr_mflags |= MA_BREAK;
4108 				else if (seg == stkseg)
4109 					mp->pr_mflags |= MA_STACK;
4110 				if (seg->s_ops == &segspt_shmops)
4111 					mp->pr_mflags |= MA_ISM | MA_SHM;
4112 
4113 				mp->pr_pagesize = PAGESIZE;
4114 				if (psz == -1) {
4115 					mp->pr_hatpagesize = 0;
4116 				} else {
4117 					mp->pr_hatpagesize = psz;
4118 				}
4119 
4120 				/*
4121 				 * Manufacture a filename for the "object" dir.
4122 				 */
4123 				mp->pr_dev = PRNODEV;
4124 				vattr.va_mask = AT_FSID|AT_NODEID;
4125 				if (seg->s_ops == &segvn_ops &&
4126 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4127 				    vp != NULL && vp->v_type == VREG &&
4128 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4129 				    NULL) == 0) {
4130 					mp->pr_dev = vattr.va_fsid;
4131 					mp->pr_ino = vattr.va_nodeid;
4132 					if (vp == p->p_exec)
4133 						(void) strcpy(mp->pr_mapname,
4134 						    "a.out");
4135 					else
4136 						pr_object_name(mp->pr_mapname,
4137 						    vp, &vattr);
4138 				}
4139 
4140 				/*
4141 				 * Get the SysV shared memory id, if any.
4142 				 */
4143 				if ((mp->pr_mflags & MA_SHARED) &&
4144 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4145 				    seg->s_base)) != SHMID_NONE) {
4146 					if (mp->pr_shmid == SHMID_FREE)
4147 						mp->pr_shmid = -1;
4148 
4149 					mp->pr_mflags |= MA_SHM;
4150 				} else {
4151 					mp->pr_shmid = -1;
4152 				}
4153 
4154 				npages = ((uintptr_t)(naddr - saddr)) >>
4155 				    PAGESHIFT;
4156 				parr = kmem_zalloc(npages, KM_SLEEP);
4157 
4158 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4159 
4160 				for (pagenum = 0; pagenum < npages; pagenum++) {
4161 					if (parr[pagenum] & SEG_PAGE_INCORE)
4162 						mp->pr_rss++;
4163 					if (parr[pagenum] & SEG_PAGE_ANON)
4164 						mp->pr_anon++;
4165 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4166 						mp->pr_locked++;
4167 				}
4168 				kmem_free(parr, npages);
4169 			}
4170 		}
4171 		ASSERT(tmp == NULL);
4172 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4173 
4174 	return (0);
4175 }
4176 
4177 /*
4178  * Return the process's credentials.  We don't need a 32-bit equivalent of
4179  * this function because prcred_t and prcred32_t are actually the same.
4180  */
4181 void
4182 prgetcred(proc_t *p, prcred_t *pcrp)
4183 {
4184 	mutex_enter(&p->p_crlock);
4185 	cred2prcred(p->p_cred, pcrp);
4186 	mutex_exit(&p->p_crlock);
4187 }
4188 
4189 void
4190 prgetsecflags(proc_t *p, prsecflags_t *psfp)
4191 {
4192 	ASSERT(psfp != NULL);
4193 
4194 	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
4195 	psfp->pr_lower = p->p_secflags.psf_lower;
4196 	psfp->pr_upper = p->p_secflags.psf_upper;
4197 	psfp->pr_effective = p->p_secflags.psf_effective;
4198 	psfp->pr_inherit = p->p_secflags.psf_inherit;
4199 }
4200 
4201 /*
4202  * Compute actual size of the prpriv_t structure.
4203  */
4204 
4205 size_t
4206 prgetprivsize(void)
4207 {
4208 	return (priv_prgetprivsize(NULL));
4209 }
4210 
4211 /*
4212  * Return the process's privileges.  We don't need a 32-bit equivalent of
4213  * this function because prpriv_t and prpriv32_t are actually the same.
4214  */
void
prgetpriv(proc_t *p, prpriv_t *pprp)
{
	/*
	 * Hold p_crlock so p_cred stays stable while cred2prpriv()
	 * copies the credential's privilege sets into *pprp.  The
	 * buffer must be at least prgetprivsize() bytes.
	 */
	mutex_enter(&p->p_crlock);
	cred2prpriv(p->p_cred, pprp);
	mutex_exit(&p->p_crlock);
}
4222 
4223 #ifdef _SYSCALL32_IMPL
4224 /*
4225  * Return an array of structures with HAT memory map information.
4226  * We allocate here; the caller must deallocate.
4227  */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	/* The caller must hold the address space lock as writer. */
	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/*
	 * Identify the heap and stack segments up front so each emitted
	 * mapping can be tagged MA_BREAK or MA_STACK below.
	 */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/* Hole segments reserve address space only; skip them. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				/* One prxmap32_t per homogeneous range. */
				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				/*
				 * Anonymous memory: either ISM (segspt) or a
				 * segvn segment with no backing vnode.
				 */
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				/* psz == -1: no uniform HAT page size here. */
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					/* Compress dev_t to 32-bit form. */
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				/*
				 * Count resident, anonymous, and locked pages
				 * in this range via a per-page incore vector.
				 */
				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		/* pr_getprot() must have released its temporary state. */
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
4377 #endif	/* _SYSCALL32_IMPL */
4378