xref: /titanic_41/usr/src/uts/common/fs/proc/prsubr.c (revision 6185db853e024a486ff8837e6784dd290d866112)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/types.h>
34 #include <sys/t_lock.h>
35 #include <sys/param.h>
36 #include <sys/cmn_err.h>
37 #include <sys/cred.h>
38 #include <sys/priv.h>
39 #include <sys/debug.h>
40 #include <sys/errno.h>
41 #include <sys/inline.h>
42 #include <sys/kmem.h>
43 #include <sys/mman.h>
44 #include <sys/proc.h>
45 #include <sys/sobject.h>
46 #include <sys/sysmacros.h>
47 #include <sys/systm.h>
48 #include <sys/uio.h>
49 #include <sys/var.h>
50 #include <sys/vfs.h>
51 #include <sys/vnode.h>
52 #include <sys/session.h>
53 #include <sys/pcb.h>
54 #include <sys/signal.h>
55 #include <sys/user.h>
56 #include <sys/disp.h>
57 #include <sys/class.h>
58 #include <sys/ts.h>
59 #include <sys/bitmap.h>
60 #include <sys/poll.h>
61 #include <sys/shm_impl.h>
62 #include <sys/fault.h>
63 #include <sys/syscall.h>
64 #include <sys/procfs.h>
65 #include <sys/processor.h>
66 #include <sys/cpuvar.h>
67 #include <sys/copyops.h>
68 #include <sys/time.h>
69 #include <sys/msacct.h>
70 #include <vm/as.h>
71 #include <vm/rm.h>
72 #include <vm/seg.h>
73 #include <vm/seg_vn.h>
74 #include <vm/seg_dev.h>
75 #include <vm/seg_spt.h>
76 #include <vm/page.h>
77 #include <sys/vmparam.h>
78 #include <sys/swap.h>
79 #include <fs/proc/prdata.h>
80 #include <sys/task.h>
81 #include <sys/project.h>
82 #include <sys/contract_impl.h>
83 #include <sys/contract/process.h>
84 #include <sys/contract/process_impl.h>
85 #include <sys/schedctl.h>
86 #include <sys/pool.h>
87 #include <sys/zone.h>
88 #include <sys/atomic.h>
89 #include <sys/sdt.h>
90 
/*
 * NOTE(review): MAX_ITERS_SPIN is not referenced by the functions in the
 * first part of this file; presumably it bounds a retry/spin loop further
 * down -- confirm against its users.
 */
#define	MAX_ITERS_SPIN	5

/*
 * Page vector: two parallel per-page arrays (protections and incore flags)
 * describing pg_npages pages of a segment, the first of which is page
 * number pg_pnbase within that segment.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

/* Tunable (non-static global): default is 256K pages per prpagev_t. */
size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

/* Segment driver ops vectors declared here for lack of a shared header. */
extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/* File-local watched-page helpers; their definitions are not shown here. */
static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
107 
/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 * Returns NULL (with no thread lock held) if the process has no threads.
 * The caller must hold p->p_lock.
 *
 * Selection order, highest preference first:
 *	the agent lwp, if one exists;
 *	a thread running on a processor;
 *	a runnable thread;
 *	a sleeping thread (other than one sleeping on p_holdlwps);
 *	a jobcontrol stop without a directed stop pending;
 *	a jobcontrol stop with a directed stop pending;
 *	an event-of-interest stop (syscall entry/exit, signal, fault);
 *	a requested stop (including virtually stopped threads);
 *	a thread sleeping on p_holdlwps;
 *	a suspended thread;
 *	otherwise the head of the thread list (only zombies remain).
 * Within a category the first thread found on the list is used, except
 * that an event-of-interest candidate stopped on entry to exit() is
 * replaced by any later event-of-interest thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		/*
		 * A virtually stopped thread is classified as a requested
		 * stop without taking its thread lock; 'continue' goes
		 * straight to the loop condition, which advances t.
		 */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/*
			 * this is filthy: a sleeper is recognized as
			 * "performing a hold" purely by its wait channel.
			 */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	/* Pick the best candidate; the order of these tests is the policy. */
	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	/* The candidate was unlocked during the scan; lock it for the caller. */
	if (t != NULL)
		thread_lock(t);
	return (t);
}
248 
249 /*
250  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
251  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
252  * on the /proc file descriptor.  Called from stop() when a traced
253  * process stops on an event of interest.  Also called from exit()
254  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
255  */
256 void
257 prnotify(struct vnode *vp)
258 {
259 	prcommon_t *pcp = VTOP(vp)->pr_common;
260 
261 	mutex_enter(&pcp->prc_mutex);
262 	cv_broadcast(&pcp->prc_wait);
263 	mutex_exit(&pcp->prc_mutex);
264 	if (pcp->prc_flags & PRC_POLL) {
265 		/*
266 		 * We call pollwakeup() with POLLHUP to ensure that
267 		 * the pollers are awakened even if they are polling
268 		 * for nothing (i.e., waiting for the process to exit).
269 		 * This enables the use of the PRC_POLL flag for optimization
270 		 * (we can turn off PRC_POLL only if we know no pollers remain).
271 		 */
272 		pcp->prc_flags &= ~PRC_POLL;
273 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
274 	}
275 }
276 
277 /* called immediately below, in prfree() */
278 static void
279 prfreenotify(vnode_t *vp)
280 {
281 	prnode_t *pnp;
282 	prcommon_t *pcp;
283 
284 	while (vp != NULL) {
285 		pnp = VTOP(vp);
286 		pcp = pnp->pr_common;
287 		ASSERT(pcp->prc_thread == NULL);
288 		pcp->prc_proc = NULL;
289 		/*
290 		 * We can't call prnotify() here because we are holding
291 		 * pidlock.  We assert that there is no need to.
292 		 */
293 		mutex_enter(&pcp->prc_mutex);
294 		cv_broadcast(&pcp->prc_wait);
295 		mutex_exit(&pcp->prc_mutex);
296 		ASSERT(!(pcp->prc_flags & PRC_POLL));
297 
298 		vp = pnp->pr_next;
299 		pnp->pr_next = NULL;
300 	}
301 }
302 
/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 *
 * The caller must hold pidlock.  pr_pidlock and p->p_lock are taken
 * and released here.
 */
void
prfree(proc_t *p)
{
	/* Captured once; indexes the per-slot condition variable array. */
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * Sleep on the slot's cv with only p_lock held, then drop
		 * p_lock so both locks can be retaken in the documented
		 * order (pr_pidlock before p_lock) before re-testing.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	/* Detach both vnode lists hanging off the process. */
	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}
348 
349 /*
350  * Called from a hook in exit() when a traced process is becoming a zombie.
351  */
352 void
353 prexit(proc_t *p)
354 {
355 	ASSERT(MUTEX_HELD(&p->p_lock));
356 
357 	if (pr_watch_active(p)) {
358 		pr_free_watchpoints(p);
359 		watch_disable(curthread);
360 	}
361 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
362 	if (p->p_trace) {
363 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
364 		prnotify(p->p_trace);
365 	}
366 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
367 }
368 
369 /*
370  * Called when a thread calls lwp_exit().
371  */
372 void
373 prlwpexit(kthread_t *t)
374 {
375 	vnode_t *vp;
376 	prnode_t *pnp;
377 	prcommon_t *pcp;
378 	proc_t *p = ttoproc(t);
379 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
380 
381 	ASSERT(t == curthread);
382 	ASSERT(MUTEX_HELD(&p->p_lock));
383 
384 	/*
385 	 * The process must be blocked against /proc to do this safely.
386 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
387 	 * It is the caller's responsibility to have called prbarrier(p).
388 	 */
389 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
390 
391 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
392 		pnp = VTOP(vp);
393 		pcp = pnp->pr_common;
394 		if (pcp->prc_thread == t) {
395 			pcp->prc_thread = NULL;
396 			pcp->prc_flags |= PRC_DESTROY;
397 		}
398 	}
399 
400 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
401 		pnp = VTOP(vp);
402 		pcp = pnp->pr_common;
403 		pcp->prc_thread = NULL;
404 		pcp->prc_flags |= PRC_DESTROY;
405 		prnotify(vp);
406 	}
407 
408 	if (p->p_trace)
409 		prnotify(p->p_trace);
410 }
411 
412 /*
413  * Called when a zombie thread is joined or when a
414  * detached lwp exits.  Called from lwp_hash_out().
415  */
416 void
417 prlwpfree(proc_t *p, lwpent_t *lep)
418 {
419 	vnode_t *vp;
420 	prnode_t *pnp;
421 	prcommon_t *pcp;
422 
423 	ASSERT(MUTEX_HELD(&p->p_lock));
424 
425 	/*
426 	 * The process must be blocked against /proc to do this safely.
427 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
428 	 * It is the caller's responsibility to have called prbarrier(p).
429 	 */
430 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
431 
432 	vp = lep->le_trace;
433 	lep->le_trace = NULL;
434 	while (vp) {
435 		prnotify(vp);
436 		pnp = VTOP(vp);
437 		pcp = pnp->pr_common;
438 		ASSERT(pcp->prc_thread == NULL &&
439 		    (pcp->prc_flags & PRC_DESTROY));
440 		pcp->prc_tslot = -1;
441 		vp = pnp->pr_next;
442 		pnp->pr_next = NULL;
443 	}
444 
445 	if (p->p_trace)
446 		prnotify(p->p_trace);
447 }
448 
449 /*
450  * Called from a hook in exec() when a thread starts exec().
451  */
452 void
453 prexecstart(void)
454 {
455 	proc_t *p = ttoproc(curthread);
456 	klwp_t *lwp = ttolwp(curthread);
457 
458 	/*
459 	 * The P_PR_EXEC flag blocks /proc operations for
460 	 * the duration of the exec().
461 	 * We can't start exec() while the process is
462 	 * locked by /proc, so we call prbarrier().
463 	 * lwp_nostop keeps the process from being stopped
464 	 * via job control for the duration of the exec().
465 	 */
466 
467 	ASSERT(MUTEX_HELD(&p->p_lock));
468 	prbarrier(p);
469 	lwp->lwp_nostop++;
470 	p->p_proc_flag |= P_PR_EXEC;
471 }
472 
473 /*
474  * Called from a hook in exec() when a thread finishes exec().
475  * The thread may or may not have succeeded.  Some other thread
476  * may have beat it to the punch.
477  */
478 void
479 prexecend(void)
480 {
481 	proc_t *p = ttoproc(curthread);
482 	klwp_t *lwp = ttolwp(curthread);
483 	vnode_t *vp;
484 	prnode_t *pnp;
485 	prcommon_t *pcp;
486 	model_t model = p->p_model;
487 	id_t tid = curthread->t_tid;
488 	int tslot = curthread->t_dslot;
489 
490 	ASSERT(MUTEX_HELD(&p->p_lock));
491 
492 	lwp->lwp_nostop--;
493 	if (p->p_flag & SEXITLWPS) {
494 		/*
495 		 * We are on our way to exiting because some
496 		 * other thread beat us in the race to exec().
497 		 * Don't clear the P_PR_EXEC flag in this case.
498 		 */
499 		return;
500 	}
501 
502 	/*
503 	 * Wake up anyone waiting in /proc for the process to complete exec().
504 	 */
505 	p->p_proc_flag &= ~P_PR_EXEC;
506 	if ((vp = p->p_trace) != NULL) {
507 		pcp = VTOP(vp)->pr_common;
508 		mutex_enter(&pcp->prc_mutex);
509 		cv_broadcast(&pcp->prc_wait);
510 		mutex_exit(&pcp->prc_mutex);
511 		for (; vp != NULL; vp = pnp->pr_next) {
512 			pnp = VTOP(vp);
513 			pnp->pr_common->prc_datamodel = model;
514 		}
515 	}
516 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
517 		/*
518 		 * We dealt with the process common above.
519 		 */
520 		ASSERT(p->p_trace != NULL);
521 		pcp = VTOP(vp)->pr_common;
522 		mutex_enter(&pcp->prc_mutex);
523 		cv_broadcast(&pcp->prc_wait);
524 		mutex_exit(&pcp->prc_mutex);
525 		for (; vp != NULL; vp = pnp->pr_next) {
526 			pnp = VTOP(vp);
527 			pcp = pnp->pr_common;
528 			pcp->prc_datamodel = model;
529 			pcp->prc_tid = tid;
530 			pcp->prc_tslot = tslot;
531 		}
532 	}
533 }
534 
535 /*
536  * Called from a hook in relvm() just before freeing the address space.
537  * We free all the watched areas now.
538  */
539 void
540 prrelvm(void)
541 {
542 	proc_t *p = ttoproc(curthread);
543 
544 	mutex_enter(&p->p_lock);
545 	prbarrier(p);	/* block all other /proc operations */
546 	if (pr_watch_active(p)) {
547 		pr_free_watchpoints(p);
548 		watch_disable(curthread);
549 	}
550 	mutex_exit(&p->p_lock);
551 	pr_free_watched_pages(p);
552 }
553 
/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 *
 * 'up' is the user structure whose syscall tracing state
 * (u_entrymask, u_exitmask, u_systrap) is cleared when tracing is
 * turned off.  The function operates on the current thread's process
 * and takes and releases p->p_lock itself.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;	/* stays 0 if there is no p_trace vnode */

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		/* Writer accounting is kept on the process-level common. */
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	/* Now the /proc vnodes that refer to the current (only) lwp. */
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);		/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}
665 
/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourself kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 *
 * Note that on a NULL return the caller still owns pr_pidlock and must
 * release it; p_lock is not held in that case.
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	/* No process-level common, or the process is already gone. */
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		/*
		 * Sleep holding only p_lock, then drop it so the two
		 * locks can be retaken in order (pr_pidlock first).
		 * With pr_pidlock held, prc_proc tells us whether the
		 * process went away while we slept.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}
705 
/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 * On success the caller holds p->p_lock and undoes the lock with
 * prunlock(); on failure neither p_lock nor P_PR_LOCK is held.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	/* pr_p_lock() always returns with pr_pidlock held; drop it now. */
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 * A thread slot of -1 fails regardless of zdisp.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 *
	 * prc_mutex is taken before the process is unlocked;
	 * NOTE(review): presumably so that the broadcast issued at the
	 * end of the exec() cannot slip in between -- confirm.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
791 
792 /*
793  * Undo prlock() and pr_p_lock().
794  * p->p_lock is still held; pr_pidlock is no longer held.
795  *
796  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
797  * if any, waiting for the flag to be dropped; it retains p->p_lock.
798  *
799  * prunlock() calls prunmark() and then drops p->p_lock.
800  */
801 void
802 prunmark(proc_t *p)
803 {
804 	ASSERT(p->p_proc_flag & P_PR_LOCK);
805 	ASSERT(MUTEX_HELD(&p->p_lock));
806 
807 	cv_signal(&pr_pid_cv[p->p_slot]);
808 	p->p_proc_flag &= ~P_PR_LOCK;
809 	THREAD_KPRI_RELEASE();
810 }
811 
812 void
813 prunlock(prnode_t *pnp)
814 {
815 	prcommon_t *pcp = pnp->pr_common;
816 	proc_t *p = pcp->prc_proc;
817 
818 	/*
819 	 * If we (or someone) gave it a SIGKILL, and it is not
820 	 * already a zombie, set it running unconditionally.
821 	 */
822 	if ((p->p_flag & SKILLED) &&
823 	    !(p->p_flag & SEXITING) &&
824 	    !(pcp->prc_flags & PRC_DESTROY) &&
825 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
826 		(void) pr_setrun(pnp, 0);
827 	prunmark(p);
828 	mutex_exit(&p->p_lock);
829 }
830 
831 /*
832  * Called while holding p->p_lock to delay until the process is unlocked.
833  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
834  * The process cannot become locked again until p->p_lock is dropped.
835  */
836 void
837 prbarrier(proc_t *p)
838 {
839 	ASSERT(MUTEX_HELD(&p->p_lock));
840 
841 	if (p->p_proc_flag & P_PR_LOCK) {
842 		/* The process is locked; delay until not locked */
843 		uint_t slot = p->p_slot;
844 
845 		while (p->p_proc_flag & P_PR_LOCK)
846 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
847 		cv_signal(&pr_pid_cv[slot]);
848 	}
849 }
850 
851 /*
852  * Return process/lwp status.
853  * The u-block is mapped in by this routine and unmapped at the end.
854  */
855 void
856 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
857 {
858 	kthread_t *t;
859 
860 	ASSERT(MUTEX_HELD(&p->p_lock));
861 
862 	t = prchoose(p);	/* returns locked thread */
863 	ASSERT(t != NULL);
864 	thread_unlock(t);
865 
866 	/* just bzero the process part, prgetlwpstatus() does the rest */
867 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
868 	sp->pr_nlwp = p->p_lwpcnt;
869 	sp->pr_nzomb = p->p_zombcnt;
870 	prassignset(&sp->pr_sigpend, &p->p_sig);
871 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
872 	sp->pr_brksize = p->p_brksize;
873 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
874 	sp->pr_stksize = p->p_stksize;
875 	sp->pr_pid = p->p_pid;
876 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
877 	    (p->p_flag & SZONETOP)) {
878 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
879 		/*
880 		 * Inside local zones, fake zsched's pid as parent pids for
881 		 * processes which reference processes outside of the zone.
882 		 */
883 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
884 	} else {
885 		sp->pr_ppid = p->p_ppid;
886 	}
887 	sp->pr_pgid  = p->p_pgrp;
888 	sp->pr_sid   = p->p_sessp->s_sid;
889 	sp->pr_taskid = p->p_task->tk_tkid;
890 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
891 	sp->pr_zoneid = p->p_zone->zone_id;
892 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
893 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
894 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
895 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
896 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
897 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
898 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
899 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
900 	switch (p->p_model) {
901 	case DATAMODEL_ILP32:
902 		sp->pr_dmodel = PR_MODEL_ILP32;
903 		break;
904 	case DATAMODEL_LP64:
905 		sp->pr_dmodel = PR_MODEL_LP64;
906 		break;
907 	}
908 	if (p->p_agenttp)
909 		sp->pr_agentid = p->p_agenttp->t_tid;
910 
911 	/* get the chosen lwp's status */
912 	prgetlwpstatus(t, &sp->pr_lwp, zp);
913 
914 	/* replicate the flags */
915 	sp->pr_flags = sp->pr_lwp.pr_flags;
916 }
917 
918 #ifdef _SYSCALL32_IMPL
/*
 * Fill in the 32-bit lwp status structure *sp for thread t.
 *
 * The caller must hold p->p_lock (p being t's process).  The lock is
 * dropped near the end while register and system call state is
 * collected, and is held again when the function returns.
 *
 * 'zp' is the zone on whose behalf the status is produced; signal
 * sender details that belong to a different zone are masked when zp
 * is not the global zone.
 */
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	/* Stop state: a virtually stopped thread reports as an istop. */
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	/* A stop directive is pending but has not taken effect yet. */
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	/* Process-wide mode flags are reported in every lwp's status. */
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	/* Called before t_hold is read for the held-signal set below. */
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	/* siginfo: the fault's own info if faulted, else the current signal's */
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	/*
	 * For a non-global zone observer, replace the details of a user
	 * sender from another zone: zsched's pid, uid 0, no contract id,
	 * and the observer's own zone id.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	/*
	 * At most size-1 bytes are copied; the final byte stays zero from
	 * the bzero() above, so pr_clname is always NUL-terminated.
	 */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
		sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	/* System time reported for the lwp includes time spent in traps. */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	/* Syscall number and arguments, if stopped or asleep in a syscall. */
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
			(int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	/* Return values are reported at a syscall-exit stop or for vfork. */
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For exec, report argv and envp from the user structure
		 * and, if the aux vector has an AT_SUN_EXECNAME entry,
		 * the executable name pointer as argument 0.
		 */
		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					(caddr32_t)(uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	/* Re-take p_lock; the caller expects it held on return. */
	mutex_enter(&p->p_lock);
}
1076 
1077 void
1078 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1079 {
1080 	kthread_t *t;
1081 
1082 	ASSERT(MUTEX_HELD(&p->p_lock));
1083 
1084 	t = prchoose(p);	/* returns locked thread */
1085 	ASSERT(t != NULL);
1086 	thread_unlock(t);
1087 
1088 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1089 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1090 	sp->pr_nlwp = p->p_lwpcnt;
1091 	sp->pr_nzomb = p->p_zombcnt;
1092 	prassignset(&sp->pr_sigpend, &p->p_sig);
1093 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1094 	sp->pr_brksize = (uint32_t)p->p_brksize;
1095 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1096 	sp->pr_stksize = (uint32_t)p->p_stksize;
1097 	sp->pr_pid   = p->p_pid;
1098 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1099 	    (p->p_flag & SZONETOP)) {
1100 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1101 		/*
1102 		 * Inside local zones, fake zsched's pid as parent pids for
1103 		 * processes which reference processes outside of the zone.
1104 		 */
1105 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1106 	} else {
1107 		sp->pr_ppid = p->p_ppid;
1108 	}
1109 	sp->pr_pgid  = p->p_pgrp;
1110 	sp->pr_sid   = p->p_sessp->s_sid;
1111 	sp->pr_taskid = p->p_task->tk_tkid;
1112 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1113 	sp->pr_zoneid = p->p_zone->zone_id;
1114 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1115 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1116 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1117 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1118 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1119 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1120 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1121 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1122 	switch (p->p_model) {
1123 	case DATAMODEL_ILP32:
1124 		sp->pr_dmodel = PR_MODEL_ILP32;
1125 		break;
1126 	case DATAMODEL_LP64:
1127 		sp->pr_dmodel = PR_MODEL_LP64;
1128 		break;
1129 	}
1130 	if (p->p_agenttp)
1131 		sp->pr_agentid = p->p_agenttp->t_tid;
1132 
1133 	/* get the chosen lwp's status */
1134 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1135 
1136 	/* replicate the flags */
1137 	sp->pr_flags = sp->pr_lwp.pr_flags;
1138 }
1139 #endif	/* _SYSCALL32_IMPL */
1140 
/*
 * Return lwp status.
 *
 * Fills in *sp (native lwpstatus_t) for the lwp associated with thread t.
 * zp is the zone on whose behalf the status is reported; it is used only
 * to rewrite the sender identity in the reported siginfo (see below).
 *
 * The caller must hold p->p_lock.  The lock is dropped part way through
 * (while the lwp's registers, system call arguments and return values are
 * fetched) and is reacquired before returning.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	/*
	 * Accumulate the PR_* flag word from the thread state, the per-lwp
	 * flags and the per-process flags.
	 */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	/* PR_DSTOP: a TP_PRSTOP request is set but PR_ISTOP is not (yet) */
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	sp->pr_flags = flags;
	/*
	 * A VSTOPPED() lwp is reported as PR_REQUESTED; otherwise report
	 * the stop reason recorded in the thread.
	 */
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	/* called before t_hold is sampled on the next line */
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	/*
	 * Report the fault siginfo when stopped on a fault, otherwise the
	 * siginfo of the current signal if there is one; pr_info stays
	 * zeroed when neither applies.
	 */
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	/*
	 * For a non-global zone zp, when the reported siginfo names a
	 * different zone, substitute zp's zsched pid, uid 0, ctid -1 and
	 * zp's zone id for the sender identity.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	/*
	 * Copy at most sizeof (pr_clname) - 1 bytes; the final byte keeps
	 * the zero written by the bzero() above, so the name is terminated.
	 */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
		sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* system time is reported as LMS_SYSTEM plus LMS_TRAP */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 * PR_PCINVAL is set whenever pr_instr is not filled in.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	/*
	 * System call number and arguments are fetched only when the lwp
	 * is stopped or asleep and t_sysnum is nonzero.
	 */
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
			(long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	/* registers are fetched only for a stopped lwp or the caller itself */
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	/*
	 * Return values are reported when stopped on system call exit, or
	 * when PR_VFORKP was set above.
	 */
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For exec/execve, overwrite the first three reported
		 * arguments with values from the u-area: the
		 * AT_SUN_EXECNAME aux vector pointer (0 if that entry is
		 * absent), u_argv and u_envp.
		 */
		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
						(uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	/* reacquire p_lock so the caller's locking state is unchanged */
	mutex_enter(&p->p_lock);
}
1296 
1297 /*
1298  * Get the sigaction structure for the specified signal.  The u-block
1299  * must already have been mapped in by the caller.
1300  */
1301 void
1302 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1303 {
1304 	bzero(sp, sizeof (*sp));
1305 
1306 	if (sig != 0 && (unsigned)sig < NSIG) {
1307 		sp->sa_handler = up->u_signal[sig-1];
1308 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1309 		if (sigismember(&up->u_sigonstack, sig))
1310 			sp->sa_flags |= SA_ONSTACK;
1311 		if (sigismember(&up->u_sigresethand, sig))
1312 			sp->sa_flags |= SA_RESETHAND;
1313 		if (sigismember(&up->u_sigrestart, sig))
1314 			sp->sa_flags |= SA_RESTART;
1315 		if (sigismember(&p->p_siginfo, sig))
1316 			sp->sa_flags |= SA_SIGINFO;
1317 		if (sigismember(&up->u_signodefer, sig))
1318 			sp->sa_flags |= SA_NODEFER;
1319 		if (sig == SIGCLD) {
1320 			if (p->p_flag & SNOWAIT)
1321 				sp->sa_flags |= SA_NOCLDWAIT;
1322 			if ((p->p_flag & SJCTL) == 0)
1323 				sp->sa_flags |= SA_NOCLDSTOP;
1324 		}
1325 	}
1326 }
1327 
1328 #ifdef _SYSCALL32_IMPL
1329 void
1330 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1331 {
1332 	bzero(sp, sizeof (*sp));
1333 
1334 	if (sig != 0 && (unsigned)sig < NSIG) {
1335 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1336 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1337 		if (sigismember(&up->u_sigonstack, sig))
1338 			sp->sa_flags |= SA_ONSTACK;
1339 		if (sigismember(&up->u_sigresethand, sig))
1340 			sp->sa_flags |= SA_RESETHAND;
1341 		if (sigismember(&up->u_sigrestart, sig))
1342 			sp->sa_flags |= SA_RESTART;
1343 		if (sigismember(&p->p_siginfo, sig))
1344 			sp->sa_flags |= SA_SIGINFO;
1345 		if (sigismember(&up->u_signodefer, sig))
1346 			sp->sa_flags |= SA_NODEFER;
1347 		if (sig == SIGCLD) {
1348 			if (p->p_flag & SNOWAIT)
1349 				sp->sa_flags |= SA_NOCLDWAIT;
1350 			if ((p->p_flag & SJCTL) == 0)
1351 				sp->sa_flags |= SA_NOCLDSTOP;
1352 		}
1353 	}
1354 }
1355 #endif	/* _SYSCALL32_IMPL */
1356 
1357 /*
1358  * Count the number of segments in this process's address space.
1359  */
1360 int
1361 prnsegs(struct as *as, int reserved)
1362 {
1363 	int n = 0;
1364 	struct seg *seg;
1365 
1366 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1367 
1368 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1369 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1370 		caddr_t saddr, naddr;
1371 		void *tmp = NULL;
1372 
1373 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1374 			(void) pr_getprot(seg, reserved, &tmp,
1375 			    &saddr, &naddr, eaddr);
1376 			if (saddr != naddr)
1377 				n++;
1378 		}
1379 
1380 		ASSERT(tmp == NULL);
1381 	}
1382 
1383 	return (n);
1384 }
1385 
/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 *
 * No terminating null is written unless 'len' exceeds the digit count;
 * the digits themselves are always written in full.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* 32-bit unsigned integer fits in 10 digits */
	char *padend = s + len;
	int ndigits = 0;
	int i;

	/* generate the digits least-significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = ndigits; i > 0; i--)
		*s++ = digits[i - 1];

	/* optional pad out to 'len' bytes */
	for (; s < padend; s++)
		*s = '\0';

	return (ndigits);
}
1414 
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 *
 * No terminating null is written; the caller appends whatever follows.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* 64-bit unsigned integer fits in 20 digits */
	int ndigits = 0;
	int i;

	/* generate the digits least-significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = ndigits; i > 0; i--)
		*s++ = digits[i - 1];

	return (ndigits);
}
1439 
1440 void
1441 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1442 {
1443 	char *s = name;
1444 	struct vfs *vfsp;
1445 	struct vfssw *vfsswp;
1446 
1447 	if ((vfsp = vp->v_vfsp) != NULL &&
1448 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1449 	    *vfsswp->vsw_name) {
1450 		(void) strcpy(s, vfsswp->vsw_name);
1451 		s += strlen(s);
1452 		*s++ = '.';
1453 	}
1454 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1455 	*s++ = '.';
1456 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1457 	*s++ = '.';
1458 	s += pr_u64tos(vattr->va_nodeid, s);
1459 	*s++ = '\0';
1460 }
1461 
1462 struct seg *
1463 break_seg(proc_t *p)
1464 {
1465 	caddr_t addr = p->p_brkbase;
1466 	struct seg *seg;
1467 	struct vnode *vp;
1468 
1469 	if (p->p_brksize != 0)
1470 		addr += p->p_brksize - 1;
1471 	seg = as_segat(p->p_as, addr);
1472 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1473 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1474 		return (seg);
1475 	return (NULL);
1476 }
1477 
/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 *
 * Each buffer is one kmem allocation consisting of a piol_t header
 * followed directly by its data area.  Buffers are linked on a list_t
 * through piol_link; pr_iol_newbuf() carves fixed-size items out of the
 * tail buffer and the *_and_free() routines copy out piol_usedsize bytes
 * of each buffer's data area and free the buffer using piol_size.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

/*
 * MAPSIZE is the allocation size of every buffer added by pr_iol_newbuf()
 * and the upper bound on the initial buffer in pr_iol_initlist().
 * PIOL_DATABUF() yields the address just past the piol_t header, i.e.
 * the start of the buffer's data area.
 */
#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1490 
1491 void
1492 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1493 {
1494 	piol_t	*iol;
1495 	size_t	initial_size = MIN(1, n) * itemsize;
1496 
1497 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1498 
1499 	ASSERT(list_head(iolhead) == NULL);
1500 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1501 	ASSERT(initial_size > 0);
1502 
1503 	/*
1504 	 * Someone creating chained copyout buffers may ask for less than
1505 	 * MAPSIZE if the amount of data to be buffered is known to be
1506 	 * smaller than that.
1507 	 * But in order to prevent involuntary self-denial of service,
1508 	 * the requested input size is clamped at MAPSIZE.
1509 	 */
1510 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1511 	iol = kmem_alloc(initial_size, KM_SLEEP);
1512 	list_insert_head(iolhead, iol);
1513 	iol->piol_usedsize = 0;
1514 	iol->piol_size = initial_size;
1515 }
1516 
1517 void *
1518 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1519 {
1520 	piol_t	*iol;
1521 	char	*new;
1522 
1523 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1524 	ASSERT(list_head(iolhead) != NULL);
1525 
1526 	iol = (piol_t *)list_tail(iolhead);
1527 
1528 	if (iol->piol_size <
1529 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1530 		/*
1531 		 * Out of space in the current buffer. Allocate more.
1532 		 */
1533 		piol_t *newiol;
1534 
1535 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1536 		newiol->piol_size = MAPSIZE;
1537 		newiol->piol_usedsize = 0;
1538 
1539 		list_insert_after(iolhead, iol, newiol);
1540 		iol = list_next(iolhead, iol);
1541 		ASSERT(iol == newiol);
1542 	}
1543 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1544 	iol->piol_usedsize += itemsize;
1545 	bzero(new, itemsize);
1546 	return (new);
1547 }
1548 
1549 int
1550 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1551 {
1552 	int error = errin;
1553 	piol_t	*iol;
1554 
1555 	while ((iol = list_head(iolhead)) != NULL) {
1556 		list_remove(iolhead, iol);
1557 		if (!error) {
1558 			if (copyout(PIOL_DATABUF(iol), *tgt,
1559 			    iol->piol_usedsize))
1560 				error = EFAULT;
1561 			*tgt += iol->piol_usedsize;
1562 		}
1563 		kmem_free(iol, iol->piol_size);
1564 	}
1565 	list_destroy(iolhead);
1566 
1567 	return (error);
1568 }
1569 
1570 int
1571 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1572 {
1573 	offset_t	off = uiop->uio_offset;
1574 	char		*base;
1575 	size_t		size;
1576 	piol_t		*iol;
1577 	int		error = errin;
1578 
1579 	while ((iol = list_head(iolhead)) != NULL) {
1580 		list_remove(iolhead, iol);
1581 		base = PIOL_DATABUF(iol);
1582 		size = iol->piol_usedsize;
1583 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1584 			error = uiomove(base + off, size - off,
1585 			    UIO_READ, uiop);
1586 		off = MAX(0, off - (offset_t)size);
1587 		kmem_free(iol, iol->piol_size);
1588 	}
1589 	list_destroy(iolhead);
1590 
1591 	return (error);
1592 }
1593 
/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 *
 * One prmap_t is appended to the chained buffer list 'iolhead' for each
 * non-empty range reported by pr_getprot() over every segment of the
 * process's address space.  The list is initialized here even when the
 * address space has no segments.  'reserved' is passed through to
 * pr_getsegsize() and pr_getprot() and also widens the reported stack
 * mapping (see below).  The caller must hold as->a_lock as writer.
 * Always returns 0.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* identify the break and stack segments once, for flagging below */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			/* empty range: nothing to report */
			if (saddr == naddr)
				continue;

			/* pr_iol_newbuf() returns the entry already zeroed */
			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/*
			 * MA_ANON: segspt_shmops segments, and segvn
			 * segments for which no vnode can be obtained.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				/*
				 * With 'reserved' set, the stack entry is
				 * rewritten to start at stack base +
				 * p_stksize - maxstack, where maxstack is
				 * p_stk_ctl rounded up to a page, and to
				 * extend up to naddr.
				 */
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 * Only regular-file vnodes of segvn segments whose
			 * attributes can be fetched get a name; the
			 * process's own executable is named "a.out".
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
						vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * pr_shmid ends up -1 when the mapping is not
			 * shared, the process has no p_segacct, or
			 * shmgetid() returns SHMID_NONE or SHMID_FREE;
			 * MA_SHM is set for any result but SHMID_NONE.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1707 
1708 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prgetmap(): append one prmap32_t to the chained
 * buffer list 'iolhead' for each non-empty range reported by
 * pr_getprot() over every segment of the process's address space.
 * Addresses and sizes are narrowed to caddr32_t / size32_t.  The list
 * is initialized here even when the address space has no segments.
 * The caller must hold as->a_lock as writer and must free the list.
 * Always returns 0.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* identify the break and stack segments once, for flagging below */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			/* empty range: nothing to report */
			if (saddr == naddr)
				continue;

			/* pr_iol_newbuf() returns the entry already zeroed */
			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/*
			 * MA_ANON: segspt_shmops segments, and segvn
			 * segments for which no vnode can be obtained.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				/*
				 * With 'reserved' set, the stack entry is
				 * rewritten to start at stack base +
				 * p_stksize - maxstack, where maxstack is
				 * p_stk_ctl rounded up to a page, and to
				 * extend up to naddr.
				 */
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 * Only regular-file vnodes of segvn segments whose
			 * attributes can be fetched get a name; the
			 * process's own executable is named "a.out".
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
						vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * pr_shmid ends up -1 when the mapping is not
			 * shared, the process has no p_segacct, or
			 * shmgetid() returns SHMID_NONE or SHMID_FREE;
			 * MA_SHM is set for any result but SHMID_NONE.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1819 #endif	/* _SYSCALL32_IMPL */
1820 
1821 /*
1822  * Return the size of the /proc page data file.
1823  */
1824 size_t
1825 prpdsize(struct as *as)
1826 {
1827 	struct seg *seg;
1828 	size_t size;
1829 
1830 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1831 
1832 	if ((seg = AS_SEGFIRST(as)) == NULL)
1833 		return (0);
1834 
1835 	size = sizeof (prpageheader_t);
1836 	do {
1837 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1838 		caddr_t saddr, naddr;
1839 		void *tmp = NULL;
1840 		size_t npage;
1841 
1842 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1843 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1844 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1845 				size += sizeof (prasmap_t) + round8(npage);
1846 		}
1847 		ASSERT(tmp == NULL);
1848 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1849 
1850 	return (size);
1851 }
1852 
1853 #ifdef _SYSCALL32_IMPL
1854 size_t
1855 prpdsize32(struct as *as)
1856 {
1857 	struct seg *seg;
1858 	size_t size;
1859 
1860 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1861 
1862 	if ((seg = AS_SEGFIRST(as)) == NULL)
1863 		return (0);
1864 
1865 	size = sizeof (prpageheader32_t);
1866 	do {
1867 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1868 		caddr_t saddr, naddr;
1869 		void *tmp = NULL;
1870 		size_t npage;
1871 
1872 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1873 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1874 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1875 				size += sizeof (prasmap32_t) + round8(npage);
1876 		}
1877 		ASSERT(tmp == NULL);
1878 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1879 
1880 	return (size);
1881 }
1882 #endif	/* _SYSCALL32_IMPL */
1883 
/*
 * Read page data information.
 *
 * Builds, in a temporary kernel buffer, a prpageheader_t followed by one
 * prasmap_t per mapping, each trailed by round8(npage) bytes filled in
 * by hat_getstat(), and moves the result to the caller through uiop.
 *
 * Returns 0 if the address space has no segments, E2BIG if
 * uiop->uio_resid is smaller than the size computed by prpdsize(),
 * EINTR if a retry is needed while a signal is pending, and otherwise
 * the result of uiomove().  as->a_lock is acquired as writer here and
 * released before returning.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	/* size the buffer from a first pass over the address space */
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	/* header first; the first prasmap_t follows it directly */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the next prasmap_t would begin */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/*
			 * MA_ANON: segspt_shmops segments, and segvn
			 * segments for which no vnode can be obtained.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 * Only regular-file vnodes of segvn segments whose
			 * attributes can be fetched get a name; the
			 * process's own executable is named "a.out".
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
						vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * pr_shmid ends up -1 when the mapping is not
			 * shared, the process has no p_segacct, or
			 * shmgetid() returns SHMID_NONE or SHMID_FREE;
			 * MA_SHM is set for any result but SHMID_NONE.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/*
			 * The per-page data is written directly after this
			 * prasmap_t; then step to the next entry's slot.
			 */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	/* only the bytes actually filled in are moved to the caller */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2032 
2033 #ifdef _SYSCALL32_IMPL
2034 int
2035 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2036 {
2037 	struct as *as = p->p_as;
2038 	caddr_t buf;
2039 	size_t size;
2040 	prpageheader32_t *php;
2041 	prasmap32_t *pmp;
2042 	struct seg *seg;
2043 	int error;
2044 
2045 again:
2046 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2047 
2048 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2049 		AS_LOCK_EXIT(as, &as->a_lock);
2050 		return (0);
2051 	}
2052 	size = prpdsize32(as);
2053 	if (uiop->uio_resid < size) {
2054 		AS_LOCK_EXIT(as, &as->a_lock);
2055 		return (E2BIG);
2056 	}
2057 
2058 	buf = kmem_zalloc(size, KM_SLEEP);
2059 	php = (prpageheader32_t *)buf;
2060 	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2061 
2062 	hrt2ts32(gethrtime(), &php->pr_tstamp);
2063 	php->pr_nmap = 0;
2064 	php->pr_npage = 0;
2065 	do {
2066 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2067 		caddr_t saddr, naddr;
2068 		void *tmp = NULL;
2069 
2070 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2071 			struct vnode *vp;
2072 			struct vattr vattr;
2073 			size_t len;
2074 			size_t npage;
2075 			uint_t prot;
2076 			uintptr_t next;
2077 
2078 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2079 			if ((len = (size_t)(naddr - saddr)) == 0)
2080 				continue;
2081 			npage = len / PAGESIZE;
2082 			next = (uintptr_t)(pmp + 1) + round8(npage);
2083 			/*
2084 			 * It's possible that the address space can change
2085 			 * subtlely even though we're holding as->a_lock
2086 			 * due to the nondeterminism of page_exists() in
2087 			 * the presence of asychronously flushed pages or
2088 			 * mapped files whose sizes are changing.
2089 			 * page_exists() may be called indirectly from
2090 			 * pr_getprot() by a SEGOP_INCORE() routine.
2091 			 * If this happens we need to make sure we don't
2092 			 * overrun the buffer whose size we computed based
2093 			 * on the initial iteration through the segments.
2094 			 * Once we've detected an overflow, we need to clean
2095 			 * up the temporary memory allocated in pr_getprot()
2096 			 * and retry. If there's a pending signal, we return
2097 			 * EINTR so that this thread can be dislodged if
2098 			 * a latent bug causes us to spin indefinitely.
2099 			 */
2100 			if (next > (uintptr_t)buf + size) {
2101 				pr_getprot_done(&tmp);
2102 				AS_LOCK_EXIT(as, &as->a_lock);
2103 
2104 				kmem_free(buf, size);
2105 
2106 				if (ISSIG(curthread, JUSTLOOKING))
2107 					return (EINTR);
2108 
2109 				goto again;
2110 			}
2111 
2112 			php->pr_nmap++;
2113 			php->pr_npage += npage;
2114 			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2115 			pmp->pr_npage = (size32_t)npage;
2116 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2117 			pmp->pr_mflags = 0;
2118 			if (prot & PROT_READ)
2119 				pmp->pr_mflags |= MA_READ;
2120 			if (prot & PROT_WRITE)
2121 				pmp->pr_mflags |= MA_WRITE;
2122 			if (prot & PROT_EXEC)
2123 				pmp->pr_mflags |= MA_EXEC;
2124 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2125 				pmp->pr_mflags |= MA_SHARED;
2126 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2127 				pmp->pr_mflags |= MA_NORESERVE;
2128 			if (seg->s_ops == &segspt_shmops ||
2129 			    (seg->s_ops == &segvn_ops &&
2130 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2131 				pmp->pr_mflags |= MA_ANON;
2132 			if (seg->s_ops == &segspt_shmops)
2133 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2134 			pmp->pr_pagesize = PAGESIZE;
2135 			/*
2136 			 * Manufacture a filename for the "object" directory.
2137 			 */
2138 			vattr.va_mask = AT_FSID|AT_NODEID;
2139 			if (seg->s_ops == &segvn_ops &&
2140 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2141 			    vp != NULL && vp->v_type == VREG &&
2142 			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
2143 				if (vp == p->p_exec)
2144 					(void) strcpy(pmp->pr_mapname, "a.out");
2145 				else
2146 					pr_object_name(pmp->pr_mapname,
2147 						vp, &vattr);
2148 			}
2149 
2150 			/*
2151 			 * Get the SysV shared memory id, if any.
2152 			 */
2153 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2154 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2155 			    SHMID_NONE) {
2156 				if (pmp->pr_shmid == SHMID_FREE)
2157 					pmp->pr_shmid = -1;
2158 
2159 				pmp->pr_mflags |= MA_SHM;
2160 			} else {
2161 				pmp->pr_shmid = -1;
2162 			}
2163 
2164 			hat_getstat(as, saddr, len, hatid,
2165 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2166 			pmp = (prasmap32_t *)next;
2167 		}
2168 		ASSERT(tmp == NULL);
2169 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2170 
2171 	AS_LOCK_EXIT(as, &as->a_lock);
2172 
2173 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2174 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2175 	kmem_free(buf, size);
2176 
2177 	return (error);
2178 }
2179 #endif	/* _SYSCALL32_IMPL */
2180 
2181 ushort_t
2182 prgetpctcpu(uint64_t pct)
2183 {
2184 	/*
2185 	 * The value returned will be relevant in the zone of the examiner,
2186 	 * which may not be the same as the zone which performed the procfs
2187 	 * mount.
2188 	 */
2189 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2190 
2191 	/*
2192 	 * Prorate over online cpus so we don't exceed 100%
2193 	 */
2194 	if (nonline > 1)
2195 		pct /= nonline;
2196 	pct >>= 16;		/* convert to 16-bit scaled integer */
2197 	if (pct > 0x8000)	/* might happen, due to rounding */
2198 		pct = 0x8000;
2199 	return ((ushort_t)pct);
2200 }
2201 
2202 /*
2203  * Return information used by ps(1).
2204  */
2205 void
2206 prgetpsinfo(proc_t *p, psinfo_t *psp)
2207 {
2208 	kthread_t *t;
2209 	struct cred *cred;
2210 	hrtime_t hrutime, hrstime;
2211 
2212 	ASSERT(MUTEX_HELD(&p->p_lock));
2213 
2214 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2215 		bzero(psp, sizeof (*psp));
2216 	else {
2217 		thread_unlock(t);
2218 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2219 	}
2220 
2221 	/*
2222 	 * only export SSYS and SMSACCT; everything else is off-limits to
2223 	 * userland apps.
2224 	 */
2225 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2226 	psp->pr_nlwp = p->p_lwpcnt;
2227 	psp->pr_nzomb = p->p_zombcnt;
2228 	mutex_enter(&p->p_crlock);
2229 	cred = p->p_cred;
2230 	psp->pr_uid = crgetruid(cred);
2231 	psp->pr_euid = crgetuid(cred);
2232 	psp->pr_gid = crgetrgid(cred);
2233 	psp->pr_egid = crgetgid(cred);
2234 	mutex_exit(&p->p_crlock);
2235 	psp->pr_pid = p->p_pid;
2236 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2237 	    (p->p_flag & SZONETOP)) {
2238 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2239 		/*
2240 		 * Inside local zones, fake zsched's pid as parent pids for
2241 		 * processes which reference processes outside of the zone.
2242 		 */
2243 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2244 	} else {
2245 		psp->pr_ppid = p->p_ppid;
2246 	}
2247 	psp->pr_pgid = p->p_pgrp;
2248 	psp->pr_sid = p->p_sessp->s_sid;
2249 	psp->pr_taskid = p->p_task->tk_tkid;
2250 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2251 	psp->pr_poolid = p->p_pool->pool_id;
2252 	psp->pr_zoneid = p->p_zone->zone_id;
2253 	if ((psp->pr_contract = PRCTID(p)) == 0)
2254 		psp->pr_contract = -1;
2255 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2256 	switch (p->p_model) {
2257 	case DATAMODEL_ILP32:
2258 		psp->pr_dmodel = PR_MODEL_ILP32;
2259 		break;
2260 	case DATAMODEL_LP64:
2261 		psp->pr_dmodel = PR_MODEL_LP64;
2262 		break;
2263 	}
2264 	hrutime = mstate_aggr_state(p, LMS_USER);
2265 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2266 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2267 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2268 
2269 	if (t == NULL) {
2270 		int wcode = p->p_wcode;		/* must be atomic read */
2271 
2272 		if (wcode)
2273 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2274 		psp->pr_ttydev = PRNODEV;
2275 		psp->pr_lwp.pr_state = SZOMB;
2276 		psp->pr_lwp.pr_sname = 'Z';
2277 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2278 		psp->pr_lwp.pr_bindpset = PS_NONE;
2279 	} else {
2280 		user_t *up = PTOU(p);
2281 		struct as *as;
2282 		dev_t d;
2283 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2284 
2285 		d = cttydev(p);
2286 		/*
2287 		 * If the controlling terminal is the real
2288 		 * or workstation console device, map to what the
2289 		 * user thinks is the console device.
2290 		 */
2291 		if (d == rwsconsdev || d == rconsdev)
2292 			d = uconsdev;
2293 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2294 		psp->pr_start = up->u_start;
2295 		bcopy(up->u_comm, psp->pr_fname,
2296 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2297 		bcopy(up->u_psargs, psp->pr_psargs,
2298 		    MIN(PRARGSZ-1, PSARGSZ));
2299 		psp->pr_argc = up->u_argc;
2300 		psp->pr_argv = up->u_argv;
2301 		psp->pr_envp = up->u_envp;
2302 
2303 		/* get the chosen lwp's lwpsinfo */
2304 		prgetlwpsinfo(t, &psp->pr_lwp);
2305 
2306 		/* compute %cpu for the process */
2307 		if (p->p_lwpcnt == 1)
2308 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2309 		else {
2310 			uint64_t pct = 0;
2311 			hrtime_t cur_time = gethrtime_unscaled();
2312 
2313 			t = p->p_tlist;
2314 			do {
2315 				pct += cpu_update_pct(t, cur_time);
2316 			} while ((t = t->t_forw) != p->p_tlist);
2317 
2318 			psp->pr_pctcpu = prgetpctcpu(pct);
2319 		}
2320 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2321 			psp->pr_size = 0;
2322 			psp->pr_rssize = 0;
2323 		} else {
2324 			mutex_exit(&p->p_lock);
2325 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2326 			psp->pr_size = btopr(rm_assize(as)) * (PAGESIZE / 1024);
2327 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2328 			psp->pr_pctmem = rm_pctmemory(as);
2329 			AS_LOCK_EXIT(as, &as->a_lock);
2330 			mutex_enter(&p->p_lock);
2331 		}
2332 	}
2333 }
2334 
2335 #ifdef _SYSCALL32_IMPL
2336 void
2337 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2338 {
2339 	kthread_t *t;
2340 	struct cred *cred;
2341 	hrtime_t hrutime, hrstime;
2342 
2343 	ASSERT(MUTEX_HELD(&p->p_lock));
2344 
2345 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2346 		bzero(psp, sizeof (*psp));
2347 	else {
2348 		thread_unlock(t);
2349 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2350 	}
2351 
2352 	/*
2353 	 * only export SSYS and SMSACCT; everything else is off-limits to
2354 	 * userland apps.
2355 	 */
2356 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2357 	psp->pr_nlwp = p->p_lwpcnt;
2358 	psp->pr_nzomb = p->p_zombcnt;
2359 	mutex_enter(&p->p_crlock);
2360 	cred = p->p_cred;
2361 	psp->pr_uid = crgetruid(cred);
2362 	psp->pr_euid = crgetuid(cred);
2363 	psp->pr_gid = crgetrgid(cred);
2364 	psp->pr_egid = crgetgid(cred);
2365 	mutex_exit(&p->p_crlock);
2366 	psp->pr_pid = p->p_pid;
2367 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2368 	    (p->p_flag & SZONETOP)) {
2369 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2370 		/*
2371 		 * Inside local zones, fake zsched's pid as parent pids for
2372 		 * processes which reference processes outside of the zone.
2373 		 */
2374 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2375 	} else {
2376 		psp->pr_ppid = p->p_ppid;
2377 	}
2378 	psp->pr_pgid = p->p_pgrp;
2379 	psp->pr_sid = p->p_sessp->s_sid;
2380 	psp->pr_taskid = p->p_task->tk_tkid;
2381 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2382 	psp->pr_poolid = p->p_pool->pool_id;
2383 	psp->pr_zoneid = p->p_zone->zone_id;
2384 	if ((psp->pr_contract = PRCTID(p)) == 0)
2385 		psp->pr_contract = -1;
2386 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2387 	switch (p->p_model) {
2388 	case DATAMODEL_ILP32:
2389 		psp->pr_dmodel = PR_MODEL_ILP32;
2390 		break;
2391 	case DATAMODEL_LP64:
2392 		psp->pr_dmodel = PR_MODEL_LP64;
2393 		break;
2394 	}
2395 	hrutime = mstate_aggr_state(p, LMS_USER);
2396 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2397 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2398 	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2399 
2400 	if (t == NULL) {
2401 		extern int wstat(int, int);	/* needs a header file */
2402 		int wcode = p->p_wcode;		/* must be atomic read */
2403 
2404 		if (wcode)
2405 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2406 		psp->pr_ttydev = PRNODEV32;
2407 		psp->pr_lwp.pr_state = SZOMB;
2408 		psp->pr_lwp.pr_sname = 'Z';
2409 	} else {
2410 		user_t *up = PTOU(p);
2411 		struct as *as;
2412 		dev_t d;
2413 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2414 
2415 		d = cttydev(p);
2416 		/*
2417 		 * If the controlling terminal is the real
2418 		 * or workstation console device, map to what the
2419 		 * user thinks is the console device.
2420 		 */
2421 		if (d == rwsconsdev || d == rconsdev)
2422 			d = uconsdev;
2423 		(void) cmpldev(&psp->pr_ttydev, d);
2424 		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2425 		bcopy(up->u_comm, psp->pr_fname,
2426 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2427 		bcopy(up->u_psargs, psp->pr_psargs,
2428 		    MIN(PRARGSZ-1, PSARGSZ));
2429 		psp->pr_argc = up->u_argc;
2430 		psp->pr_argv = (caddr32_t)up->u_argv;
2431 		psp->pr_envp = (caddr32_t)up->u_envp;
2432 
2433 		/* get the chosen lwp's lwpsinfo */
2434 		prgetlwpsinfo32(t, &psp->pr_lwp);
2435 
2436 		/* compute %cpu for the process */
2437 		if (p->p_lwpcnt == 1)
2438 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2439 		else {
2440 			uint64_t pct = 0;
2441 			hrtime_t cur_time;
2442 
2443 			t = p->p_tlist;
2444 			cur_time = gethrtime_unscaled();
2445 			do {
2446 				pct += cpu_update_pct(t, cur_time);
2447 			} while ((t = t->t_forw) != p->p_tlist);
2448 
2449 			psp->pr_pctcpu = prgetpctcpu(pct);
2450 		}
2451 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2452 			psp->pr_size = 0;
2453 			psp->pr_rssize = 0;
2454 		} else {
2455 			mutex_exit(&p->p_lock);
2456 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2457 			psp->pr_size = (size32_t)
2458 				(btopr(rm_assize(as)) * (PAGESIZE / 1024));
2459 			psp->pr_rssize = (size32_t)
2460 				(rm_asrss(as) * (PAGESIZE / 1024));
2461 			psp->pr_pctmem = rm_pctmemory(as);
2462 			AS_LOCK_EXIT(as, &as->a_lock);
2463 			mutex_enter(&p->p_lock);
2464 		}
2465 	}
2466 
2467 	/*
2468 	 * If we are looking at an LP64 process, zero out
2469 	 * the fields that cannot be represented in ILP32.
2470 	 */
2471 	if (p->p_model != DATAMODEL_ILP32) {
2472 		psp->pr_size = 0;
2473 		psp->pr_rssize = 0;
2474 		psp->pr_argv = 0;
2475 		psp->pr_envp = 0;
2476 	}
2477 }
2478 #endif	/* _SYSCALL32_IMPL */
2479 
2480 void
2481 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2482 {
2483 	klwp_t *lwp = ttolwp(t);
2484 	sobj_ops_t *sobj;
2485 	char c, state;
2486 	uint64_t pct;
2487 	int retval, niceval;
2488 	hrtime_t hrutime, hrstime;
2489 
2490 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2491 
2492 	bzero(psp, sizeof (*psp));
2493 
2494 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2495 	psp->pr_lwpid = t->t_tid;
2496 	psp->pr_addr = (uintptr_t)t;
2497 	psp->pr_wchan = (uintptr_t)t->t_wchan;
2498 
2499 	/* map the thread state enum into a process state enum */
2500 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2501 	switch (state) {
2502 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2503 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2504 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2505 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2506 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2507 	default:		state = 0;		c = '?';	break;
2508 	}
2509 	psp->pr_state = state;
2510 	psp->pr_sname = c;
2511 	if ((sobj = t->t_sobj_ops) != NULL)
2512 		psp->pr_stype = SOBJ_TYPE(sobj);
2513 	retval = CL_DONICE(t, NULL, 0, &niceval);
2514 	if (retval == 0) {
2515 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2516 		psp->pr_nice = niceval + NZERO;
2517 	}
2518 	psp->pr_syscall = t->t_sysnum;
2519 	psp->pr_pri = t->t_pri;
2520 	psp->pr_start.tv_sec = t->t_start;
2521 	psp->pr_start.tv_nsec = 0L;
2522 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2523 	scalehrtime(&hrutime);
2524 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2525 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2526 	scalehrtime(&hrstime);
2527 	hrt2ts(hrutime + hrstime, &psp->pr_time);
2528 	/* compute %cpu for the lwp */
2529 	pct = cpu_update_pct(t, gethrtime_unscaled());
2530 	psp->pr_pctcpu = prgetpctcpu(pct);
2531 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2532 	if (psp->pr_cpu > 99)
2533 		psp->pr_cpu = 99;
2534 
2535 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2536 		sizeof (psp->pr_clname) - 1);
2537 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2538 	psp->pr_onpro = t->t_cpu->cpu_id;
2539 	psp->pr_bindpro = t->t_bind_cpu;
2540 	psp->pr_bindpset = t->t_bind_pset;
2541 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2542 }
2543 
2544 #ifdef _SYSCALL32_IMPL
2545 void
2546 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2547 {
2548 	proc_t *p = ttoproc(t);
2549 	klwp_t *lwp = ttolwp(t);
2550 	sobj_ops_t *sobj;
2551 	char c, state;
2552 	uint64_t pct;
2553 	int retval, niceval;
2554 	hrtime_t hrutime, hrstime;
2555 
2556 	ASSERT(MUTEX_HELD(&p->p_lock));
2557 
2558 	bzero(psp, sizeof (*psp));
2559 
2560 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2561 	psp->pr_lwpid = t->t_tid;
2562 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2563 	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
2564 
2565 	/* map the thread state enum into a process state enum */
2566 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2567 	switch (state) {
2568 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2569 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2570 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2571 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2572 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2573 	default:		state = 0;		c = '?';	break;
2574 	}
2575 	psp->pr_state = state;
2576 	psp->pr_sname = c;
2577 	if ((sobj = t->t_sobj_ops) != NULL)
2578 		psp->pr_stype = SOBJ_TYPE(sobj);
2579 	retval = CL_DONICE(t, NULL, 0, &niceval);
2580 	if (retval == 0) {
2581 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2582 		psp->pr_nice = niceval + NZERO;
2583 	} else {
2584 		psp->pr_oldpri = 0;
2585 		psp->pr_nice = 0;
2586 	}
2587 	psp->pr_syscall = t->t_sysnum;
2588 	psp->pr_pri = t->t_pri;
2589 	psp->pr_start.tv_sec = (time32_t)t->t_start;
2590 	psp->pr_start.tv_nsec = 0L;
2591 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2592 	scalehrtime(&hrutime);
2593 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2594 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2595 	scalehrtime(&hrstime);
2596 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2597 	/* compute %cpu for the lwp */
2598 	pct = cpu_update_pct(t, gethrtime_unscaled());
2599 	psp->pr_pctcpu = prgetpctcpu(pct);
2600 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2601 	if (psp->pr_cpu > 99)
2602 		psp->pr_cpu = 99;
2603 
2604 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2605 		sizeof (psp->pr_clname) - 1);
2606 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2607 	psp->pr_onpro = t->t_cpu->cpu_id;
2608 	psp->pr_bindpro = t->t_bind_cpu;
2609 	psp->pr_bindpset = t->t_bind_pset;
2610 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2611 }
2612 #endif	/* _SYSCALL32_IMPL */
2613 
2614 /*
2615  * This used to get called when microstate accounting was disabled but
2616  * microstate information was requested.  Since Microstate accounting is on
2617  * regardless of the proc flags, this simply makes it appear to procfs that
2618  * microstate accounting is on.  This is relatively meaningless since you
2619  * can't turn it off, but this is here for the sake of appearances.
2620  */
2621 
2622 /*ARGSUSED*/
2623 void
2624 estimate_msacct(kthread_t *t, hrtime_t curtime)
2625 {
2626 	proc_t *p;
2627 
2628 	if (t == NULL)
2629 		return;
2630 
2631 	p = ttoproc(t);
2632 	ASSERT(MUTEX_HELD(&p->p_lock));
2633 
2634 	/*
2635 	 * A system process (p0) could be referenced if the thread is
2636 	 * in the process of exiting.  Don't turn on microstate accounting
2637 	 * in that case.
2638 	 */
2639 	if (p->p_flag & SSYS)
2640 		return;
2641 
2642 	/*
2643 	 * Loop through all the LWPs (kernel threads) in the process.
2644 	 */
2645 	t = p->p_tlist;
2646 	do {
2647 		t->t_proc_flag |= TP_MSACCT;
2648 	} while ((t = t->t_forw) != p->p_tlist);
2649 
2650 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
2651 }
2652 
2653 /*
2654  * It's not really possible to disable microstate accounting anymore.
2655  * However, this routine simply turns off the ms accounting flags in a process
2656  * This way procfs can still pretend to turn microstate accounting on and
2657  * off for a process, but it actually doesn't do anything.  This is
2658  * a neutered form of preemptive idiot-proofing.
2659  */
2660 void
2661 disable_msacct(proc_t *p)
2662 {
2663 	kthread_t *t;
2664 
2665 	ASSERT(MUTEX_HELD(&p->p_lock));
2666 
2667 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
2668 	/*
2669 	 * Loop through all the LWPs (kernel threads) in the process.
2670 	 */
2671 	if ((t = p->p_tlist) != NULL) {
2672 		do {
2673 			/* clear per-thread flag */
2674 			t->t_proc_flag &= ~TP_MSACCT;
2675 		} while ((t = t->t_forw) != p->p_tlist);
2676 	}
2677 }
2678 
2679 /*
2680  * Return resource usage information.
2681  */
2682 void
2683 prgetusage(kthread_t *t, prhusage_t *pup)
2684 {
2685 	klwp_t *lwp = ttolwp(t);
2686 	hrtime_t *mstimep;
2687 	struct mstate *ms = &lwp->lwp_mstate;
2688 	int state;
2689 	int i;
2690 	hrtime_t curtime;
2691 	hrtime_t waitrq;
2692 	hrtime_t tmp1;
2693 
2694 	curtime = gethrtime_unscaled();
2695 
2696 	pup->pr_lwpid	= t->t_tid;
2697 	pup->pr_count	= 1;
2698 	pup->pr_create	= ms->ms_start;
2699 	pup->pr_term    = ms->ms_term;
2700 	scalehrtime(&pup->pr_create);
2701 	scalehrtime(&pup->pr_term);
2702 	if (ms->ms_term == 0) {
2703 		pup->pr_rtime = curtime - ms->ms_start;
2704 		scalehrtime(&pup->pr_rtime);
2705 	} else {
2706 		pup->pr_rtime = ms->ms_term - ms->ms_start;
2707 		scalehrtime(&pup->pr_rtime);
2708 	}
2709 
2710 
2711 	pup->pr_utime    = ms->ms_acct[LMS_USER];
2712 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
2713 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
2714 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
2715 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
2716 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
2717 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
2718 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
2719 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
2720 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2721 
2722 	prscaleusage(pup);
2723 
2724 	/*
2725 	 * Adjust for time waiting in the dispatcher queue.
2726 	 */
2727 	waitrq = t->t_waitrq;	/* hopefully atomic */
2728 	if (waitrq != 0) {
2729 		tmp1 = curtime - waitrq;
2730 		scalehrtime(&tmp1);
2731 		pup->pr_wtime += tmp1;
2732 		curtime = waitrq;
2733 	}
2734 
2735 	/*
2736 	 * Adjust for time spent in current microstate.
2737 	 */
2738 	if (ms->ms_state_start > curtime) {
2739 		curtime = gethrtime_unscaled();
2740 	}
2741 
2742 	i = 0;
2743 	do {
2744 		switch (state = t->t_mstate) {
2745 		case LMS_SLEEP:
2746 			/*
2747 			 * Update the timer for the current sleep state.
2748 			 */
2749 			switch (state = ms->ms_prev) {
2750 			case LMS_TFAULT:
2751 			case LMS_DFAULT:
2752 			case LMS_KFAULT:
2753 			case LMS_USER_LOCK:
2754 				break;
2755 			default:
2756 				state = LMS_SLEEP;
2757 				break;
2758 			}
2759 			break;
2760 		case LMS_TFAULT:
2761 		case LMS_DFAULT:
2762 		case LMS_KFAULT:
2763 		case LMS_USER_LOCK:
2764 			state = LMS_SYSTEM;
2765 			break;
2766 		}
2767 		switch (state) {
2768 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2769 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2770 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2771 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2772 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2773 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2774 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2775 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2776 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2777 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2778 		default:		panic("prgetusage: unknown microstate");
2779 		}
2780 		tmp1 = curtime - ms->ms_state_start;
2781 		if (tmp1 < 0) {
2782 			curtime = gethrtime_unscaled();
2783 			i++;
2784 			continue;
2785 		}
2786 		scalehrtime(&tmp1);
2787 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2788 
2789 	*mstimep += tmp1;
2790 
2791 	/* update pup timestamp */
2792 	pup->pr_tstamp = curtime;
2793 	scalehrtime(&pup->pr_tstamp);
2794 
2795 	/*
2796 	 * Resource usage counters.
2797 	 */
2798 	pup->pr_minf  = lwp->lwp_ru.minflt;
2799 	pup->pr_majf  = lwp->lwp_ru.majflt;
2800 	pup->pr_nswap = lwp->lwp_ru.nswap;
2801 	pup->pr_inblk = lwp->lwp_ru.inblock;
2802 	pup->pr_oublk = lwp->lwp_ru.oublock;
2803 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
2804 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
2805 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
2806 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
2807 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
2808 	pup->pr_sysc  = lwp->lwp_ru.sysc;
2809 	pup->pr_ioch  = lwp->lwp_ru.ioch;
2810 }
2811 
2812 /*
2813  * Convert ms_acct stats from unscaled high-res time to nanoseconds
2814  */
2815 void
2816 prscaleusage(prhusage_t *usg)
2817 {
2818 	scalehrtime(&usg->pr_utime);
2819 	scalehrtime(&usg->pr_stime);
2820 	scalehrtime(&usg->pr_ttime);
2821 	scalehrtime(&usg->pr_tftime);
2822 	scalehrtime(&usg->pr_dftime);
2823 	scalehrtime(&usg->pr_kftime);
2824 	scalehrtime(&usg->pr_ltime);
2825 	scalehrtime(&usg->pr_slptime);
2826 	scalehrtime(&usg->pr_wtime);
2827 	scalehrtime(&usg->pr_stoptime);
2828 }
2829 
2830 
2831 /*
2832  * Sum resource usage information.
2833  */
2834 void
2835 praddusage(kthread_t *t, prhusage_t *pup)
2836 {
2837 	klwp_t *lwp = ttolwp(t);
2838 	hrtime_t *mstimep;
2839 	struct mstate *ms = &lwp->lwp_mstate;
2840 	int state;
2841 	int i;
2842 	hrtime_t curtime;
2843 	hrtime_t waitrq;
2844 	hrtime_t tmp;
2845 	prhusage_t conv;
2846 
2847 	curtime = gethrtime_unscaled();
2848 
2849 	if (ms->ms_term == 0) {
2850 		tmp = curtime - ms->ms_start;
2851 		scalehrtime(&tmp);
2852 		pup->pr_rtime += tmp;
2853 	} else {
2854 		tmp = ms->ms_term - ms->ms_start;
2855 		scalehrtime(&tmp);
2856 		pup->pr_rtime += tmp;
2857 	}
2858 
2859 	conv.pr_utime = ms->ms_acct[LMS_USER];
2860 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2861 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2862 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2863 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2864 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2865 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2866 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2867 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2868 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2869 
2870 	prscaleusage(&conv);
2871 
2872 	pup->pr_utime	+= conv.pr_utime;
2873 	pup->pr_stime	+= conv.pr_stime;
2874 	pup->pr_ttime	+= conv.pr_ttime;
2875 	pup->pr_tftime	+= conv.pr_tftime;
2876 	pup->pr_dftime	+= conv.pr_dftime;
2877 	pup->pr_kftime	+= conv.pr_kftime;
2878 	pup->pr_ltime	+= conv.pr_ltime;
2879 	pup->pr_slptime	+= conv.pr_slptime;
2880 	pup->pr_wtime	+= conv.pr_wtime;
2881 	pup->pr_stoptime += conv.pr_stoptime;
2882 
2883 	/*
2884 	 * Adjust for time waiting in the dispatcher queue.
2885 	 */
2886 	waitrq = t->t_waitrq;	/* hopefully atomic */
2887 	if (waitrq != 0) {
2888 		tmp = curtime - waitrq;
2889 		scalehrtime(&tmp);
2890 		pup->pr_wtime += tmp;
2891 		curtime = waitrq;
2892 	}
2893 
2894 	/*
2895 	 * Adjust for time spent in current microstate.
2896 	 */
2897 	if (ms->ms_state_start > curtime) {
2898 		curtime = gethrtime_unscaled();
2899 	}
2900 
2901 	i = 0;
2902 	do {
2903 		switch (state = t->t_mstate) {
2904 		case LMS_SLEEP:
2905 			/*
2906 			 * Update the timer for the current sleep state.
2907 			 */
2908 			switch (state = ms->ms_prev) {
2909 			case LMS_TFAULT:
2910 			case LMS_DFAULT:
2911 			case LMS_KFAULT:
2912 			case LMS_USER_LOCK:
2913 				break;
2914 			default:
2915 				state = LMS_SLEEP;
2916 				break;
2917 			}
2918 			break;
2919 		case LMS_TFAULT:
2920 		case LMS_DFAULT:
2921 		case LMS_KFAULT:
2922 		case LMS_USER_LOCK:
2923 			state = LMS_SYSTEM;
2924 			break;
2925 		}
2926 		switch (state) {
2927 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2928 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2929 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2930 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2931 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2932 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2933 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2934 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2935 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2936 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2937 		default:		panic("praddusage: unknown microstate");
2938 		}
2939 		tmp = curtime - ms->ms_state_start;
2940 		if (tmp < 0) {
2941 			curtime = gethrtime_unscaled();
2942 			i++;
2943 			continue;
2944 		}
2945 		scalehrtime(&tmp);
2946 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
2947 
2948 	*mstimep += tmp;
2949 
2950 	/* update pup timestamp */
2951 	pup->pr_tstamp = curtime;
2952 	scalehrtime(&pup->pr_tstamp);
2953 
2954 	/*
2955 	 * Resource usage counters.
2956 	 */
2957 	pup->pr_minf  += lwp->lwp_ru.minflt;
2958 	pup->pr_majf  += lwp->lwp_ru.majflt;
2959 	pup->pr_nswap += lwp->lwp_ru.nswap;
2960 	pup->pr_inblk += lwp->lwp_ru.inblock;
2961 	pup->pr_oublk += lwp->lwp_ru.oublock;
2962 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
2963 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
2964 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
2965 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
2966 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
2967 	pup->pr_sysc  += lwp->lwp_ru.sysc;
2968 	pup->pr_ioch  += lwp->lwp_ru.ioch;
2969 }
2970 
2971 /*
2972  * Convert a prhusage_t to a prusage_t.
2973  * This means convert each hrtime_t to a timestruc_t
2974  * and copy the count fields uint64_t => ulong_t.
2975  */
2976 void
2977 prcvtusage(prhusage_t *pup, prusage_t *upup)
2978 {
2979 	uint64_t *ullp;
2980 	ulong_t *ulp;
2981 	int i;
2982 
2983 	upup->pr_lwpid = pup->pr_lwpid;
2984 	upup->pr_count = pup->pr_count;
2985 
2986 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
2987 	hrt2ts(pup->pr_create,	&upup->pr_create);
2988 	hrt2ts(pup->pr_term,	&upup->pr_term);
2989 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
2990 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
2991 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
2992 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
2993 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
2994 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
2995 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
2996 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
2997 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
2998 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
2999 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3000 	bzero(upup->filltime, sizeof (upup->filltime));
3001 
3002 	ullp = &pup->pr_minf;
3003 	ulp = &upup->pr_minf;
3004 	for (i = 0; i < 22; i++)
3005 		*ulp++ = (ulong_t)*ullp++;
3006 }
3007 
3008 #ifdef _SYSCALL32_IMPL
3009 void
3010 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3011 {
3012 	uint64_t *ullp;
3013 	uint32_t *ulp;
3014 	int i;
3015 
3016 	upup->pr_lwpid = pup->pr_lwpid;
3017 	upup->pr_count = pup->pr_count;
3018 
3019 	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3020 	hrt2ts32(pup->pr_create,	&upup->pr_create);
3021 	hrt2ts32(pup->pr_term,		&upup->pr_term);
3022 	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3023 	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3024 	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3025 	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3026 	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3027 	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3028 	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3029 	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3030 	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3031 	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3032 	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3033 	bzero(upup->filltime, sizeof (upup->filltime));
3034 
3035 	ullp = &pup->pr_minf;
3036 	ulp = &upup->pr_minf;
3037 	for (i = 0; i < 22; i++)
3038 		*ulp++ = (uint32_t)*ullp++;
3039 }
3040 #endif	/* _SYSCALL32_IMPL */
3041 
/*
 * Report whether a set is empty.
 * sp points to the n 32-bit words that make up the set.
 * Returns 1 if every word is zero (or n is 0), 0 otherwise.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t word;

	for (word = 0; word < n; word++) {
		if (sp[word] != 0)
			return (0);
	}
	return (1);
}
3053 
3054 /*
3055  * Utility routine for establishing a watched area in the process.
3056  * Keep the list of watched areas sorted by virtual address.
3057  */
3058 int
3059 set_watched_area(proc_t *p, struct watched_area *pwa)
3060 {
3061 	caddr_t vaddr = pwa->wa_vaddr;
3062 	caddr_t eaddr = pwa->wa_eaddr;
3063 	ulong_t flags = pwa->wa_flags;
3064 	struct watched_area *target;
3065 	avl_index_t where;
3066 	int error = 0;
3067 
3068 	/* we must not be holding p->p_lock, but the process must be locked */
3069 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3070 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3071 
3072 	/*
3073 	 * If this is our first watchpoint, enable watchpoints for the process.
3074 	 */
3075 	if (!pr_watch_active(p)) {
3076 		kthread_t *t;
3077 
3078 		mutex_enter(&p->p_lock);
3079 		if ((t = p->p_tlist) != NULL) {
3080 			do {
3081 				watch_enable(t);
3082 			} while ((t = t->t_forw) != p->p_tlist);
3083 		}
3084 		mutex_exit(&p->p_lock);
3085 	}
3086 
3087 	target = pr_find_watched_area(p, pwa, &where);
3088 	if (target != NULL) {
3089 		/*
3090 		 * We discovered an existing, overlapping watched area.
3091 		 * Allow it only if it is an exact match.
3092 		 */
3093 		if (target->wa_vaddr != vaddr ||
3094 		    target->wa_eaddr != eaddr)
3095 			error = EINVAL;
3096 		else if (target->wa_flags != flags) {
3097 			error = set_watched_page(p, vaddr, eaddr,
3098 			    flags, target->wa_flags);
3099 			target->wa_flags = flags;
3100 		}
3101 		kmem_free(pwa, sizeof (struct watched_area));
3102 	} else {
3103 		avl_insert(&p->p_warea, pwa, where);
3104 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3105 	}
3106 
3107 	return (error);
3108 }
3109 
3110 /*
3111  * Utility routine for clearing a watched area in the process.
3112  * Must be an exact match of the virtual address.
3113  * size and flags don't matter.
3114  */
3115 int
3116 clear_watched_area(proc_t *p, struct watched_area *pwa)
3117 {
3118 	struct watched_area *found;
3119 
3120 	/* we must not be holding p->p_lock, but the process must be locked */
3121 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3122 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3123 
3124 
3125 	if (!pr_watch_active(p)) {
3126 		kmem_free(pwa, sizeof (struct watched_area));
3127 		return (0);
3128 	}
3129 
3130 	/*
3131 	 * Look for a matching address in the watched areas.  If a match is
3132 	 * found, clear the old watched area and adjust the watched page(s).  It
3133 	 * is not an error if there is no match.
3134 	 */
3135 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3136 	    found->wa_vaddr == pwa->wa_vaddr) {
3137 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3138 		    found->wa_flags);
3139 		avl_remove(&p->p_warea, found);
3140 		kmem_free(found, sizeof (struct watched_area));
3141 	}
3142 
3143 	kmem_free(pwa, sizeof (struct watched_area));
3144 
3145 	/*
3146 	 * If we removed the last watched area from the process, disable
3147 	 * watchpoints.
3148 	 */
3149 	if (!pr_watch_active(p)) {
3150 		kthread_t *t;
3151 
3152 		mutex_enter(&p->p_lock);
3153 		if ((t = p->p_tlist) != NULL) {
3154 			do {
3155 				watch_disable(t);
3156 			} while ((t = t->t_forw) != p->p_tlist);
3157 		}
3158 		mutex_exit(&p->p_lock);
3159 	}
3160 
3161 	return (0);
3162 }
3163 
3164 /*
3165  * Frees all the watched_area structures
3166  */
3167 void
3168 pr_free_watchpoints(proc_t *p)
3169 {
3170 	struct watched_area *delp;
3171 	void *cookie;
3172 
3173 	cookie = NULL;
3174 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3175 		kmem_free(delp, sizeof (struct watched_area));
3176 
3177 	avl_destroy(&p->p_warea);
3178 }
3179 
3180 /*
3181  * This one is called by the traced process to unwatch all the
3182  * pages while deallocating the list of watched_page structs.
3183  */
3184 void
3185 pr_free_watched_pages(proc_t *p)
3186 {
3187 	struct as *as = p->p_as;
3188 	struct watched_page *pwp;
3189 	uint_t prot;
3190 	int    retrycnt, err;
3191 	void *cookie;
3192 
3193 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3194 		return;
3195 
3196 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3197 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3198 
3199 	pwp = avl_first(&as->a_wpage);
3200 
3201 	cookie = NULL;
3202 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3203 		retrycnt = 0;
3204 		if ((prot = pwp->wp_oprot) != 0) {
3205 			caddr_t addr = pwp->wp_vaddr;
3206 			struct seg *seg;
3207 		retry:
3208 
3209 			if ((pwp->wp_prot != prot ||
3210 			    (pwp->wp_flags & WP_NOWATCH)) &&
3211 			    (seg = as_segat(as, addr)) != NULL) {
3212 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3213 				if (err == IE_RETRY) {
3214 					ASSERT(retrycnt == 0);
3215 					retrycnt++;
3216 					goto retry;
3217 				}
3218 			}
3219 		}
3220 		kmem_free(pwp, sizeof (struct watched_page));
3221 	}
3222 
3223 	avl_destroy(&as->a_wpage);
3224 	p->p_wprot = NULL;
3225 
3226 	AS_LOCK_EXIT(as, &as->a_lock);
3227 }
3228 
3229 /*
3230  * Insert a watched area into the list of watched pages.
3231  * If oflags is zero then we are adding a new watched area.
3232  * Otherwise we are changing the flags of an existing watched area.
3233  */
3234 static int
3235 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3236 	ulong_t flags, ulong_t oflags)
3237 {
3238 	struct as *as = p->p_as;
3239 	avl_tree_t *pwp_tree;
3240 	struct watched_page *pwp, *newpwp;
3241 	struct watched_page tpw;
3242 	avl_index_t where;
3243 	struct seg *seg;
3244 	uint_t prot;
3245 	caddr_t addr;
3246 
3247 	/*
3248 	 * We need to pre-allocate a list of structures before we grab the
3249 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3250 	 * held.
3251 	 */
3252 	newpwp = NULL;
3253 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3254 	    addr < eaddr; addr += PAGESIZE) {
3255 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3256 		pwp->wp_list = newpwp;
3257 		newpwp = pwp;
3258 	}
3259 
3260 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3261 
3262 	/*
3263 	 * Search for an existing watched page to contain the watched area.
3264 	 * If none is found, grab a new one from the available list
3265 	 * and insert it in the active list, keeping the list sorted
3266 	 * by user-level virtual address.
3267 	 */
3268 	if (p->p_flag & SVFWAIT)
3269 		pwp_tree = &p->p_wpage;
3270 	else
3271 		pwp_tree = &as->a_wpage;
3272 
3273 again:
3274 	if (avl_numnodes(pwp_tree) > prnwatch) {
3275 		AS_LOCK_EXIT(as, &as->a_lock);
3276 		while (newpwp != NULL) {
3277 			pwp = newpwp->wp_list;
3278 			kmem_free(newpwp, sizeof (struct watched_page));
3279 			newpwp = pwp;
3280 		}
3281 		return (E2BIG);
3282 	}
3283 
3284 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3285 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3286 		pwp = newpwp;
3287 		newpwp = newpwp->wp_list;
3288 		pwp->wp_list = NULL;
3289 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3290 		    (uintptr_t)PAGEMASK);
3291 		avl_insert(pwp_tree, pwp, where);
3292 	}
3293 
3294 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3295 
3296 	if (oflags & WA_READ)
3297 		pwp->wp_read--;
3298 	if (oflags & WA_WRITE)
3299 		pwp->wp_write--;
3300 	if (oflags & WA_EXEC)
3301 		pwp->wp_exec--;
3302 
3303 	ASSERT(pwp->wp_read >= 0);
3304 	ASSERT(pwp->wp_write >= 0);
3305 	ASSERT(pwp->wp_exec >= 0);
3306 
3307 	if (flags & WA_READ)
3308 		pwp->wp_read++;
3309 	if (flags & WA_WRITE)
3310 		pwp->wp_write++;
3311 	if (flags & WA_EXEC)
3312 		pwp->wp_exec++;
3313 
3314 	if (!(p->p_flag & SVFWAIT)) {
3315 		vaddr = pwp->wp_vaddr;
3316 		if (pwp->wp_oprot == 0 &&
3317 		    (seg = as_segat(as, vaddr)) != NULL) {
3318 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
3319 			pwp->wp_oprot = (uchar_t)prot;
3320 			pwp->wp_prot = (uchar_t)prot;
3321 		}
3322 		if (pwp->wp_oprot != 0) {
3323 			prot = pwp->wp_oprot;
3324 			if (pwp->wp_read)
3325 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3326 			if (pwp->wp_write)
3327 				prot &= ~PROT_WRITE;
3328 			if (pwp->wp_exec)
3329 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3330 			if (!(pwp->wp_flags & WP_NOWATCH) &&
3331 			    pwp->wp_prot != prot &&
3332 			    (pwp->wp_flags & WP_SETPROT) == 0) {
3333 				pwp->wp_flags |= WP_SETPROT;
3334 				pwp->wp_list = p->p_wprot;
3335 				p->p_wprot = pwp;
3336 			}
3337 			pwp->wp_prot = (uchar_t)prot;
3338 		}
3339 	}
3340 
3341 	/*
3342 	 * If the watched area extends into the next page then do
3343 	 * it over again with the virtual address of the next page.
3344 	 */
3345 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3346 		goto again;
3347 
3348 	AS_LOCK_EXIT(as, &as->a_lock);
3349 
3350 	/*
3351 	 * Free any pages we may have over-allocated
3352 	 */
3353 	while (newpwp != NULL) {
3354 		pwp = newpwp->wp_list;
3355 		kmem_free(newpwp, sizeof (struct watched_page));
3356 		newpwp = pwp;
3357 	}
3358 
3359 	return (0);
3360 }
3361 
3362 /*
3363  * Remove a watched area from the list of watched pages.
3364  * A watched area may extend over more than one page.
3365  */
3366 static void
3367 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3368 {
3369 	struct as *as = p->p_as;
3370 	struct watched_page *pwp;
3371 	struct watched_page tpw;
3372 	avl_tree_t *tree;
3373 	avl_index_t where;
3374 
3375 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3376 
3377 	if (p->p_flag & SVFWAIT)
3378 		tree = &p->p_wpage;
3379 	else
3380 		tree = &as->a_wpage;
3381 
3382 	tpw.wp_vaddr = vaddr =
3383 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3384 	pwp = avl_find(tree, &tpw, &where);
3385 	if (pwp == NULL)
3386 		pwp = avl_nearest(tree, where, AVL_AFTER);
3387 
3388 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3389 		ASSERT(vaddr <=  pwp->wp_vaddr);
3390 
3391 		if (flags & WA_READ)
3392 			pwp->wp_read--;
3393 		if (flags & WA_WRITE)
3394 			pwp->wp_write--;
3395 		if (flags & WA_EXEC)
3396 			pwp->wp_exec--;
3397 
3398 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3399 			/*
3400 			 * Reset the hat layer's protections on this page.
3401 			 */
3402 			if (pwp->wp_oprot != 0) {
3403 				uint_t prot = pwp->wp_oprot;
3404 
3405 				if (pwp->wp_read)
3406 					prot &=
3407 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3408 				if (pwp->wp_write)
3409 					prot &= ~PROT_WRITE;
3410 				if (pwp->wp_exec)
3411 					prot &=
3412 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3413 				if (!(pwp->wp_flags & WP_NOWATCH) &&
3414 				    pwp->wp_prot != prot &&
3415 				    (pwp->wp_flags & WP_SETPROT) == 0) {
3416 					pwp->wp_flags |= WP_SETPROT;
3417 					pwp->wp_list = p->p_wprot;
3418 					p->p_wprot = pwp;
3419 				}
3420 				pwp->wp_prot = (uchar_t)prot;
3421 			}
3422 		} else {
3423 			/*
3424 			 * No watched areas remain in this page.
3425 			 * Reset everything to normal.
3426 			 */
3427 			if (pwp->wp_oprot != 0) {
3428 				pwp->wp_prot = pwp->wp_oprot;
3429 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
3430 					pwp->wp_flags |= WP_SETPROT;
3431 					pwp->wp_list = p->p_wprot;
3432 					p->p_wprot = pwp;
3433 				}
3434 			}
3435 		}
3436 
3437 		pwp = AVL_NEXT(tree, pwp);
3438 	}
3439 
3440 	AS_LOCK_EXIT(as, &as->a_lock);
3441 }
3442 
3443 /*
3444  * Return the original protections for the specified page.
3445  */
3446 static void
3447 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3448 {
3449 	struct watched_page *pwp;
3450 	struct watched_page tpw;
3451 
3452 	ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3453 
3454 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3455 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3456 		*prot = pwp->wp_oprot;
3457 }
3458 
3459 static prpagev_t *
3460 pr_pagev_create(struct seg *seg, int check_noreserve)
3461 {
3462 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3463 	size_t total_pages = seg_pages(seg);
3464 
3465 	/*
3466 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
3467 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
3468 	 * to about a megabyte of kernel heap by default.
3469 	 */
3470 	pagev->pg_npages = MIN(total_pages, pagev_lim);
3471 	pagev->pg_pnbase = 0;
3472 
3473 	pagev->pg_protv =
3474 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3475 
3476 	if (check_noreserve)
3477 		pagev->pg_incore =
3478 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3479 	else
3480 		pagev->pg_incore = NULL;
3481 
3482 	return (pagev);
3483 }
3484 
3485 static void
3486 pr_pagev_destroy(prpagev_t *pagev)
3487 {
3488 	if (pagev->pg_incore != NULL)
3489 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3490 
3491 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3492 	kmem_free(pagev, sizeof (prpagev_t));
3493 }
3494 
3495 static caddr_t
3496 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3497 {
3498 	ulong_t lastpg = seg_page(seg, eaddr - 1);
3499 	ulong_t pn, pnlim;
3500 	caddr_t saddr;
3501 	size_t len;
3502 
3503 	ASSERT(addr >= seg->s_base && addr <= eaddr);
3504 
3505 	if (addr == eaddr)
3506 		return (eaddr);
3507 
3508 refill:
3509 	ASSERT(addr < eaddr);
3510 	pagev->pg_pnbase = seg_page(seg, addr);
3511 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
3512 	saddr = addr;
3513 
3514 	if (lastpg < pnlim)
3515 		len = (size_t)(eaddr - addr);
3516 	else
3517 		len = pagev->pg_npages * PAGESIZE;
3518 
3519 	if (pagev->pg_incore != NULL) {
3520 		/*
3521 		 * INCORE cleverly has different semantics than GETPROT:
3522 		 * it returns info on pages up to but NOT including addr + len.
3523 		 */
3524 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3525 		pn = pagev->pg_pnbase;
3526 
3527 		do {
3528 			/*
3529 			 * Guilty knowledge here:  We know that segvn_incore
3530 			 * returns more than just the low-order bit that
3531 			 * indicates the page is actually in memory.  If any
3532 			 * bits are set, then the page has backing store.
3533 			 */
3534 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3535 				goto out;
3536 
3537 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3538 
3539 		/*
3540 		 * If we examined all the pages in the vector but we're not
3541 		 * at the end of the segment, take another lap.
3542 		 */
3543 		if (addr < eaddr)
3544 			goto refill;
3545 	}
3546 
3547 	/*
3548 	 * Need to take len - 1 because addr + len is the address of the
3549 	 * first byte of the page just past the end of what we want.
3550 	 */
3551 out:
3552 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3553 	return (addr);
3554 }
3555 
3556 static caddr_t
3557 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3558     caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3559 {
3560 	/*
3561 	 * Our starting address is either the specified address, or the base
3562 	 * address from the start of the pagev.  If the latter is greater,
3563 	 * this means a previous call to pr_pagev_fill has already scanned
3564 	 * further than the end of the previous mapping.
3565 	 */
3566 	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3567 	caddr_t addr = MAX(*saddrp, base);
3568 	ulong_t pn = seg_page(seg, addr);
3569 	uint_t prot, nprot;
3570 
3571 	/*
3572 	 * If we're dealing with noreserve pages, then advance addr to
3573 	 * the address of the next page which has backing store.
3574 	 */
3575 	if (pagev->pg_incore != NULL) {
3576 		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3577 			if ((addr += PAGESIZE) == eaddr) {
3578 				*saddrp = addr;
3579 				prot = 0;
3580 				goto out;
3581 			}
3582 			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3583 				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3584 				if (addr == eaddr) {
3585 					*saddrp = addr;
3586 					prot = 0;
3587 					goto out;
3588 				}
3589 				pn = seg_page(seg, addr);
3590 			}
3591 		}
3592 	}
3593 
3594 	/*
3595 	 * Get the protections on the page corresponding to addr.
3596 	 */
3597 	pn = seg_page(seg, addr);
3598 	ASSERT(pn >= pagev->pg_pnbase);
3599 	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3600 
3601 	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3602 	getwatchprot(seg->s_as, addr, &prot);
3603 	*saddrp = addr;
3604 
3605 	/*
3606 	 * Now loop until we find a backed page with different protections
3607 	 * or we reach the end of this segment.
3608 	 */
3609 	while ((addr += PAGESIZE) < eaddr) {
3610 		/*
3611 		 * If pn has advanced to the page number following what we
3612 		 * have information on, refill the page vector and reset
3613 		 * addr and pn.  If pr_pagev_fill does not return the
3614 		 * address of the next page, we have a discontiguity and
3615 		 * thus have reached the end of the current mapping.
3616 		 */
3617 		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3618 			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3619 			if (naddr != addr)
3620 				goto out;
3621 			pn = seg_page(seg, addr);
3622 		}
3623 
3624 		/*
3625 		 * The previous page's protections are in prot, and it has
3626 		 * backing.  If this page is MAP_NORESERVE and has no backing,
3627 		 * then end this mapping and return the previous protections.
3628 		 */
3629 		if (pagev->pg_incore != NULL &&
3630 		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3631 			break;
3632 
3633 		/*
3634 		 * Otherwise end the mapping if this page's protections (nprot)
3635 		 * are different than those in the previous page (prot).
3636 		 */
3637 		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3638 		getwatchprot(seg->s_as, addr, &nprot);
3639 
3640 		if (nprot != prot)
3641 			break;
3642 	}
3643 
3644 out:
3645 	*protp = prot;
3646 	return (addr);
3647 }
3648 
3649 size_t
3650 pr_getsegsize(struct seg *seg, int reserved)
3651 {
3652 	size_t size = seg->s_size;
3653 
3654 	/*
3655 	 * If we're interested in the reserved space, return the size of the
3656 	 * segment itself.  Everything else in this function is a special case
3657 	 * to determine the actual underlying size of various segment types.
3658 	 */
3659 	if (reserved)
3660 		return (size);
3661 
3662 	/*
3663 	 * If this is a segvn mapping of a regular file, return the smaller
3664 	 * of the segment size and the remaining size of the file beyond
3665 	 * the file offset corresponding to seg->s_base.
3666 	 */
3667 	if (seg->s_ops == &segvn_ops) {
3668 		vattr_t vattr;
3669 		vnode_t *vp;
3670 
3671 		vattr.va_mask = AT_SIZE;
3672 
3673 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3674 		    vp != NULL && vp->v_type == VREG &&
3675 		    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
3676 
3677 			u_offset_t fsize = vattr.va_size;
3678 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3679 
3680 			if (fsize < offset)
3681 				fsize = 0;
3682 			else
3683 				fsize -= offset;
3684 
3685 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3686 
3687 			if (fsize < (u_offset_t)size)
3688 				size = (size_t)fsize;
3689 		}
3690 
3691 		return (size);
3692 	}
3693 
3694 	/*
3695 	 * If this is an ISM shared segment, don't include pages that are
3696 	 * beyond the real size of the spt segment that backs it.
3697 	 */
3698 	if (seg->s_ops == &segspt_shmops)
3699 		return (MIN(spt_realsize(seg), size));
3700 
3701 	/*
3702 	 * If this is segment is a mapping from /dev/null, then this is a
3703 	 * reservation of virtual address space and has no actual size.
3704 	 * Such segments are backed by segdev and have type set to neither
3705 	 * MAP_SHARED nor MAP_PRIVATE.
3706 	 */
3707 	if (seg->s_ops == &segdev_ops &&
3708 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
3709 		(MAP_SHARED | MAP_PRIVATE)) == 0))
3710 		return (0);
3711 
3712 	/*
3713 	 * If this segment doesn't match one of the special types we handle,
3714 	 * just return the size of the segment itself.
3715 	 */
3716 	return (size);
3717 }
3718 
3719 uint_t
3720 pr_getprot(struct seg *seg, int reserved, void **tmp,
3721 	caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3722 {
3723 	struct as *as = seg->s_as;
3724 
3725 	caddr_t saddr = *saddrp;
3726 	caddr_t naddr;
3727 
3728 	int check_noreserve;
3729 	uint_t prot;
3730 
3731 	union {
3732 		struct segvn_data *svd;
3733 		struct segdev_data *sdp;
3734 		void *data;
3735 	} s;
3736 
3737 	s.data = seg->s_data;
3738 
3739 	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3740 	ASSERT(saddr >= seg->s_base && saddr < eaddr);
3741 	ASSERT(eaddr <= seg->s_base + seg->s_size);
3742 
3743 	/*
3744 	 * Don't include MAP_NORESERVE pages in the address range
3745 	 * unless their mappings have actually materialized.
3746 	 * We cheat by knowing that segvn is the only segment
3747 	 * driver that supports MAP_NORESERVE.
3748 	 */
3749 	check_noreserve =
3750 	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3751 	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3752 	    (s.svd->flags & MAP_NORESERVE));
3753 
3754 	/*
3755 	 * Examine every page only as a last resort.  We use guilty knowledge
3756 	 * of segvn and segdev to avoid this: if there are no per-page
3757 	 * protections present in the segment and we don't care about
3758 	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3759 	 */
3760 	if (!check_noreserve && saddr == seg->s_base &&
3761 	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3762 		prot = s.svd->prot;
3763 		getwatchprot(as, saddr, &prot);
3764 		naddr = eaddr;
3765 
3766 	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3767 	    s.sdp != NULL && s.sdp->pageprot == 0) {
3768 		prot = s.sdp->prot;
3769 		getwatchprot(as, saddr, &prot);
3770 		naddr = eaddr;
3771 
3772 	} else {
3773 		prpagev_t *pagev;
3774 
3775 		/*
3776 		 * If addr is sitting at the start of the segment, then
3777 		 * create a page vector to store protection and incore
3778 		 * information for pages in the segment, and fill it.
3779 		 * Otherwise, we expect *tmp to address the prpagev_t
3780 		 * allocated by a previous call to this function.
3781 		 */
3782 		if (saddr == seg->s_base) {
3783 			pagev = pr_pagev_create(seg, check_noreserve);
3784 			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3785 
3786 			ASSERT(*tmp == NULL);
3787 			*tmp = pagev;
3788 
3789 			ASSERT(saddr <= eaddr);
3790 			*saddrp = saddr;
3791 
3792 			if (saddr == eaddr) {
3793 				naddr = saddr;
3794 				prot = 0;
3795 				goto out;
3796 			}
3797 
3798 		} else {
3799 			ASSERT(*tmp != NULL);
3800 			pagev = (prpagev_t *)*tmp;
3801 		}
3802 
3803 		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3804 		ASSERT(naddr <= eaddr);
3805 	}
3806 
3807 out:
3808 	if (naddr == eaddr)
3809 		pr_getprot_done(tmp);
3810 	*naddrp = naddr;
3811 	return (prot);
3812 }
3813 
3814 void
3815 pr_getprot_done(void **tmp)
3816 {
3817 	if (*tmp != NULL) {
3818 		pr_pagev_destroy((prpagev_t *)*tmp);
3819 		*tmp = NULL;
3820 	}
3821 }
3822 
3823 /*
3824  * Return true iff the vnode is a /proc file from the object directory.
3825  */
3826 int
3827 pr_isobject(vnode_t *vp)
3828 {
3829 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3830 }
3831 
3832 /*
3833  * Return true iff the vnode is a /proc file opened by the process itself.
3834  */
3835 int
3836 pr_isself(vnode_t *vp)
3837 {
3838 	/*
3839 	 * XXX: To retain binary compatibility with the old
3840 	 * ioctl()-based version of /proc, we exempt self-opens
3841 	 * of /proc/<pid> from being marked close-on-exec.
3842 	 */
3843 	return (vn_matchops(vp, prvnodeops) &&
3844 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
3845 	    VTOP(vp)->pr_type != PR_PIDDIR);
3846 }
3847 
3848 static ssize_t
3849 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3850 {
3851 	ssize_t pagesize, hatsize;
3852 
3853 	ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
3854 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3855 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3856 	ASSERT(saddr < eaddr);
3857 
3858 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3859 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3860 	ASSERT(pagesize != 0);
3861 
3862 	if (pagesize == -1)
3863 		pagesize = PAGESIZE;
3864 
3865 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3866 
3867 	while (saddr < eaddr) {
3868 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3869 			break;
3870 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
3871 		saddr += pagesize;
3872 	}
3873 
3874 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
3875 	return (hatsize);
3876 }
3877 
3878 /*
3879  * Return an array of structures with extended memory map information.
3880  * We allocate here; the caller must deallocate.
3881  */
3882 int
3883 prgetxmap(proc_t *p, list_t *iolhead)
3884 {
3885 	struct as *as = p->p_as;
3886 	prxmap_t *mp;
3887 	struct seg *seg;
3888 	struct seg *brkseg, *stkseg;
3889 	struct vnode *vp;
3890 	struct vattr vattr;
3891 	uint_t prot;
3892 
3893 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
3894 
3895 	/*
3896 	 * Request an initial buffer size that doesn't waste memory
3897 	 * if the address space has only a small number of segments.
3898 	 */
3899 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
3900 
3901 	if ((seg = AS_SEGFIRST(as)) == NULL)
3902 		return (0);
3903 
3904 	brkseg = break_seg(p);
3905 	stkseg = as_segat(as, prgetstackbase(p));
3906 
3907 	do {
3908 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
3909 		caddr_t saddr, naddr, baddr;
3910 		void *tmp = NULL;
3911 		ssize_t psz;
3912 		char *parr;
3913 		uint64_t npages;
3914 		uint64_t pagenum;
3915 
3916 		/*
3917 		 * Segment loop part one: iterate from the base of the segment
3918 		 * to its end, pausing at each address boundary (baddr) between
3919 		 * ranges that have different virtual memory protections.
3920 		 */
3921 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
3922 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
3923 			ASSERT(baddr >= saddr && baddr <= eaddr);
3924 
3925 			/*
3926 			 * Segment loop part two: iterate from the current
3927 			 * position to the end of the protection boundary,
3928 			 * pausing at each address boundary (naddr) between
3929 			 * ranges that have different underlying page sizes.
3930 			 */
3931 			for (; saddr < baddr; saddr = naddr) {
3932 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
3933 				ASSERT(naddr >= saddr && naddr <= baddr);
3934 
3935 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
3936 
3937 				mp->pr_vaddr = (uintptr_t)saddr;
3938 				mp->pr_size = naddr - saddr;
3939 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
3940 				mp->pr_mflags = 0;
3941 				if (prot & PROT_READ)
3942 					mp->pr_mflags |= MA_READ;
3943 				if (prot & PROT_WRITE)
3944 					mp->pr_mflags |= MA_WRITE;
3945 				if (prot & PROT_EXEC)
3946 					mp->pr_mflags |= MA_EXEC;
3947 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
3948 					mp->pr_mflags |= MA_SHARED;
3949 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
3950 					mp->pr_mflags |= MA_NORESERVE;
3951 				if (seg->s_ops == &segspt_shmops ||
3952 				    (seg->s_ops == &segvn_ops &&
3953 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
3954 				    vp == NULL)))
3955 					mp->pr_mflags |= MA_ANON;
3956 				if (seg == brkseg)
3957 					mp->pr_mflags |= MA_BREAK;
3958 				else if (seg == stkseg)
3959 					mp->pr_mflags |= MA_STACK;
3960 				if (seg->s_ops == &segspt_shmops)
3961 					mp->pr_mflags |= MA_ISM | MA_SHM;
3962 
3963 				mp->pr_pagesize = PAGESIZE;
3964 				if (psz == -1) {
3965 					mp->pr_hatpagesize = 0;
3966 				} else {
3967 					mp->pr_hatpagesize = psz;
3968 				}
3969 
3970 				/*
3971 				 * Manufacture a filename for the "object" dir.
3972 				 */
3973 				mp->pr_dev = PRNODEV;
3974 				vattr.va_mask = AT_FSID|AT_NODEID;
3975 				if (seg->s_ops == &segvn_ops &&
3976 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
3977 				    vp != NULL && vp->v_type == VREG &&
3978 				    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
3979 					mp->pr_dev = vattr.va_fsid;
3980 					mp->pr_ino = vattr.va_nodeid;
3981 					if (vp == p->p_exec)
3982 						(void) strcpy(mp->pr_mapname,
3983 						    "a.out");
3984 					else
3985 						pr_object_name(mp->pr_mapname,
3986 						    vp, &vattr);
3987 				}
3988 
3989 				/*
3990 				 * Get the SysV shared memory id, if any.
3991 				 */
3992 				if ((mp->pr_mflags & MA_SHARED) &&
3993 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
3994 				    seg->s_base)) != SHMID_NONE) {
3995 					if (mp->pr_shmid == SHMID_FREE)
3996 						mp->pr_shmid = -1;
3997 
3998 					mp->pr_mflags |= MA_SHM;
3999 				} else {
4000 					mp->pr_shmid = -1;
4001 				}
4002 
4003 				npages = ((uintptr_t)(naddr - saddr)) >>
4004 				    PAGESHIFT;
4005 				parr = kmem_zalloc(npages, KM_SLEEP);
4006 
4007 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4008 
4009 				for (pagenum = 0; pagenum < npages; pagenum++) {
4010 					if (parr[pagenum] & SEG_PAGE_INCORE)
4011 						mp->pr_rss++;
4012 					if (parr[pagenum] & SEG_PAGE_ANON)
4013 						mp->pr_anon++;
4014 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4015 						mp->pr_locked++;
4016 				}
4017 				kmem_free(parr, npages);
4018 			}
4019 		}
4020 		ASSERT(tmp == NULL);
4021 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4022 
4023 	return (0);
4024 }
4025 
4026 /*
4027  * Return the process's credentials.  We don't need a 32-bit equivalent of
4028  * this function because prcred_t and prcred32_t are actually the same.
4029  */
4030 void
4031 prgetcred(proc_t *p, prcred_t *pcrp)
4032 {
4033 	mutex_enter(&p->p_crlock);
4034 	cred2prcred(p->p_cred, pcrp);
4035 	mutex_exit(&p->p_crlock);
4036 }
4037 
4038 /*
4039  * Compute actual size of the prpriv_t structure.
4040  */
4041 
4042 size_t
4043 prgetprivsize(void)
4044 {
4045 	return (priv_prgetprivsize(NULL));
4046 }
4047 
4048 /*
4049  * Return the process's privileges.  We don't need a 32-bit equivalent of
4050  * this function because prpriv_t and prpriv32_t are actually the same.
4051  */
4052 void
4053 prgetpriv(proc_t *p, prpriv_t *pprp)
4054 {
4055 	mutex_enter(&p->p_crlock);
4056 	cred2prpriv(p->p_cred, pprp);
4057 	mutex_exit(&p->p_crlock);
4058 }
4059 
4060 #ifdef _SYSCALL32_IMPL
4061 /*
4062  * Return an array of structures with HAT memory map information.
4063  * We allocate here; the caller must deallocate.
4064  */
4065 int
4066 prgetxmap32(proc_t *p, list_t *iolhead)
4067 {
4068 	struct as *as = p->p_as;
4069 	prxmap32_t *mp;
4070 	struct seg *seg;
4071 	struct seg *brkseg, *stkseg;
4072 	struct vnode *vp;
4073 	struct vattr vattr;
4074 	uint_t prot;
4075 
4076 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4077 
4078 	/*
4079 	 * Request an initial buffer size that doesn't waste memory
4080 	 * if the address space has only a small number of segments.
4081 	 */
4082 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4083 
4084 	if ((seg = AS_SEGFIRST(as)) == NULL)
4085 		return (0);
4086 
4087 	brkseg = break_seg(p);
4088 	stkseg = as_segat(as, prgetstackbase(p));
4089 
4090 	do {
4091 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4092 		caddr_t saddr, naddr, baddr;
4093 		void *tmp = NULL;
4094 		ssize_t psz;
4095 		char *parr;
4096 		uint64_t npages;
4097 		uint64_t pagenum;
4098 
4099 		/*
4100 		 * Segment loop part one: iterate from the base of the segment
4101 		 * to its end, pausing at each address boundary (baddr) between
4102 		 * ranges that have different virtual memory protections.
4103 		 */
4104 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4105 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4106 			ASSERT(baddr >= saddr && baddr <= eaddr);
4107 
4108 			/*
4109 			 * Segment loop part two: iterate from the current
4110 			 * position to the end of the protection boundary,
4111 			 * pausing at each address boundary (naddr) between
4112 			 * ranges that have different underlying page sizes.
4113 			 */
4114 			for (; saddr < baddr; saddr = naddr) {
4115 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4116 				ASSERT(naddr >= saddr && naddr <= baddr);
4117 
4118 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4119 
4120 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4121 				mp->pr_size = (size32_t)(naddr - saddr);
4122 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4123 				mp->pr_mflags = 0;
4124 				if (prot & PROT_READ)
4125 					mp->pr_mflags |= MA_READ;
4126 				if (prot & PROT_WRITE)
4127 					mp->pr_mflags |= MA_WRITE;
4128 				if (prot & PROT_EXEC)
4129 					mp->pr_mflags |= MA_EXEC;
4130 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4131 					mp->pr_mflags |= MA_SHARED;
4132 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4133 					mp->pr_mflags |= MA_NORESERVE;
4134 				if (seg->s_ops == &segspt_shmops ||
4135 				    (seg->s_ops == &segvn_ops &&
4136 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4137 				    vp == NULL)))
4138 					mp->pr_mflags |= MA_ANON;
4139 				if (seg == brkseg)
4140 					mp->pr_mflags |= MA_BREAK;
4141 				else if (seg == stkseg)
4142 					mp->pr_mflags |= MA_STACK;
4143 				if (seg->s_ops == &segspt_shmops)
4144 					mp->pr_mflags |= MA_ISM | MA_SHM;
4145 
4146 				mp->pr_pagesize = PAGESIZE;
4147 				if (psz == -1) {
4148 					mp->pr_hatpagesize = 0;
4149 				} else {
4150 					mp->pr_hatpagesize = psz;
4151 				}
4152 
4153 				/*
4154 				 * Manufacture a filename for the "object" dir.
4155 				 */
4156 				mp->pr_dev = PRNODEV32;
4157 				vattr.va_mask = AT_FSID|AT_NODEID;
4158 				if (seg->s_ops == &segvn_ops &&
4159 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4160 				    vp != NULL && vp->v_type == VREG &&
4161 				    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
4162 					(void) cmpldev(&mp->pr_dev,
4163 					    vattr.va_fsid);
4164 					mp->pr_ino = vattr.va_nodeid;
4165 					if (vp == p->p_exec)
4166 						(void) strcpy(mp->pr_mapname,
4167 						    "a.out");
4168 					else
4169 						pr_object_name(mp->pr_mapname,
4170 						    vp, &vattr);
4171 				}
4172 
4173 				/*
4174 				 * Get the SysV shared memory id, if any.
4175 				 */
4176 				if ((mp->pr_mflags & MA_SHARED) &&
4177 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4178 				    seg->s_base)) != SHMID_NONE) {
4179 					if (mp->pr_shmid == SHMID_FREE)
4180 						mp->pr_shmid = -1;
4181 
4182 					mp->pr_mflags |= MA_SHM;
4183 				} else {
4184 					mp->pr_shmid = -1;
4185 				}
4186 
4187 				npages = ((uintptr_t)(naddr - saddr)) >>
4188 				    PAGESHIFT;
4189 				parr = kmem_zalloc(npages, KM_SLEEP);
4190 
4191 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4192 
4193 				for (pagenum = 0; pagenum < npages; pagenum++) {
4194 					if (parr[pagenum] & SEG_PAGE_INCORE)
4195 						mp->pr_rss++;
4196 					if (parr[pagenum] & SEG_PAGE_ANON)
4197 						mp->pr_anon++;
4198 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4199 						mp->pr_locked++;
4200 				}
4201 				kmem_free(parr, npages);
4202 			}
4203 		}
4204 		ASSERT(tmp == NULL);
4205 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4206 
4207 	return (0);
4208 }
4209 #endif	/* _SYSCALL32_IMPL */
4210