xref: /titanic_44/usr/src/uts/common/fs/proc/prsubr.c (revision 3b890a5b92df88d9d90b2d7ac57e8c1e93e59e6a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/t_lock.h>
34 #include <sys/param.h>
35 #include <sys/cmn_err.h>
36 #include <sys/cred.h>
37 #include <sys/priv.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/inline.h>
41 #include <sys/kmem.h>
42 #include <sys/mman.h>
43 #include <sys/proc.h>
44 #include <sys/sobject.h>
45 #include <sys/sysmacros.h>
46 #include <sys/systm.h>
47 #include <sys/uio.h>
48 #include <sys/var.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/session.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/disp.h>
56 #include <sys/class.h>
57 #include <sys/ts.h>
58 #include <sys/bitmap.h>
59 #include <sys/poll.h>
60 #include <sys/shm_impl.h>
61 #include <sys/fault.h>
62 #include <sys/syscall.h>
63 #include <sys/procfs.h>
64 #include <sys/processor.h>
65 #include <sys/cpuvar.h>
66 #include <sys/copyops.h>
67 #include <sys/time.h>
68 #include <sys/msacct.h>
69 #include <vm/as.h>
70 #include <vm/rm.h>
71 #include <vm/seg.h>
72 #include <vm/seg_vn.h>
73 #include <vm/seg_dev.h>
74 #include <vm/seg_spt.h>
75 #include <vm/page.h>
76 #include <sys/vmparam.h>
77 #include <sys/swap.h>
78 #include <fs/proc/prdata.h>
79 #include <sys/task.h>
80 #include <sys/project.h>
81 #include <sys/contract_impl.h>
82 #include <sys/contract/process.h>
83 #include <sys/contract/process_impl.h>
84 #include <sys/schedctl.h>
85 #include <sys/pool.h>
86 #include <sys/zone.h>
87 #include <sys/atomic.h>
88 #include <sys/sdt.h>
89 
90 #define	MAX_ITERS_SPIN	5
91 
92 typedef struct prpagev {
93 	uint_t *pg_protv;	/* vector of page permissions */
94 	char *pg_incore;	/* vector of incore flags */
95 	size_t pg_npages;	/* number of pages in protv and incore */
96 	ulong_t pg_pnbase;	/* pn within segment of first protv element */
97 } prpagev_t;
98 
99 size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
100 
101 extern struct seg_ops segdev_ops;	/* needs a header file */
102 extern struct seg_ops segspt_shmops;	/* needs a header file */
103 
104 static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
105 static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
106 
107 /*
108  * Choose an lwp from the complete set of lwps for the process.
109  * This is called for any operation applied to the process
110  * file descriptor that requires an lwp to operate upon.
111  *
112  * Returns a pointer to the thread for the selected LWP,
113  * and with the dispatcher lock held for the thread.
114  *
115  * The algorithm for choosing an lwp is critical for /proc semantics;
116  * don't touch this code unless you know all of the implications.
117  */
118 kthread_t *
119 prchoose(proc_t *p)
120 {
121 	kthread_t *t;
122 	kthread_t *t_onproc = NULL;	/* running on processor */
123 	kthread_t *t_run = NULL;	/* runnable, on disp queue */
124 	kthread_t *t_sleep = NULL;	/* sleeping */
125 	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
126 	kthread_t *t_susp = NULL;	/* suspended stop */
127 	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
128 	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
129 	kthread_t *t_req = NULL;	/* requested stop */
130 	kthread_t *t_istop = NULL;	/* event-of-interest stop */
131 
132 	ASSERT(MUTEX_HELD(&p->p_lock));
133 
134 	/*
135 	 * If the agent lwp exists, it takes precedence over all others.
136 	 */
137 	if ((t = p->p_agenttp) != NULL) {
138 		thread_lock(t);
139 		return (t);
140 	}
141 
142 	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
143 		return (t);
144 	do {		/* for eacn lwp in the process */
145 		if (VSTOPPED(t)) {	/* virtually stopped */
146 			if (t_req == NULL)
147 				t_req = t;
148 			continue;
149 		}
150 
151 		thread_lock(t);		/* make sure thread is in good state */
152 		switch (t->t_state) {
153 		default:
154 			panic("prchoose: bad thread state %d, thread 0x%p",
155 			    t->t_state, (void *)t);
156 			/*NOTREACHED*/
157 		case TS_SLEEP:
158 			/* this is filthy */
159 			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
160 			    t->t_wchan0 == NULL) {
161 				if (t_hold == NULL)
162 					t_hold = t;
163 			} else {
164 				if (t_sleep == NULL)
165 					t_sleep = t;
166 			}
167 			break;
168 		case TS_RUN:
169 			if (t_run == NULL)
170 				t_run = t;
171 			break;
172 		case TS_ONPROC:
173 			if (t_onproc == NULL)
174 				t_onproc = t;
175 			break;
176 		case TS_ZOMB:		/* last possible choice */
177 			break;
178 		case TS_STOPPED:
179 			switch (t->t_whystop) {
180 			case PR_SUSPENDED:
181 				if (t_susp == NULL)
182 					t_susp = t;
183 				break;
184 			case PR_JOBCONTROL:
185 				if (t->t_proc_flag & TP_PRSTOP) {
186 					if (t_jdstop == NULL)
187 						t_jdstop = t;
188 				} else {
189 					if (t_jstop == NULL)
190 						t_jstop = t;
191 				}
192 				break;
193 			case PR_REQUESTED:
194 				if (t_req == NULL)
195 					t_req = t;
196 				break;
197 			case PR_SYSENTRY:
198 			case PR_SYSEXIT:
199 			case PR_SIGNALLED:
200 			case PR_FAULTED:
201 				/*
202 				 * Make an lwp calling exit() be the
203 				 * last lwp seen in the process.
204 				 */
205 				if (t_istop == NULL ||
206 				    (t_istop->t_whystop == PR_SYSENTRY &&
207 				    t_istop->t_whatstop == SYS_exit))
208 					t_istop = t;
209 				break;
210 			case PR_CHECKPOINT:	/* can't happen? */
211 				break;
212 			default:
213 				panic("prchoose: bad t_whystop %d, thread 0x%p",
214 				    t->t_whystop, (void *)t);
215 				/*NOTREACHED*/
216 			}
217 			break;
218 		}
219 		thread_unlock(t);
220 	} while ((t = t->t_forw) != p->p_tlist);
221 
222 	if (t_onproc)
223 		t = t_onproc;
224 	else if (t_run)
225 		t = t_run;
226 	else if (t_sleep)
227 		t = t_sleep;
228 	else if (t_jstop)
229 		t = t_jstop;
230 	else if (t_jdstop)
231 		t = t_jdstop;
232 	else if (t_istop)
233 		t = t_istop;
234 	else if (t_req)
235 		t = t_req;
236 	else if (t_hold)
237 		t = t_hold;
238 	else if (t_susp)
239 		t = t_susp;
240 	else			/* TS_ZOMB */
241 		t = p->p_tlist;
242 
243 	if (t != NULL)
244 		thread_lock(t);
245 	return (t);
246 }
247 
248 /*
249  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
250  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
251  * on the /proc file descriptor.  Called from stop() when a traced
252  * process stops on an event of interest.  Also called from exit()
253  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
254  */
255 void
256 prnotify(struct vnode *vp)
257 {
258 	prcommon_t *pcp = VTOP(vp)->pr_common;
259 
260 	mutex_enter(&pcp->prc_mutex);
261 	cv_broadcast(&pcp->prc_wait);
262 	mutex_exit(&pcp->prc_mutex);
263 	if (pcp->prc_flags & PRC_POLL) {
264 		/*
265 		 * We call pollwakeup() with POLLHUP to ensure that
266 		 * the pollers are awakened even if they are polling
267 		 * for nothing (i.e., waiting for the process to exit).
268 		 * This enables the use of the PRC_POLL flag for optimization
269 		 * (we can turn off PRC_POLL only if we know no pollers remain).
270 		 */
271 		pcp->prc_flags &= ~PRC_POLL;
272 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
273 	}
274 }
275 
276 /* called immediately below, in prfree() */
277 static void
278 prfreenotify(vnode_t *vp)
279 {
280 	prnode_t *pnp;
281 	prcommon_t *pcp;
282 
283 	while (vp != NULL) {
284 		pnp = VTOP(vp);
285 		pcp = pnp->pr_common;
286 		ASSERT(pcp->prc_thread == NULL);
287 		pcp->prc_proc = NULL;
288 		/*
289 		 * We can't call prnotify() here because we are holding
290 		 * pidlock.  We assert that there is no need to.
291 		 */
292 		mutex_enter(&pcp->prc_mutex);
293 		cv_broadcast(&pcp->prc_wait);
294 		mutex_exit(&pcp->prc_mutex);
295 		ASSERT(!(pcp->prc_flags & PRC_POLL));
296 
297 		vp = pnp->pr_next;
298 		pnp->pr_next = NULL;
299 	}
300 }
301 
302 /*
303  * Called from a hook in freeproc() when a traced process is removed
304  * from the process table.  The proc-table pointers of all associated
305  * /proc vnodes are cleared to indicate that the process has gone away.
306  */
307 void
308 prfree(proc_t *p)
309 {
310 	uint_t slot = p->p_slot;
311 
312 	ASSERT(MUTEX_HELD(&pidlock));
313 
314 	/*
315 	 * Block the process against /proc so it can be freed.
316 	 * It cannot be freed while locked by some controlling process.
317 	 * Lock ordering:
318 	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
319 	 */
320 	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
321 	mutex_enter(&p->p_lock);
322 	while (p->p_proc_flag & P_PR_LOCK) {
323 		mutex_exit(&pr_pidlock);
324 		cv_wait(&pr_pid_cv[slot], &p->p_lock);
325 		mutex_exit(&p->p_lock);
326 		mutex_enter(&pr_pidlock);
327 		mutex_enter(&p->p_lock);
328 	}
329 
330 	ASSERT(p->p_tlist == NULL);
331 
332 	prfreenotify(p->p_plist);
333 	p->p_plist = NULL;
334 
335 	prfreenotify(p->p_trace);
336 	p->p_trace = NULL;
337 
338 	/*
339 	 * We broadcast to wake up everyone waiting for this process.
340 	 * No one can reach this process from this point on.
341 	 */
342 	cv_broadcast(&pr_pid_cv[slot]);
343 
344 	mutex_exit(&p->p_lock);
345 	mutex_exit(&pr_pidlock);
346 }
347 
348 /*
349  * Called from a hook in exit() when a traced process is becoming a zombie.
350  */
351 void
352 prexit(proc_t *p)
353 {
354 	ASSERT(MUTEX_HELD(&p->p_lock));
355 
356 	if (pr_watch_active(p)) {
357 		pr_free_watchpoints(p);
358 		watch_disable(curthread);
359 	}
360 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
361 	if (p->p_trace) {
362 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
363 		prnotify(p->p_trace);
364 	}
365 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
366 }
367 
368 /*
369  * Called when a thread calls lwp_exit().
370  */
371 void
372 prlwpexit(kthread_t *t)
373 {
374 	vnode_t *vp;
375 	prnode_t *pnp;
376 	prcommon_t *pcp;
377 	proc_t *p = ttoproc(t);
378 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
379 
380 	ASSERT(t == curthread);
381 	ASSERT(MUTEX_HELD(&p->p_lock));
382 
383 	/*
384 	 * The process must be blocked against /proc to do this safely.
385 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
386 	 * It is the caller's responsibility to have called prbarrier(p).
387 	 */
388 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
389 
390 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
391 		pnp = VTOP(vp);
392 		pcp = pnp->pr_common;
393 		if (pcp->prc_thread == t) {
394 			pcp->prc_thread = NULL;
395 			pcp->prc_flags |= PRC_DESTROY;
396 		}
397 	}
398 
399 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
400 		pnp = VTOP(vp);
401 		pcp = pnp->pr_common;
402 		pcp->prc_thread = NULL;
403 		pcp->prc_flags |= PRC_DESTROY;
404 		prnotify(vp);
405 	}
406 
407 	if (p->p_trace)
408 		prnotify(p->p_trace);
409 }
410 
411 /*
412  * Called when a zombie thread is joined or when a
413  * detached lwp exits.  Called from lwp_hash_out().
414  */
415 void
416 prlwpfree(proc_t *p, lwpent_t *lep)
417 {
418 	vnode_t *vp;
419 	prnode_t *pnp;
420 	prcommon_t *pcp;
421 
422 	ASSERT(MUTEX_HELD(&p->p_lock));
423 
424 	/*
425 	 * The process must be blocked against /proc to do this safely.
426 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
427 	 * It is the caller's responsibility to have called prbarrier(p).
428 	 */
429 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
430 
431 	vp = lep->le_trace;
432 	lep->le_trace = NULL;
433 	while (vp) {
434 		prnotify(vp);
435 		pnp = VTOP(vp);
436 		pcp = pnp->pr_common;
437 		ASSERT(pcp->prc_thread == NULL &&
438 		    (pcp->prc_flags & PRC_DESTROY));
439 		pcp->prc_tslot = -1;
440 		vp = pnp->pr_next;
441 		pnp->pr_next = NULL;
442 	}
443 
444 	if (p->p_trace)
445 		prnotify(p->p_trace);
446 }
447 
448 /*
449  * Called from a hook in exec() when a thread starts exec().
450  */
451 void
452 prexecstart(void)
453 {
454 	proc_t *p = ttoproc(curthread);
455 	klwp_t *lwp = ttolwp(curthread);
456 
457 	/*
458 	 * The P_PR_EXEC flag blocks /proc operations for
459 	 * the duration of the exec().
460 	 * We can't start exec() while the process is
461 	 * locked by /proc, so we call prbarrier().
462 	 * lwp_nostop keeps the process from being stopped
463 	 * via job control for the duration of the exec().
464 	 */
465 
466 	ASSERT(MUTEX_HELD(&p->p_lock));
467 	prbarrier(p);
468 	lwp->lwp_nostop++;
469 	p->p_proc_flag |= P_PR_EXEC;
470 }
471 
472 /*
473  * Called from a hook in exec() when a thread finishes exec().
474  * The thread may or may not have succeeded.  Some other thread
475  * may have beat it to the punch.
476  */
477 void
478 prexecend(void)
479 {
480 	proc_t *p = ttoproc(curthread);
481 	klwp_t *lwp = ttolwp(curthread);
482 	vnode_t *vp;
483 	prnode_t *pnp;
484 	prcommon_t *pcp;
485 	model_t model = p->p_model;
486 	id_t tid = curthread->t_tid;
487 	int tslot = curthread->t_dslot;
488 
489 	ASSERT(MUTEX_HELD(&p->p_lock));
490 
491 	lwp->lwp_nostop--;
492 	if (p->p_flag & SEXITLWPS) {
493 		/*
494 		 * We are on our way to exiting because some
495 		 * other thread beat us in the race to exec().
496 		 * Don't clear the P_PR_EXEC flag in this case.
497 		 */
498 		return;
499 	}
500 
501 	/*
502 	 * Wake up anyone waiting in /proc for the process to complete exec().
503 	 */
504 	p->p_proc_flag &= ~P_PR_EXEC;
505 	if ((vp = p->p_trace) != NULL) {
506 		pcp = VTOP(vp)->pr_common;
507 		mutex_enter(&pcp->prc_mutex);
508 		cv_broadcast(&pcp->prc_wait);
509 		mutex_exit(&pcp->prc_mutex);
510 		for (; vp != NULL; vp = pnp->pr_next) {
511 			pnp = VTOP(vp);
512 			pnp->pr_common->prc_datamodel = model;
513 		}
514 	}
515 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
516 		/*
517 		 * We dealt with the process common above.
518 		 */
519 		ASSERT(p->p_trace != NULL);
520 		pcp = VTOP(vp)->pr_common;
521 		mutex_enter(&pcp->prc_mutex);
522 		cv_broadcast(&pcp->prc_wait);
523 		mutex_exit(&pcp->prc_mutex);
524 		for (; vp != NULL; vp = pnp->pr_next) {
525 			pnp = VTOP(vp);
526 			pcp = pnp->pr_common;
527 			pcp->prc_datamodel = model;
528 			pcp->prc_tid = tid;
529 			pcp->prc_tslot = tslot;
530 		}
531 	}
532 }
533 
534 /*
535  * Called from a hook in relvm() just before freeing the address space.
536  * We free all the watched areas now.
537  */
538 void
539 prrelvm(void)
540 {
541 	proc_t *p = ttoproc(curthread);
542 
543 	mutex_enter(&p->p_lock);
544 	prbarrier(p);	/* block all other /proc operations */
545 	if (pr_watch_active(p)) {
546 		pr_free_watchpoints(p);
547 		watch_disable(curthread);
548 	}
549 	mutex_exit(&p->p_lock);
550 	pr_free_watched_pages(p);
551 }
552 
553 /*
554  * Called from hooks in exec-related code when a traced process
555  * attempts to exec(2) a setuid/setgid program or an unreadable
556  * file.  Rather than fail the exec we invalidate the associated
557  * /proc vnodes so that subsequent attempts to use them will fail.
558  *
559  * All /proc vnodes, except directory vnodes, are retained on a linked
560  * list (rooted at p_plist in the process structure) until last close.
561  *
562  * A controlling process must re-open the /proc files in order to
563  * regain control.
564  */
565 void
566 prinvalidate(struct user *up)
567 {
568 	kthread_t *t = curthread;
569 	proc_t *p = ttoproc(t);
570 	vnode_t *vp;
571 	prnode_t *pnp;
572 	int writers = 0;
573 
574 	mutex_enter(&p->p_lock);
575 	prbarrier(p);	/* block all other /proc operations */
576 
577 	/*
578 	 * At this moment, there can be only one lwp in the process.
579 	 */
580 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
581 
582 	/*
583 	 * Invalidate any currently active /proc vnodes.
584 	 */
585 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
586 		pnp = VTOP(vp);
587 		switch (pnp->pr_type) {
588 		case PR_PSINFO:		/* these files can read by anyone */
589 		case PR_LPSINFO:
590 		case PR_LWPSINFO:
591 		case PR_LWPDIR:
592 		case PR_LWPIDDIR:
593 		case PR_USAGE:
594 		case PR_LUSAGE:
595 		case PR_LWPUSAGE:
596 			break;
597 		default:
598 			pnp->pr_flags |= PR_INVAL;
599 			break;
600 		}
601 	}
602 	/*
603 	 * Wake up anyone waiting for the process or lwp.
604 	 * p->p_trace is guaranteed to be non-NULL if there
605 	 * are any open /proc files for this process.
606 	 */
607 	if ((vp = p->p_trace) != NULL) {
608 		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
609 
610 		prnotify(vp);
611 		/*
612 		 * Are there any writers?
613 		 */
614 		if ((writers = pcp->prc_writers) != 0) {
615 			/*
616 			 * Clear the exclusive open flag (old /proc interface).
617 			 * Set prc_selfopens equal to prc_writers so that
618 			 * the next O_EXCL|O_WRITE open will succeed
619 			 * even with existing (though invalid) writers.
620 			 * prclose() must decrement prc_selfopens when
621 			 * the invalid files are closed.
622 			 */
623 			pcp->prc_flags &= ~PRC_EXCL;
624 			ASSERT(pcp->prc_selfopens <= writers);
625 			pcp->prc_selfopens = writers;
626 		}
627 	}
628 	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
629 	while (vp != NULL) {
630 		/*
631 		 * We should not invalidate the lwpiddir vnodes,
632 		 * but the necessities of maintaining the old
633 		 * ioctl()-based version of /proc require it.
634 		 */
635 		pnp = VTOP(vp);
636 		pnp->pr_flags |= PR_INVAL;
637 		prnotify(vp);
638 		vp = pnp->pr_next;
639 	}
640 
641 	/*
642 	 * If any tracing flags are in effect and any vnodes are open for
643 	 * writing then set the requested-stop and run-on-last-close flags.
644 	 * Otherwise, clear all tracing flags.
645 	 */
646 	t->t_proc_flag &= ~TP_PAUSE;
647 	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
648 		t->t_proc_flag |= TP_PRSTOP;
649 		aston(t);		/* so ISSIG will see the flag */
650 		p->p_proc_flag |= P_PR_RUNLCL;
651 	} else {
652 		premptyset(&up->u_entrymask);		/* syscalls */
653 		premptyset(&up->u_exitmask);
654 		up->u_systrap = 0;
655 		premptyset(&p->p_sigmask);		/* signals */
656 		premptyset(&p->p_fltmask);		/* faults */
657 		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
658 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
659 		prnostep(ttolwp(t));
660 	}
661 
662 	mutex_exit(&p->p_lock);
663 }
664 
665 /*
666  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
667  * Return with pr_pidlock held in all cases.
668  * Return with p_lock held if the the process still exists.
669  * Return value is the process pointer if the process still exists, else NULL.
670  * If we lock the process, give ourself kernel priority to avoid deadlocks;
671  * this is undone in prunlock().
672  */
673 proc_t *
674 pr_p_lock(prnode_t *pnp)
675 {
676 	proc_t *p;
677 	prcommon_t *pcp;
678 
679 	mutex_enter(&pr_pidlock);
680 	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
681 		return (NULL);
682 	mutex_enter(&p->p_lock);
683 	while (p->p_proc_flag & P_PR_LOCK) {
684 		/*
685 		 * This cv/mutex pair is persistent even if
686 		 * the process disappears while we sleep.
687 		 */
688 		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
689 		kmutex_t *mp = &p->p_lock;
690 
691 		mutex_exit(&pr_pidlock);
692 		cv_wait(cv, mp);
693 		mutex_exit(mp);
694 		mutex_enter(&pr_pidlock);
695 		if (pcp->prc_proc == NULL)
696 			return (NULL);
697 		ASSERT(p == pcp->prc_proc);
698 		mutex_enter(&p->p_lock);
699 	}
700 	p->p_proc_flag |= P_PR_LOCK;
701 	THREAD_KPRI_REQUEST();
702 	return (p);
703 }
704 
705 /*
706  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
707  * This prevents any lwp of the process from disappearing and
708  * blocks most operations that a process can perform on itself.
709  * Returns 0 on success, a non-zero error number on failure.
710  *
711  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
712  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
713  *
714  * error returns:
715  *	ENOENT: process or lwp has disappeared or process is exiting
716  *		(or has become a zombie and zdisp == ZNO).
717  *	EAGAIN: procfs vnode has become invalid.
718  *	EINTR:  signal arrived while waiting for exec to complete.
719  */
720 int
721 prlock(prnode_t *pnp, int zdisp)
722 {
723 	prcommon_t *pcp;
724 	proc_t *p;
725 
726 again:
727 	pcp = pnp->pr_common;
728 	p = pr_p_lock(pnp);
729 	mutex_exit(&pr_pidlock);
730 
731 	/*
732 	 * Return ENOENT immediately if there is no process.
733 	 */
734 	if (p == NULL)
735 		return (ENOENT);
736 
737 	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
738 
739 	/*
740 	 * Return ENOENT if process entered zombie state or is exiting
741 	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
742 	 */
743 	if (zdisp == ZNO &&
744 	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
745 		prunlock(pnp);
746 		return (ENOENT);
747 	}
748 
749 	/*
750 	 * If lwp-specific, check to see if lwp has disappeared.
751 	 */
752 	if (pcp->prc_flags & PRC_LWP) {
753 		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
754 		    pcp->prc_tslot == -1) {
755 			prunlock(pnp);
756 			return (ENOENT);
757 		}
758 	}
759 
760 	/*
761 	 * Return EAGAIN if we have encountered a security violation.
762 	 * (The process exec'd a set-id or unreadable executable file.)
763 	 */
764 	if (pnp->pr_flags & PR_INVAL) {
765 		prunlock(pnp);
766 		return (EAGAIN);
767 	}
768 
769 	/*
770 	 * If process is undergoing an exec(), wait for
771 	 * completion and then start all over again.
772 	 */
773 	if (p->p_proc_flag & P_PR_EXEC) {
774 		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
775 		mutex_enter(&pcp->prc_mutex);
776 		prunlock(pnp);
777 		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
778 			mutex_exit(&pcp->prc_mutex);
779 			return (EINTR);
780 		}
781 		mutex_exit(&pcp->prc_mutex);
782 		goto again;
783 	}
784 
785 	/*
786 	 * We return holding p->p_lock.
787 	 */
788 	return (0);
789 }
790 
791 /*
792  * Undo prlock() and pr_p_lock().
793  * p->p_lock is still held; pr_pidlock is no longer held.
794  *
795  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
796  * if any, waiting for the flag to be dropped; it retains p->p_lock.
797  *
798  * prunlock() calls prunmark() and then drops p->p_lock.
799  */
800 void
801 prunmark(proc_t *p)
802 {
803 	ASSERT(p->p_proc_flag & P_PR_LOCK);
804 	ASSERT(MUTEX_HELD(&p->p_lock));
805 
806 	cv_signal(&pr_pid_cv[p->p_slot]);
807 	p->p_proc_flag &= ~P_PR_LOCK;
808 	THREAD_KPRI_RELEASE();
809 }
810 
811 void
812 prunlock(prnode_t *pnp)
813 {
814 	prcommon_t *pcp = pnp->pr_common;
815 	proc_t *p = pcp->prc_proc;
816 
817 	/*
818 	 * If we (or someone) gave it a SIGKILL, and it is not
819 	 * already a zombie, set it running unconditionally.
820 	 */
821 	if ((p->p_flag & SKILLED) &&
822 	    !(p->p_flag & SEXITING) &&
823 	    !(pcp->prc_flags & PRC_DESTROY) &&
824 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
825 		(void) pr_setrun(pnp, 0);
826 	prunmark(p);
827 	mutex_exit(&p->p_lock);
828 }
829 
830 /*
831  * Called while holding p->p_lock to delay until the process is unlocked.
832  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
833  * The process cannot become locked again until p->p_lock is dropped.
834  */
835 void
836 prbarrier(proc_t *p)
837 {
838 	ASSERT(MUTEX_HELD(&p->p_lock));
839 
840 	if (p->p_proc_flag & P_PR_LOCK) {
841 		/* The process is locked; delay until not locked */
842 		uint_t slot = p->p_slot;
843 
844 		while (p->p_proc_flag & P_PR_LOCK)
845 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
846 		cv_signal(&pr_pid_cv[slot]);
847 	}
848 }
849 
850 /*
851  * Return process/lwp status.
852  * The u-block is mapped in by this routine and unmapped at the end.
853  */
854 void
855 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
856 {
857 	kthread_t *t;
858 
859 	ASSERT(MUTEX_HELD(&p->p_lock));
860 
861 	t = prchoose(p);	/* returns locked thread */
862 	ASSERT(t != NULL);
863 	thread_unlock(t);
864 
865 	/* just bzero the process part, prgetlwpstatus() does the rest */
866 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
867 	sp->pr_nlwp = p->p_lwpcnt;
868 	sp->pr_nzomb = p->p_zombcnt;
869 	prassignset(&sp->pr_sigpend, &p->p_sig);
870 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
871 	sp->pr_brksize = p->p_brksize;
872 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
873 	sp->pr_stksize = p->p_stksize;
874 	sp->pr_pid = p->p_pid;
875 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
876 	    (p->p_flag & SZONETOP)) {
877 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
878 		/*
879 		 * Inside local zones, fake zsched's pid as parent pids for
880 		 * processes which reference processes outside of the zone.
881 		 */
882 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
883 	} else {
884 		sp->pr_ppid = p->p_ppid;
885 	}
886 	sp->pr_pgid  = p->p_pgrp;
887 	sp->pr_sid   = p->p_sessp->s_sid;
888 	sp->pr_taskid = p->p_task->tk_tkid;
889 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
890 	sp->pr_zoneid = p->p_zone->zone_id;
891 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
892 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
893 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
894 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
895 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
896 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
897 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
898 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
899 	switch (p->p_model) {
900 	case DATAMODEL_ILP32:
901 		sp->pr_dmodel = PR_MODEL_ILP32;
902 		break;
903 	case DATAMODEL_LP64:
904 		sp->pr_dmodel = PR_MODEL_LP64;
905 		break;
906 	}
907 	if (p->p_agenttp)
908 		sp->pr_agentid = p->p_agenttp->t_tid;
909 
910 	/* get the chosen lwp's status */
911 	prgetlwpstatus(t, &sp->pr_lwp, zp);
912 
913 	/* replicate the flags */
914 	sp->pr_flags = sp->pr_lwp.pr_flags;
915 }
916 
917 #ifdef _SYSCALL32_IMPL
918 void
919 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
920 {
921 	proc_t *p = ttoproc(t);
922 	klwp_t *lwp = ttolwp(t);
923 	struct mstate *ms = &lwp->lwp_mstate;
924 	hrtime_t usr, sys;
925 	int flags;
926 	ulong_t instr;
927 
928 	ASSERT(MUTEX_HELD(&p->p_lock));
929 
930 	bzero(sp, sizeof (*sp));
931 	flags = 0L;
932 	if (t->t_state == TS_STOPPED) {
933 		flags |= PR_STOPPED;
934 		if ((t->t_schedflag & TS_PSTART) == 0)
935 			flags |= PR_ISTOP;
936 	} else if (VSTOPPED(t)) {
937 		flags |= PR_STOPPED|PR_ISTOP;
938 	}
939 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
940 		flags |= PR_DSTOP;
941 	if (lwp->lwp_asleep)
942 		flags |= PR_ASLEEP;
943 	if (t == p->p_agenttp)
944 		flags |= PR_AGENT;
945 	if (!(t->t_proc_flag & TP_TWAIT))
946 		flags |= PR_DETACH;
947 	if (t->t_proc_flag & TP_DAEMON)
948 		flags |= PR_DAEMON;
949 	if (p->p_proc_flag & P_PR_FORK)
950 		flags |= PR_FORK;
951 	if (p->p_proc_flag & P_PR_RUNLCL)
952 		flags |= PR_RLC;
953 	if (p->p_proc_flag & P_PR_KILLCL)
954 		flags |= PR_KLC;
955 	if (p->p_proc_flag & P_PR_ASYNC)
956 		flags |= PR_ASYNC;
957 	if (p->p_proc_flag & P_PR_BPTADJ)
958 		flags |= PR_BPTADJ;
959 	if (p->p_proc_flag & P_PR_PTRACE)
960 		flags |= PR_PTRACE;
961 	if (p->p_flag & SMSACCT)
962 		flags |= PR_MSACCT;
963 	if (p->p_flag & SMSFORK)
964 		flags |= PR_MSFORK;
965 	if (p->p_flag & SVFWAIT)
966 		flags |= PR_VFORKP;
967 	sp->pr_flags = flags;
968 	if (VSTOPPED(t)) {
969 		sp->pr_why   = PR_REQUESTED;
970 		sp->pr_what  = 0;
971 	} else {
972 		sp->pr_why   = t->t_whystop;
973 		sp->pr_what  = t->t_whatstop;
974 	}
975 	sp->pr_lwpid = t->t_tid;
976 	sp->pr_cursig  = lwp->lwp_cursig;
977 	prassignset(&sp->pr_lwppend, &t->t_sig);
978 	schedctl_finish_sigblock(t);
979 	prassignset(&sp->pr_lwphold, &t->t_hold);
980 	if (t->t_whystop == PR_FAULTED) {
981 		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
982 		if (t->t_whatstop == FLTPAGE)
983 			sp->pr_info.si_addr =
984 			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
985 	} else if (lwp->lwp_curinfo)
986 		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
987 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
988 	    sp->pr_info.si_zoneid != zp->zone_id) {
989 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
990 		sp->pr_info.si_uid = 0;
991 		sp->pr_info.si_ctid = -1;
992 		sp->pr_info.si_zoneid = zp->zone_id;
993 	}
994 	sp->pr_altstack.ss_sp =
995 	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
996 	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
997 	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
998 	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
999 	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1000 	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1001 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1002 		sizeof (sp->pr_clname) - 1);
1003 	if (flags & PR_STOPPED)
1004 		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1005 	usr = ms->ms_acct[LMS_USER];
1006 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1007 	scalehrtime(&usr);
1008 	scalehrtime(&sys);
1009 	hrt2ts32(usr, &sp->pr_utime);
1010 	hrt2ts32(sys, &sp->pr_stime);
1011 
1012 	/*
1013 	 * Fetch the current instruction, if not a system process.
1014 	 * We don't attempt this unless the lwp is stopped.
1015 	 */
1016 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1017 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1018 	else if (!(flags & PR_STOPPED))
1019 		sp->pr_flags |= PR_PCINVAL;
1020 	else if (!prfetchinstr(lwp, &instr))
1021 		sp->pr_flags |= PR_PCINVAL;
1022 	else
1023 		sp->pr_instr = (uint32_t)instr;
1024 
1025 	/*
1026 	 * Drop p_lock while touching the lwp's stack.
1027 	 */
1028 	mutex_exit(&p->p_lock);
1029 	if (prisstep(lwp))
1030 		sp->pr_flags |= PR_STEP;
1031 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1032 		int i;
1033 
1034 		sp->pr_syscall = get_syscall32_args(lwp,
1035 			(int *)sp->pr_sysarg, &i);
1036 		sp->pr_nsysarg = (ushort_t)i;
1037 	}
1038 	if ((flags & PR_STOPPED) || t == curthread)
1039 		prgetprregs32(lwp, sp->pr_reg);
1040 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1041 	    (flags & PR_VFORKP)) {
1042 		long r1, r2;
1043 		user_t *up;
1044 		auxv_t *auxp;
1045 		int i;
1046 
1047 		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1048 		if (sp->pr_errno == 0) {
1049 			sp->pr_rval1 = (int32_t)r1;
1050 			sp->pr_rval2 = (int32_t)r2;
1051 			sp->pr_errpriv = PRIV_NONE;
1052 		} else
1053 			sp->pr_errpriv = lwp->lwp_badpriv;
1054 
1055 		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
1056 			up = PTOU(p);
1057 			sp->pr_sysarg[0] = 0;
1058 			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1059 			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1060 			for (i = 0, auxp = up->u_auxv;
1061 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1062 			    i++, auxp++) {
1063 				if (auxp->a_type == AT_SUN_EXECNAME) {
1064 					sp->pr_sysarg[0] =
1065 					(caddr32_t)(uintptr_t)auxp->a_un.a_ptr;
1066 					break;
1067 				}
1068 			}
1069 		}
1070 	}
1071 	if (prhasfp())
1072 		prgetprfpregs32(lwp, &sp->pr_fpreg);
1073 	mutex_enter(&p->p_lock);
1074 }
1075 
1076 void
1077 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1078 {
1079 	kthread_t *t;
1080 
1081 	ASSERT(MUTEX_HELD(&p->p_lock));
1082 
1083 	t = prchoose(p);	/* returns locked thread */
1084 	ASSERT(t != NULL);
1085 	thread_unlock(t);
1086 
1087 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1088 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1089 	sp->pr_nlwp = p->p_lwpcnt;
1090 	sp->pr_nzomb = p->p_zombcnt;
1091 	prassignset(&sp->pr_sigpend, &p->p_sig);
1092 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1093 	sp->pr_brksize = (uint32_t)p->p_brksize;
1094 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1095 	sp->pr_stksize = (uint32_t)p->p_stksize;
1096 	sp->pr_pid   = p->p_pid;
1097 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1098 	    (p->p_flag & SZONETOP)) {
1099 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1100 		/*
1101 		 * Inside local zones, fake zsched's pid as parent pids for
1102 		 * processes which reference processes outside of the zone.
1103 		 */
1104 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1105 	} else {
1106 		sp->pr_ppid = p->p_ppid;
1107 	}
1108 	sp->pr_pgid  = p->p_pgrp;
1109 	sp->pr_sid   = p->p_sessp->s_sid;
1110 	sp->pr_taskid = p->p_task->tk_tkid;
1111 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1112 	sp->pr_zoneid = p->p_zone->zone_id;
1113 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1114 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1115 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1116 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1117 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1118 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1119 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1120 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1121 	switch (p->p_model) {
1122 	case DATAMODEL_ILP32:
1123 		sp->pr_dmodel = PR_MODEL_ILP32;
1124 		break;
1125 	case DATAMODEL_LP64:
1126 		sp->pr_dmodel = PR_MODEL_LP64;
1127 		break;
1128 	}
1129 	if (p->p_agenttp)
1130 		sp->pr_agentid = p->p_agenttp->t_tid;
1131 
1132 	/* get the chosen lwp's status */
1133 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1134 
1135 	/* replicate the flags */
1136 	sp->pr_flags = sp->pr_lwp.pr_flags;
1137 }
1138 #endif	/* _SYSCALL32_IMPL */
1139 
1140 /*
1141  * Return lwp status.
1142  */
1143 void
1144 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1145 {
1146 	proc_t *p = ttoproc(t);
1147 	klwp_t *lwp = ttolwp(t);
1148 	struct mstate *ms = &lwp->lwp_mstate;
1149 	hrtime_t usr, sys;
1150 	int flags;
1151 	ulong_t instr;
1152 
1153 	ASSERT(MUTEX_HELD(&p->p_lock));
1154 
1155 	bzero(sp, sizeof (*sp));
1156 	flags = 0L;
1157 	if (t->t_state == TS_STOPPED) {
1158 		flags |= PR_STOPPED;
1159 		if ((t->t_schedflag & TS_PSTART) == 0)
1160 			flags |= PR_ISTOP;
1161 	} else if (VSTOPPED(t)) {
1162 		flags |= PR_STOPPED|PR_ISTOP;
1163 	}
1164 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1165 		flags |= PR_DSTOP;
1166 	if (lwp->lwp_asleep)
1167 		flags |= PR_ASLEEP;
1168 	if (t == p->p_agenttp)
1169 		flags |= PR_AGENT;
1170 	if (!(t->t_proc_flag & TP_TWAIT))
1171 		flags |= PR_DETACH;
1172 	if (t->t_proc_flag & TP_DAEMON)
1173 		flags |= PR_DAEMON;
1174 	if (p->p_proc_flag & P_PR_FORK)
1175 		flags |= PR_FORK;
1176 	if (p->p_proc_flag & P_PR_RUNLCL)
1177 		flags |= PR_RLC;
1178 	if (p->p_proc_flag & P_PR_KILLCL)
1179 		flags |= PR_KLC;
1180 	if (p->p_proc_flag & P_PR_ASYNC)
1181 		flags |= PR_ASYNC;
1182 	if (p->p_proc_flag & P_PR_BPTADJ)
1183 		flags |= PR_BPTADJ;
1184 	if (p->p_proc_flag & P_PR_PTRACE)
1185 		flags |= PR_PTRACE;
1186 	if (p->p_flag & SMSACCT)
1187 		flags |= PR_MSACCT;
1188 	if (p->p_flag & SMSFORK)
1189 		flags |= PR_MSFORK;
1190 	if (p->p_flag & SVFWAIT)
1191 		flags |= PR_VFORKP;
1192 	if (p->p_pgidp->pid_pgorphaned)
1193 		flags |= PR_ORPHAN;
1194 	if (p->p_pidflag & CLDNOSIGCHLD)
1195 		flags |= PR_NOSIGCHLD;
1196 	if (p->p_pidflag & CLDWAITPID)
1197 		flags |= PR_WAITPID;
1198 	sp->pr_flags = flags;
1199 	if (VSTOPPED(t)) {
1200 		sp->pr_why   = PR_REQUESTED;
1201 		sp->pr_what  = 0;
1202 	} else {
1203 		sp->pr_why   = t->t_whystop;
1204 		sp->pr_what  = t->t_whatstop;
1205 	}
1206 	sp->pr_lwpid = t->t_tid;
1207 	sp->pr_cursig  = lwp->lwp_cursig;
1208 	prassignset(&sp->pr_lwppend, &t->t_sig);
1209 	schedctl_finish_sigblock(t);
1210 	prassignset(&sp->pr_lwphold, &t->t_hold);
1211 	if (t->t_whystop == PR_FAULTED)
1212 		bcopy(&lwp->lwp_siginfo,
1213 		    &sp->pr_info, sizeof (k_siginfo_t));
1214 	else if (lwp->lwp_curinfo)
1215 		bcopy(&lwp->lwp_curinfo->sq_info,
1216 		    &sp->pr_info, sizeof (k_siginfo_t));
1217 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1218 	    sp->pr_info.si_zoneid != zp->zone_id) {
1219 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1220 		sp->pr_info.si_uid = 0;
1221 		sp->pr_info.si_ctid = -1;
1222 		sp->pr_info.si_zoneid = zp->zone_id;
1223 	}
1224 	sp->pr_altstack = lwp->lwp_sigaltstack;
1225 	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1226 	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1227 	sp->pr_ustack = lwp->lwp_ustack;
1228 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1229 		sizeof (sp->pr_clname) - 1);
1230 	if (flags & PR_STOPPED)
1231 		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1232 	usr = ms->ms_acct[LMS_USER];
1233 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1234 	scalehrtime(&usr);
1235 	scalehrtime(&sys);
1236 	hrt2ts(usr, &sp->pr_utime);
1237 	hrt2ts(sys, &sp->pr_stime);
1238 
1239 	/*
1240 	 * Fetch the current instruction, if not a system process.
1241 	 * We don't attempt this unless the lwp is stopped.
1242 	 */
1243 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1244 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1245 	else if (!(flags & PR_STOPPED))
1246 		sp->pr_flags |= PR_PCINVAL;
1247 	else if (!prfetchinstr(lwp, &instr))
1248 		sp->pr_flags |= PR_PCINVAL;
1249 	else
1250 		sp->pr_instr = instr;
1251 
1252 	/*
1253 	 * Drop p_lock while touching the lwp's stack.
1254 	 */
1255 	mutex_exit(&p->p_lock);
1256 	if (prisstep(lwp))
1257 		sp->pr_flags |= PR_STEP;
1258 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1259 		int i;
1260 
1261 		sp->pr_syscall = get_syscall_args(lwp,
1262 			(long *)sp->pr_sysarg, &i);
1263 		sp->pr_nsysarg = (ushort_t)i;
1264 	}
1265 	if ((flags & PR_STOPPED) || t == curthread)
1266 		prgetprregs(lwp, sp->pr_reg);
1267 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1268 	    (flags & PR_VFORKP)) {
1269 		user_t *up;
1270 		auxv_t *auxp;
1271 		int i;
1272 
1273 		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1274 		if (sp->pr_errno == 0)
1275 			sp->pr_errpriv = PRIV_NONE;
1276 		else
1277 			sp->pr_errpriv = lwp->lwp_badpriv;
1278 
1279 		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
1280 			up = PTOU(p);
1281 			sp->pr_sysarg[0] = 0;
1282 			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1283 			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1284 			for (i = 0, auxp = up->u_auxv;
1285 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1286 			    i++, auxp++) {
1287 				if (auxp->a_type == AT_SUN_EXECNAME) {
1288 					sp->pr_sysarg[0] =
1289 						(uintptr_t)auxp->a_un.a_ptr;
1290 					break;
1291 				}
1292 			}
1293 		}
1294 	}
1295 	if (prhasfp())
1296 		prgetprfpregs(lwp, &sp->pr_fpreg);
1297 	mutex_enter(&p->p_lock);
1298 }
1299 
1300 /*
1301  * Get the sigaction structure for the specified signal.  The u-block
1302  * must already have been mapped in by the caller.
1303  */
1304 void
1305 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1306 {
1307 	bzero(sp, sizeof (*sp));
1308 
1309 	if (sig != 0 && (unsigned)sig < NSIG) {
1310 		sp->sa_handler = up->u_signal[sig-1];
1311 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1312 		if (sigismember(&up->u_sigonstack, sig))
1313 			sp->sa_flags |= SA_ONSTACK;
1314 		if (sigismember(&up->u_sigresethand, sig))
1315 			sp->sa_flags |= SA_RESETHAND;
1316 		if (sigismember(&up->u_sigrestart, sig))
1317 			sp->sa_flags |= SA_RESTART;
1318 		if (sigismember(&p->p_siginfo, sig))
1319 			sp->sa_flags |= SA_SIGINFO;
1320 		if (sigismember(&up->u_signodefer, sig))
1321 			sp->sa_flags |= SA_NODEFER;
1322 		if (sig == SIGCLD) {
1323 			if (p->p_flag & SNOWAIT)
1324 				sp->sa_flags |= SA_NOCLDWAIT;
1325 			if ((p->p_flag & SJCTL) == 0)
1326 				sp->sa_flags |= SA_NOCLDSTOP;
1327 		}
1328 	}
1329 }
1330 
1331 #ifdef _SYSCALL32_IMPL
1332 void
1333 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1334 {
1335 	bzero(sp, sizeof (*sp));
1336 
1337 	if (sig != 0 && (unsigned)sig < NSIG) {
1338 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1339 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1340 		if (sigismember(&up->u_sigonstack, sig))
1341 			sp->sa_flags |= SA_ONSTACK;
1342 		if (sigismember(&up->u_sigresethand, sig))
1343 			sp->sa_flags |= SA_RESETHAND;
1344 		if (sigismember(&up->u_sigrestart, sig))
1345 			sp->sa_flags |= SA_RESTART;
1346 		if (sigismember(&p->p_siginfo, sig))
1347 			sp->sa_flags |= SA_SIGINFO;
1348 		if (sigismember(&up->u_signodefer, sig))
1349 			sp->sa_flags |= SA_NODEFER;
1350 		if (sig == SIGCLD) {
1351 			if (p->p_flag & SNOWAIT)
1352 				sp->sa_flags |= SA_NOCLDWAIT;
1353 			if ((p->p_flag & SJCTL) == 0)
1354 				sp->sa_flags |= SA_NOCLDSTOP;
1355 		}
1356 	}
1357 }
1358 #endif	/* _SYSCALL32_IMPL */
1359 
1360 /*
1361  * Count the number of segments in this process's address space.
1362  */
1363 int
1364 prnsegs(struct as *as, int reserved)
1365 {
1366 	int n = 0;
1367 	struct seg *seg;
1368 
1369 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1370 
1371 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1372 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1373 		caddr_t saddr, naddr;
1374 		void *tmp = NULL;
1375 
1376 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1377 			(void) pr_getprot(seg, reserved, &tmp,
1378 			    &saddr, &naddr, eaddr);
1379 			if (saddr != naddr)
1380 				n++;
1381 		}
1382 
1383 		ASSERT(tmp == NULL);
1384 	}
1385 
1386 	return (n);
1387 }
1388 
/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 *
 * 'len' only controls padding: all digits are always written, and no
 * terminating null is stored when 'len' does not exceed their count.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* 32-bit unsigned integer fits in 10 digits */
	char *limit = s + len;
	int ndigits = 0;
	int i;

	/* produce the digits least-significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	/* optional pad */
	while (s < limit)
		*s++ = '\0';

	return (ndigits);
}
1417 
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 *
 * No terminating null is stored; the caller appends whatever follows.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* 64-bit unsigned integer fits in 20 digits */
	int ndigits = 0;
	int i;

	/* produce the digits least-significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	return (ndigits);
}
1442 
1443 void
1444 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1445 {
1446 	char *s = name;
1447 	struct vfs *vfsp;
1448 	struct vfssw *vfsswp;
1449 
1450 	if ((vfsp = vp->v_vfsp) != NULL &&
1451 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1452 	    *vfsswp->vsw_name) {
1453 		(void) strcpy(s, vfsswp->vsw_name);
1454 		s += strlen(s);
1455 		*s++ = '.';
1456 	}
1457 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1458 	*s++ = '.';
1459 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1460 	*s++ = '.';
1461 	s += pr_u64tos(vattr->va_nodeid, s);
1462 	*s++ = '\0';
1463 }
1464 
1465 struct seg *
1466 break_seg(proc_t *p)
1467 {
1468 	caddr_t addr = p->p_brkbase;
1469 	struct seg *seg;
1470 	struct vnode *vp;
1471 
1472 	if (p->p_brksize != 0)
1473 		addr += p->p_brksize - 1;
1474 	seg = as_segat(p->p_as, addr);
1475 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1476 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1477 		return (seg);
1478 	return (NULL);
1479 }
1480 
1481 /*
1482  * Implementation of service functions to handle procfs generic chained
1483  * copyout buffers.
1484  */
1485 typedef struct pr_iobuf_list {
1486 	list_node_t	piol_link;	/* buffer linkage */
1487 	size_t		piol_size;	/* total size (header + data) */
1488 	size_t		piol_usedsize;	/* amount to copy out from this buf */
1489 } piol_t;
1490 
1491 #define	MAPSIZE	(64 * 1024)
1492 #define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1493 
1494 void
1495 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1496 {
1497 	piol_t	*iol;
1498 	size_t	initial_size = MIN(1, n) * itemsize;
1499 
1500 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1501 
1502 	ASSERT(list_head(iolhead) == NULL);
1503 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1504 	ASSERT(initial_size > 0);
1505 
1506 	/*
1507 	 * Someone creating chained copyout buffers may ask for less than
1508 	 * MAPSIZE if the amount of data to be buffered is known to be
1509 	 * smaller than that.
1510 	 * But in order to prevent involuntary self-denial of service,
1511 	 * the requested input size is clamped at MAPSIZE.
1512 	 */
1513 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1514 	iol = kmem_alloc(initial_size, KM_SLEEP);
1515 	list_insert_head(iolhead, iol);
1516 	iol->piol_usedsize = 0;
1517 	iol->piol_size = initial_size;
1518 }
1519 
1520 void *
1521 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1522 {
1523 	piol_t	*iol;
1524 	char	*new;
1525 
1526 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1527 	ASSERT(list_head(iolhead) != NULL);
1528 
1529 	iol = (piol_t *)list_tail(iolhead);
1530 
1531 	if (iol->piol_size <
1532 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1533 		/*
1534 		 * Out of space in the current buffer. Allocate more.
1535 		 */
1536 		piol_t *newiol;
1537 
1538 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1539 		newiol->piol_size = MAPSIZE;
1540 		newiol->piol_usedsize = 0;
1541 
1542 		list_insert_after(iolhead, iol, newiol);
1543 		iol = list_next(iolhead, iol);
1544 		ASSERT(iol == newiol);
1545 	}
1546 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1547 	iol->piol_usedsize += itemsize;
1548 	bzero(new, itemsize);
1549 	return (new);
1550 }
1551 
1552 int
1553 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1554 {
1555 	int error = errin;
1556 	piol_t	*iol;
1557 
1558 	while ((iol = list_head(iolhead)) != NULL) {
1559 		list_remove(iolhead, iol);
1560 		if (!error) {
1561 			if (copyout(PIOL_DATABUF(iol), *tgt,
1562 			    iol->piol_usedsize))
1563 				error = EFAULT;
1564 			*tgt += iol->piol_usedsize;
1565 		}
1566 		kmem_free(iol, iol->piol_size);
1567 	}
1568 	list_destroy(iolhead);
1569 
1570 	return (error);
1571 }
1572 
1573 int
1574 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1575 {
1576 	offset_t	off = uiop->uio_offset;
1577 	char		*base;
1578 	size_t		size;
1579 	piol_t		*iol;
1580 	int		error = errin;
1581 
1582 	while ((iol = list_head(iolhead)) != NULL) {
1583 		list_remove(iolhead, iol);
1584 		base = PIOL_DATABUF(iol);
1585 		size = iol->piol_usedsize;
1586 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1587 			error = uiomove(base + off, size - off,
1588 			    UIO_READ, uiop);
1589 		off = MAX(0, off - (offset_t)size);
1590 		kmem_free(iol, iol->piol_size);
1591 	}
1592 	list_destroy(iolhead);
1593 
1594 	return (error);
1595 }
1596 
1597 /*
1598  * Return an array of structures with memory map information.
1599  * We allocate here; the caller must deallocate.
1600  */
1601 int
1602 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1603 {
1604 	struct as *as = p->p_as;
1605 	prmap_t *mp;
1606 	struct seg *seg;
1607 	struct seg *brkseg, *stkseg;
1608 	struct vnode *vp;
1609 	struct vattr vattr;
1610 	uint_t prot;
1611 
1612 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1613 
1614 	/*
1615 	 * Request an initial buffer size that doesn't waste memory
1616 	 * if the address space has only a small number of segments.
1617 	 */
1618 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1619 
1620 	if ((seg = AS_SEGFIRST(as)) == NULL)
1621 		return (0);
1622 
1623 	brkseg = break_seg(p);
1624 	stkseg = as_segat(as, prgetstackbase(p));
1625 
1626 	do {
1627 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1628 		caddr_t saddr, naddr;
1629 		void *tmp = NULL;
1630 
1631 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1632 			prot = pr_getprot(seg, reserved, &tmp,
1633 			    &saddr, &naddr, eaddr);
1634 			if (saddr == naddr)
1635 				continue;
1636 
1637 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1638 
1639 			mp->pr_vaddr = (uintptr_t)saddr;
1640 			mp->pr_size = naddr - saddr;
1641 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1642 			mp->pr_mflags = 0;
1643 			if (prot & PROT_READ)
1644 				mp->pr_mflags |= MA_READ;
1645 			if (prot & PROT_WRITE)
1646 				mp->pr_mflags |= MA_WRITE;
1647 			if (prot & PROT_EXEC)
1648 				mp->pr_mflags |= MA_EXEC;
1649 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1650 				mp->pr_mflags |= MA_SHARED;
1651 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1652 				mp->pr_mflags |= MA_NORESERVE;
1653 			if (seg->s_ops == &segspt_shmops ||
1654 			    (seg->s_ops == &segvn_ops &&
1655 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1656 				mp->pr_mflags |= MA_ANON;
1657 			if (seg == brkseg)
1658 				mp->pr_mflags |= MA_BREAK;
1659 			else if (seg == stkseg) {
1660 				mp->pr_mflags |= MA_STACK;
1661 				if (reserved) {
1662 					size_t maxstack =
1663 					    ((size_t)p->p_stk_ctl +
1664 					    PAGEOFFSET) & PAGEMASK;
1665 					mp->pr_vaddr =
1666 					    (uintptr_t)prgetstackbase(p) +
1667 					    p->p_stksize - maxstack;
1668 					mp->pr_size = (uintptr_t)naddr -
1669 					    mp->pr_vaddr;
1670 				}
1671 			}
1672 			if (seg->s_ops == &segspt_shmops)
1673 				mp->pr_mflags |= MA_ISM | MA_SHM;
1674 			mp->pr_pagesize = PAGESIZE;
1675 
1676 			/*
1677 			 * Manufacture a filename for the "object" directory.
1678 			 */
1679 			vattr.va_mask = AT_FSID|AT_NODEID;
1680 			if (seg->s_ops == &segvn_ops &&
1681 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1682 			    vp != NULL && vp->v_type == VREG &&
1683 			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
1684 				if (vp == p->p_exec)
1685 					(void) strcpy(mp->pr_mapname, "a.out");
1686 				else
1687 					pr_object_name(mp->pr_mapname,
1688 						vp, &vattr);
1689 			}
1690 
1691 			/*
1692 			 * Get the SysV shared memory id, if any.
1693 			 */
1694 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1695 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1696 			    SHMID_NONE) {
1697 				if (mp->pr_shmid == SHMID_FREE)
1698 					mp->pr_shmid = -1;
1699 
1700 				mp->pr_mflags |= MA_SHM;
1701 			} else {
1702 				mp->pr_shmid = -1;
1703 			}
1704 		}
1705 		ASSERT(tmp == NULL);
1706 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1707 
1708 	return (0);
1709 }
1710 
1711 #ifdef _SYSCALL32_IMPL
1712 int
1713 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1714 {
1715 	struct as *as = p->p_as;
1716 	prmap32_t *mp;
1717 	struct seg *seg;
1718 	struct seg *brkseg, *stkseg;
1719 	struct vnode *vp;
1720 	struct vattr vattr;
1721 	uint_t prot;
1722 
1723 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1724 
1725 	/*
1726 	 * Request an initial buffer size that doesn't waste memory
1727 	 * if the address space has only a small number of segments.
1728 	 */
1729 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1730 
1731 	if ((seg = AS_SEGFIRST(as)) == NULL)
1732 		return (0);
1733 
1734 	brkseg = break_seg(p);
1735 	stkseg = as_segat(as, prgetstackbase(p));
1736 
1737 	do {
1738 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1739 		caddr_t saddr, naddr;
1740 		void *tmp = NULL;
1741 
1742 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1743 			prot = pr_getprot(seg, reserved, &tmp,
1744 			    &saddr, &naddr, eaddr);
1745 			if (saddr == naddr)
1746 				continue;
1747 
1748 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1749 
1750 			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1751 			mp->pr_size = (size32_t)(naddr - saddr);
1752 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1753 			mp->pr_mflags = 0;
1754 			if (prot & PROT_READ)
1755 				mp->pr_mflags |= MA_READ;
1756 			if (prot & PROT_WRITE)
1757 				mp->pr_mflags |= MA_WRITE;
1758 			if (prot & PROT_EXEC)
1759 				mp->pr_mflags |= MA_EXEC;
1760 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1761 				mp->pr_mflags |= MA_SHARED;
1762 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1763 				mp->pr_mflags |= MA_NORESERVE;
1764 			if (seg->s_ops == &segspt_shmops ||
1765 			    (seg->s_ops == &segvn_ops &&
1766 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1767 				mp->pr_mflags |= MA_ANON;
1768 			if (seg == brkseg)
1769 				mp->pr_mflags |= MA_BREAK;
1770 			else if (seg == stkseg) {
1771 				mp->pr_mflags |= MA_STACK;
1772 				if (reserved) {
1773 					size_t maxstack =
1774 					    ((size_t)p->p_stk_ctl +
1775 					    PAGEOFFSET) & PAGEMASK;
1776 					uintptr_t vaddr =
1777 					    (uintptr_t)prgetstackbase(p) +
1778 					    p->p_stksize - maxstack;
1779 					mp->pr_vaddr = (caddr32_t)vaddr;
1780 					mp->pr_size = (size32_t)
1781 					    ((uintptr_t)naddr - vaddr);
1782 				}
1783 			}
1784 			if (seg->s_ops == &segspt_shmops)
1785 				mp->pr_mflags |= MA_ISM | MA_SHM;
1786 			mp->pr_pagesize = PAGESIZE;
1787 
1788 			/*
1789 			 * Manufacture a filename for the "object" directory.
1790 			 */
1791 			vattr.va_mask = AT_FSID|AT_NODEID;
1792 			if (seg->s_ops == &segvn_ops &&
1793 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1794 			    vp != NULL && vp->v_type == VREG &&
1795 			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
1796 				if (vp == p->p_exec)
1797 					(void) strcpy(mp->pr_mapname, "a.out");
1798 				else
1799 					pr_object_name(mp->pr_mapname,
1800 						vp, &vattr);
1801 			}
1802 
1803 			/*
1804 			 * Get the SysV shared memory id, if any.
1805 			 */
1806 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1807 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1808 			    SHMID_NONE) {
1809 				if (mp->pr_shmid == SHMID_FREE)
1810 					mp->pr_shmid = -1;
1811 
1812 				mp->pr_mflags |= MA_SHM;
1813 			} else {
1814 				mp->pr_shmid = -1;
1815 			}
1816 		}
1817 		ASSERT(tmp == NULL);
1818 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1819 
1820 	return (0);
1821 }
1822 #endif	/* _SYSCALL32_IMPL */
1823 
1824 /*
1825  * Return the size of the /proc page data file.
1826  */
1827 size_t
1828 prpdsize(struct as *as)
1829 {
1830 	struct seg *seg;
1831 	size_t size;
1832 
1833 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1834 
1835 	if ((seg = AS_SEGFIRST(as)) == NULL)
1836 		return (0);
1837 
1838 	size = sizeof (prpageheader_t);
1839 	do {
1840 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1841 		caddr_t saddr, naddr;
1842 		void *tmp = NULL;
1843 		size_t npage;
1844 
1845 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1846 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1847 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1848 				size += sizeof (prasmap_t) + round8(npage);
1849 		}
1850 		ASSERT(tmp == NULL);
1851 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1852 
1853 	return (size);
1854 }
1855 
1856 #ifdef _SYSCALL32_IMPL
1857 size_t
1858 prpdsize32(struct as *as)
1859 {
1860 	struct seg *seg;
1861 	size_t size;
1862 
1863 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1864 
1865 	if ((seg = AS_SEGFIRST(as)) == NULL)
1866 		return (0);
1867 
1868 	size = sizeof (prpageheader32_t);
1869 	do {
1870 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1871 		caddr_t saddr, naddr;
1872 		void *tmp = NULL;
1873 		size_t npage;
1874 
1875 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1876 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1877 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1878 				size += sizeof (prasmap32_t) + round8(npage);
1879 		}
1880 		ASSERT(tmp == NULL);
1881 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1882 
1883 	return (size);
1884 }
1885 #endif	/* _SYSCALL32_IMPL */
1886 
1887 /*
1888  * Read page data information.
1889  */
1890 int
1891 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1892 {
1893 	struct as *as = p->p_as;
1894 	caddr_t buf;
1895 	size_t size;
1896 	prpageheader_t *php;
1897 	prasmap_t *pmp;
1898 	struct seg *seg;
1899 	int error;
1900 
1901 again:
1902 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1903 
1904 	if ((seg = AS_SEGFIRST(as)) == NULL) {
1905 		AS_LOCK_EXIT(as, &as->a_lock);
1906 		return (0);
1907 	}
1908 	size = prpdsize(as);
1909 	if (uiop->uio_resid < size) {
1910 		AS_LOCK_EXIT(as, &as->a_lock);
1911 		return (E2BIG);
1912 	}
1913 
1914 	buf = kmem_zalloc(size, KM_SLEEP);
1915 	php = (prpageheader_t *)buf;
1916 	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1917 
1918 	hrt2ts(gethrtime(), &php->pr_tstamp);
1919 	php->pr_nmap = 0;
1920 	php->pr_npage = 0;
1921 	do {
1922 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1923 		caddr_t saddr, naddr;
1924 		void *tmp = NULL;
1925 
1926 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1927 			struct vnode *vp;
1928 			struct vattr vattr;
1929 			size_t len;
1930 			size_t npage;
1931 			uint_t prot;
1932 			uintptr_t next;
1933 
1934 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1935 			if ((len = (size_t)(naddr - saddr)) == 0)
1936 				continue;
1937 			npage = len / PAGESIZE;
1938 			next = (uintptr_t)(pmp + 1) + round8(npage);
1939 			/*
1940 			 * It's possible that the address space can change
1941 			 * subtlely even though we're holding as->a_lock
1942 			 * due to the nondeterminism of page_exists() in
1943 			 * the presence of asychronously flushed pages or
1944 			 * mapped files whose sizes are changing.
1945 			 * page_exists() may be called indirectly from
1946 			 * pr_getprot() by a SEGOP_INCORE() routine.
1947 			 * If this happens we need to make sure we don't
1948 			 * overrun the buffer whose size we computed based
1949 			 * on the initial iteration through the segments.
1950 			 * Once we've detected an overflow, we need to clean
1951 			 * up the temporary memory allocated in pr_getprot()
1952 			 * and retry. If there's a pending signal, we return
1953 			 * EINTR so that this thread can be dislodged if
1954 			 * a latent bug causes us to spin indefinitely.
1955 			 */
1956 			if (next > (uintptr_t)buf + size) {
1957 				pr_getprot_done(&tmp);
1958 				AS_LOCK_EXIT(as, &as->a_lock);
1959 
1960 				kmem_free(buf, size);
1961 
1962 				if (ISSIG(curthread, JUSTLOOKING))
1963 					return (EINTR);
1964 
1965 				goto again;
1966 			}
1967 
1968 			php->pr_nmap++;
1969 			php->pr_npage += npage;
1970 			pmp->pr_vaddr = (uintptr_t)saddr;
1971 			pmp->pr_npage = npage;
1972 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1973 			pmp->pr_mflags = 0;
1974 			if (prot & PROT_READ)
1975 				pmp->pr_mflags |= MA_READ;
1976 			if (prot & PROT_WRITE)
1977 				pmp->pr_mflags |= MA_WRITE;
1978 			if (prot & PROT_EXEC)
1979 				pmp->pr_mflags |= MA_EXEC;
1980 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1981 				pmp->pr_mflags |= MA_SHARED;
1982 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1983 				pmp->pr_mflags |= MA_NORESERVE;
1984 			if (seg->s_ops == &segspt_shmops ||
1985 			    (seg->s_ops == &segvn_ops &&
1986 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1987 				pmp->pr_mflags |= MA_ANON;
1988 			if (seg->s_ops == &segspt_shmops)
1989 				pmp->pr_mflags |= MA_ISM | MA_SHM;
1990 			pmp->pr_pagesize = PAGESIZE;
1991 			/*
1992 			 * Manufacture a filename for the "object" directory.
1993 			 */
1994 			vattr.va_mask = AT_FSID|AT_NODEID;
1995 			if (seg->s_ops == &segvn_ops &&
1996 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1997 			    vp != NULL && vp->v_type == VREG &&
1998 			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
1999 				if (vp == p->p_exec)
2000 					(void) strcpy(pmp->pr_mapname, "a.out");
2001 				else
2002 					pr_object_name(pmp->pr_mapname,
2003 						vp, &vattr);
2004 			}
2005 
2006 			/*
2007 			 * Get the SysV shared memory id, if any.
2008 			 */
2009 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2010 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2011 			    SHMID_NONE) {
2012 				if (pmp->pr_shmid == SHMID_FREE)
2013 					pmp->pr_shmid = -1;
2014 
2015 				pmp->pr_mflags |= MA_SHM;
2016 			} else {
2017 				pmp->pr_shmid = -1;
2018 			}
2019 
2020 			hat_getstat(as, saddr, len, hatid,
2021 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2022 			pmp = (prasmap_t *)next;
2023 		}
2024 		ASSERT(tmp == NULL);
2025 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2026 
2027 	AS_LOCK_EXIT(as, &as->a_lock);
2028 
2029 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2030 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2031 	kmem_free(buf, size);
2032 
2033 	return (error);
2034 }
2035 
2036 #ifdef _SYSCALL32_IMPL
2037 int
2038 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2039 {
2040 	struct as *as = p->p_as;
2041 	caddr_t buf;
2042 	size_t size;
2043 	prpageheader32_t *php;
2044 	prasmap32_t *pmp;
2045 	struct seg *seg;
2046 	int error;
2047 
2048 again:
2049 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2050 
2051 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2052 		AS_LOCK_EXIT(as, &as->a_lock);
2053 		return (0);
2054 	}
2055 	size = prpdsize32(as);
2056 	if (uiop->uio_resid < size) {
2057 		AS_LOCK_EXIT(as, &as->a_lock);
2058 		return (E2BIG);
2059 	}
2060 
2061 	buf = kmem_zalloc(size, KM_SLEEP);
2062 	php = (prpageheader32_t *)buf;
2063 	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2064 
2065 	hrt2ts32(gethrtime(), &php->pr_tstamp);
2066 	php->pr_nmap = 0;
2067 	php->pr_npage = 0;
2068 	do {
2069 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2070 		caddr_t saddr, naddr;
2071 		void *tmp = NULL;
2072 
2073 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2074 			struct vnode *vp;
2075 			struct vattr vattr;
2076 			size_t len;
2077 			size_t npage;
2078 			uint_t prot;
2079 			uintptr_t next;
2080 
2081 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2082 			if ((len = (size_t)(naddr - saddr)) == 0)
2083 				continue;
2084 			npage = len / PAGESIZE;
2085 			next = (uintptr_t)(pmp + 1) + round8(npage);
2086 			/*
2087 			 * It's possible that the address space can change
2088 			 * subtlely even though we're holding as->a_lock
2089 			 * due to the nondeterminism of page_exists() in
2090 			 * the presence of asychronously flushed pages or
2091 			 * mapped files whose sizes are changing.
2092 			 * page_exists() may be called indirectly from
2093 			 * pr_getprot() by a SEGOP_INCORE() routine.
2094 			 * If this happens we need to make sure we don't
2095 			 * overrun the buffer whose size we computed based
2096 			 * on the initial iteration through the segments.
2097 			 * Once we've detected an overflow, we need to clean
2098 			 * up the temporary memory allocated in pr_getprot()
2099 			 * and retry. If there's a pending signal, we return
2100 			 * EINTR so that this thread can be dislodged if
2101 			 * a latent bug causes us to spin indefinitely.
2102 			 */
2103 			if (next > (uintptr_t)buf + size) {
2104 				pr_getprot_done(&tmp);
2105 				AS_LOCK_EXIT(as, &as->a_lock);
2106 
2107 				kmem_free(buf, size);
2108 
2109 				if (ISSIG(curthread, JUSTLOOKING))
2110 					return (EINTR);
2111 
2112 				goto again;
2113 			}
2114 
2115 			php->pr_nmap++;
2116 			php->pr_npage += npage;
2117 			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2118 			pmp->pr_npage = (size32_t)npage;
2119 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2120 			pmp->pr_mflags = 0;
2121 			if (prot & PROT_READ)
2122 				pmp->pr_mflags |= MA_READ;
2123 			if (prot & PROT_WRITE)
2124 				pmp->pr_mflags |= MA_WRITE;
2125 			if (prot & PROT_EXEC)
2126 				pmp->pr_mflags |= MA_EXEC;
2127 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2128 				pmp->pr_mflags |= MA_SHARED;
2129 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2130 				pmp->pr_mflags |= MA_NORESERVE;
2131 			if (seg->s_ops == &segspt_shmops ||
2132 			    (seg->s_ops == &segvn_ops &&
2133 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2134 				pmp->pr_mflags |= MA_ANON;
2135 			if (seg->s_ops == &segspt_shmops)
2136 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2137 			pmp->pr_pagesize = PAGESIZE;
2138 			/*
2139 			 * Manufacture a filename for the "object" directory.
2140 			 */
2141 			vattr.va_mask = AT_FSID|AT_NODEID;
2142 			if (seg->s_ops == &segvn_ops &&
2143 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2144 			    vp != NULL && vp->v_type == VREG &&
2145 			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
2146 				if (vp == p->p_exec)
2147 					(void) strcpy(pmp->pr_mapname, "a.out");
2148 				else
2149 					pr_object_name(pmp->pr_mapname,
2150 						vp, &vattr);
2151 			}
2152 
2153 			/*
2154 			 * Get the SysV shared memory id, if any.
2155 			 */
2156 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2157 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2158 			    SHMID_NONE) {
2159 				if (pmp->pr_shmid == SHMID_FREE)
2160 					pmp->pr_shmid = -1;
2161 
2162 				pmp->pr_mflags |= MA_SHM;
2163 			} else {
2164 				pmp->pr_shmid = -1;
2165 			}
2166 
2167 			hat_getstat(as, saddr, len, hatid,
2168 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2169 			pmp = (prasmap32_t *)next;
2170 		}
2171 		ASSERT(tmp == NULL);
2172 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2173 
2174 	AS_LOCK_EXIT(as, &as->a_lock);
2175 
2176 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2177 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2178 	kmem_free(buf, size);
2179 
2180 	return (error);
2181 }
2182 #endif	/* _SYSCALL32_IMPL */
2183 
2184 ushort_t
2185 prgetpctcpu(uint64_t pct)
2186 {
2187 	/*
2188 	 * The value returned will be relevant in the zone of the examiner,
2189 	 * which may not be the same as the zone which performed the procfs
2190 	 * mount.
2191 	 */
2192 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2193 
2194 	/*
2195 	 * Prorate over online cpus so we don't exceed 100%
2196 	 */
2197 	if (nonline > 1)
2198 		pct /= nonline;
2199 	pct >>= 16;		/* convert to 16-bit scaled integer */
2200 	if (pct > 0x8000)	/* might happen, due to rounding */
2201 		pct = 0x8000;
2202 	return ((ushort_t)pct);
2203 }
2204 
2205 /*
2206  * Return information used by ps(1).
2207  */
2208 void
2209 prgetpsinfo(proc_t *p, psinfo_t *psp)
2210 {
2211 	kthread_t *t;
2212 	struct cred *cred;
2213 	hrtime_t hrutime, hrstime;
2214 
2215 	ASSERT(MUTEX_HELD(&p->p_lock));
2216 
2217 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2218 		bzero(psp, sizeof (*psp));
2219 	else {
2220 		thread_unlock(t);
2221 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2222 	}
2223 
2224 	/*
2225 	 * only export SSYS and SMSACCT; everything else is off-limits to
2226 	 * userland apps.
2227 	 */
2228 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2229 	psp->pr_nlwp = p->p_lwpcnt;
2230 	psp->pr_nzomb = p->p_zombcnt;
2231 	mutex_enter(&p->p_crlock);
2232 	cred = p->p_cred;
2233 	psp->pr_uid = crgetruid(cred);
2234 	psp->pr_euid = crgetuid(cred);
2235 	psp->pr_gid = crgetrgid(cred);
2236 	psp->pr_egid = crgetgid(cred);
2237 	mutex_exit(&p->p_crlock);
2238 	psp->pr_pid = p->p_pid;
2239 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2240 	    (p->p_flag & SZONETOP)) {
2241 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2242 		/*
2243 		 * Inside local zones, fake zsched's pid as parent pids for
2244 		 * processes which reference processes outside of the zone.
2245 		 */
2246 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2247 	} else {
2248 		psp->pr_ppid = p->p_ppid;
2249 	}
2250 	psp->pr_pgid = p->p_pgrp;
2251 	psp->pr_sid = p->p_sessp->s_sid;
2252 	psp->pr_taskid = p->p_task->tk_tkid;
2253 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2254 	psp->pr_poolid = p->p_pool->pool_id;
2255 	psp->pr_zoneid = p->p_zone->zone_id;
2256 	if ((psp->pr_contract = PRCTID(p)) == 0)
2257 		psp->pr_contract = -1;
2258 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2259 	switch (p->p_model) {
2260 	case DATAMODEL_ILP32:
2261 		psp->pr_dmodel = PR_MODEL_ILP32;
2262 		break;
2263 	case DATAMODEL_LP64:
2264 		psp->pr_dmodel = PR_MODEL_LP64;
2265 		break;
2266 	}
2267 	hrutime = mstate_aggr_state(p, LMS_USER);
2268 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2269 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2270 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2271 
2272 	if (t == NULL) {
2273 		int wcode = p->p_wcode;		/* must be atomic read */
2274 
2275 		if (wcode)
2276 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2277 		psp->pr_ttydev = PRNODEV;
2278 		psp->pr_lwp.pr_state = SZOMB;
2279 		psp->pr_lwp.pr_sname = 'Z';
2280 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2281 		psp->pr_lwp.pr_bindpset = PS_NONE;
2282 	} else {
2283 		user_t *up = PTOU(p);
2284 		struct as *as;
2285 		dev_t d;
2286 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2287 
2288 		d = cttydev(p);
2289 		/*
2290 		 * If the controlling terminal is the real
2291 		 * or workstation console device, map to what the
2292 		 * user thinks is the console device.
2293 		 */
2294 		if (d == rwsconsdev || d == rconsdev)
2295 			d = uconsdev;
2296 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2297 		psp->pr_start = up->u_start;
2298 		bcopy(up->u_comm, psp->pr_fname,
2299 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2300 		bcopy(up->u_psargs, psp->pr_psargs,
2301 		    MIN(PRARGSZ-1, PSARGSZ));
2302 		psp->pr_argc = up->u_argc;
2303 		psp->pr_argv = up->u_argv;
2304 		psp->pr_envp = up->u_envp;
2305 
2306 		/* get the chosen lwp's lwpsinfo */
2307 		prgetlwpsinfo(t, &psp->pr_lwp);
2308 
2309 		/* compute %cpu for the process */
2310 		if (p->p_lwpcnt == 1)
2311 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2312 		else {
2313 			uint64_t pct = 0;
2314 			hrtime_t cur_time = gethrtime_unscaled();
2315 
2316 			t = p->p_tlist;
2317 			do {
2318 				pct += cpu_update_pct(t, cur_time);
2319 			} while ((t = t->t_forw) != p->p_tlist);
2320 
2321 			psp->pr_pctcpu = prgetpctcpu(pct);
2322 		}
2323 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2324 			psp->pr_size = 0;
2325 			psp->pr_rssize = 0;
2326 		} else {
2327 			mutex_exit(&p->p_lock);
2328 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2329 			psp->pr_size = btopr(rm_assize(as)) * (PAGESIZE / 1024);
2330 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2331 			psp->pr_pctmem = rm_pctmemory(as);
2332 			AS_LOCK_EXIT(as, &as->a_lock);
2333 			mutex_enter(&p->p_lock);
2334 		}
2335 	}
2336 }
2337 
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prgetpsinfo(): return the information used by ps(1)
 * in a psinfo32_t.
 *
 * The caller must hold p->p_lock.  For an ordinary (non-system) process
 * with at least one lwp, p_lock is dropped and re-acquired around the
 * address space queries.  Fields that cannot be represented in 32 bits
 * are reported as zero: pr_addr always, and pr_size, pr_rssize, pr_argv
 * and pr_envp when the target is not an ILP32 process.
 */
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		/* pr_lwp is filled in by prgetlwpsinfo32() below */
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	/* a contract id of 0 is reported as -1 */
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		extern int wstat(int, int);	/* needs a header file */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV32;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		/*
		 * Report "no binding" exactly as prgetpsinfo() does for a
		 * zombie; leaving the zeroes from bzero() in place would be
		 * read as a binding to processor 0 / processor set 0.
		 */
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device.
		 */
		if (d == rwsconsdev || d == rconsdev)
			d = uconsdev;
		(void) cmpldev(&psp->pr_ttydev, d);
		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = (caddr32_t)up->u_argv;
		psp->pr_envp = (caddr32_t)up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo32(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time;

			/* sum the contribution of every thread on p_tlist */
			t = p->p_tlist;
			cur_time = gethrtime_unscaled();
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/*
			 * p_lock is released while the address space lock
			 * is held as reader, then taken again.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			psp->pr_size = (size32_t)
			    (btopr(rm_assize(as)) * (PAGESIZE / 1024));
			psp->pr_rssize = (size32_t)
			    (rm_asrss(as) * (PAGESIZE / 1024));
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
	}

	/*
	 * If we are looking at an LP64 process, zero out
	 * the fields that cannot be represented in ILP32.
	 */
	if (p->p_model != DATAMODEL_ILP32) {
		psp->pr_size = 0;
		psp->pr_rssize = 0;
		psp->pr_argv = 0;
		psp->pr_envp = 0;
	}
}
#endif	/* _SYSCALL32_IMPL */
2482 
2483 void
2484 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2485 {
2486 	klwp_t *lwp = ttolwp(t);
2487 	sobj_ops_t *sobj;
2488 	char c, state;
2489 	uint64_t pct;
2490 	int retval, niceval;
2491 	hrtime_t hrutime, hrstime;
2492 
2493 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2494 
2495 	bzero(psp, sizeof (*psp));
2496 
2497 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2498 	psp->pr_lwpid = t->t_tid;
2499 	psp->pr_addr = (uintptr_t)t;
2500 	psp->pr_wchan = (uintptr_t)t->t_wchan;
2501 
2502 	/* map the thread state enum into a process state enum */
2503 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2504 	switch (state) {
2505 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2506 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2507 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2508 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2509 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2510 	default:		state = 0;		c = '?';	break;
2511 	}
2512 	psp->pr_state = state;
2513 	psp->pr_sname = c;
2514 	if ((sobj = t->t_sobj_ops) != NULL)
2515 		psp->pr_stype = SOBJ_TYPE(sobj);
2516 	retval = CL_DONICE(t, NULL, 0, &niceval);
2517 	if (retval == 0) {
2518 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2519 		psp->pr_nice = niceval + NZERO;
2520 	}
2521 	psp->pr_syscall = t->t_sysnum;
2522 	psp->pr_pri = t->t_pri;
2523 	psp->pr_start.tv_sec = t->t_start;
2524 	psp->pr_start.tv_nsec = 0L;
2525 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2526 	scalehrtime(&hrutime);
2527 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2528 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2529 	scalehrtime(&hrstime);
2530 	hrt2ts(hrutime + hrstime, &psp->pr_time);
2531 	/* compute %cpu for the lwp */
2532 	pct = cpu_update_pct(t, gethrtime_unscaled());
2533 	psp->pr_pctcpu = prgetpctcpu(pct);
2534 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2535 	if (psp->pr_cpu > 99)
2536 		psp->pr_cpu = 99;
2537 
2538 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2539 		sizeof (psp->pr_clname) - 1);
2540 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2541 	psp->pr_onpro = t->t_cpu->cpu_id;
2542 	psp->pr_bindpro = t->t_bind_cpu;
2543 	psp->pr_bindpset = t->t_bind_pset;
2544 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2545 }
2546 
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prgetlwpsinfo(): fill in the lwpsinfo32_t for
 * thread t.  The caller must hold the p_lock of t's process.  Kernel
 * addresses (pr_addr, pr_wchan) are reported as zero.
 */
void
prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
{
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *sobj;
	char tstate, pstate, sname;
	int niceval;
	int cpu;
	hrtime_t usrtime, systime;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */

	/*
	 * Translate the thread state into a process state plus the
	 * one-letter state name.  A VSTOPPED() thread counts as stopped
	 * whatever its t_state says.
	 */
	tstate = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (tstate) {
	case TS_SLEEP:
		pstate = SSLEEP;
		sname = 'S';
		break;
	case TS_RUN:
		pstate = SRUN;
		sname = 'R';
		break;
	case TS_ONPROC:
		pstate = SONPROC;
		sname = 'O';
		break;
	case TS_ZOMB:
		pstate = SZOMB;
		sname = 'Z';
		break;
	case TS_STOPPED:
		pstate = SSTOP;
		sname = 'T';
		break;
	default:
		pstate = 0;
		sname = '?';
		break;
	}
	psp->pr_state = pstate;
	psp->pr_sname = sname;

	sobj = t->t_sobj_ops;
	if (sobj != NULL)
		psp->pr_stype = SOBJ_TYPE(sobj);

	if (CL_DONICE(t, NULL, 0, &niceval) != 0) {
		psp->pr_oldpri = 0;
		psp->pr_nice = 0;
	} else {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = niceval + NZERO;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = (time32_t)t->t_start;
	psp->pr_start.tv_nsec = 0L;

	/* pr_time is user time plus system time, where system includes trap */
	usrtime = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&usrtime);
	systime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&systime);
	hrt2ts32(usrtime + systime, &psp->pr_time);

	/* %cpu for this lwp, plus the same figure held to [0..99] */
	psp->pr_pctcpu = prgetpctcpu(cpu_update_pct(t, gethrtime_unscaled()));
	cpu = (psp->pr_pctcpu * 100 + 0x6000) >> 15;
	if (cpu > 99)
		cpu = 99;
	psp->pr_cpu = cpu;

	/* the bzero() above supplies the terminating NUL for pr_clname */
	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
#endif	/* _SYSCALL32_IMPL */
2616 
2617 /*
2618  * This used to get called when microstate accounting was disabled but
2619  * microstate information was requested.  Since Microstate accounting is on
2620  * regardless of the proc flags, this simply makes it appear to procfs that
2621  * microstate accounting is on.  This is relatively meaningless since you
2622  * can't turn it off, but this is here for the sake of appearances.
2623  */
2624 
2625 /*ARGSUSED*/
2626 void
2627 estimate_msacct(kthread_t *t, hrtime_t curtime)
2628 {
2629 	proc_t *p;
2630 
2631 	if (t == NULL)
2632 		return;
2633 
2634 	p = ttoproc(t);
2635 	ASSERT(MUTEX_HELD(&p->p_lock));
2636 
2637 	/*
2638 	 * A system process (p0) could be referenced if the thread is
2639 	 * in the process of exiting.  Don't turn on microstate accounting
2640 	 * in that case.
2641 	 */
2642 	if (p->p_flag & SSYS)
2643 		return;
2644 
2645 	/*
2646 	 * Loop through all the LWPs (kernel threads) in the process.
2647 	 */
2648 	t = p->p_tlist;
2649 	do {
2650 		t->t_proc_flag |= TP_MSACCT;
2651 	} while ((t = t->t_forw) != p->p_tlist);
2652 
2653 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
2654 }
2655 
2656 /*
2657  * It's not really possible to disable microstate accounting anymore.
2658  * However, this routine simply turns off the ms accounting flags in a process
2659  * This way procfs can still pretend to turn microstate accounting on and
2660  * off for a process, but it actually doesn't do anything.  This is
2661  * a neutered form of preemptive idiot-proofing.
2662  */
2663 void
2664 disable_msacct(proc_t *p)
2665 {
2666 	kthread_t *t;
2667 
2668 	ASSERT(MUTEX_HELD(&p->p_lock));
2669 
2670 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
2671 	/*
2672 	 * Loop through all the LWPs (kernel threads) in the process.
2673 	 */
2674 	if ((t = p->p_tlist) != NULL) {
2675 		do {
2676 			/* clear per-thread flag */
2677 			t->t_proc_flag &= ~TP_MSACCT;
2678 		} while ((t = t->t_forw) != p->p_tlist);
2679 	}
2680 }
2681 
2682 /*
2683  * Return resource usage information.
2684  */
2685 void
2686 prgetusage(kthread_t *t, prhusage_t *pup)
2687 {
2688 	klwp_t *lwp = ttolwp(t);
2689 	hrtime_t *mstimep;
2690 	struct mstate *ms = &lwp->lwp_mstate;
2691 	int state;
2692 	int i;
2693 	hrtime_t curtime;
2694 	hrtime_t waitrq;
2695 	hrtime_t tmp1;
2696 
2697 	curtime = gethrtime_unscaled();
2698 
2699 	pup->pr_lwpid	= t->t_tid;
2700 	pup->pr_count	= 1;
2701 	pup->pr_create	= ms->ms_start;
2702 	pup->pr_term    = ms->ms_term;
2703 	scalehrtime(&pup->pr_create);
2704 	scalehrtime(&pup->pr_term);
2705 	if (ms->ms_term == 0) {
2706 		pup->pr_rtime = curtime - ms->ms_start;
2707 		scalehrtime(&pup->pr_rtime);
2708 	} else {
2709 		pup->pr_rtime = ms->ms_term - ms->ms_start;
2710 		scalehrtime(&pup->pr_rtime);
2711 	}
2712 
2713 
2714 	pup->pr_utime    = ms->ms_acct[LMS_USER];
2715 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
2716 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
2717 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
2718 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
2719 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
2720 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
2721 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
2722 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
2723 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2724 
2725 	prscaleusage(pup);
2726 
2727 	/*
2728 	 * Adjust for time waiting in the dispatcher queue.
2729 	 */
2730 	waitrq = t->t_waitrq;	/* hopefully atomic */
2731 	if (waitrq != 0) {
2732 		tmp1 = curtime - waitrq;
2733 		scalehrtime(&tmp1);
2734 		pup->pr_wtime += tmp1;
2735 		curtime = waitrq;
2736 	}
2737 
2738 	/*
2739 	 * Adjust for time spent in current microstate.
2740 	 */
2741 	if (ms->ms_state_start > curtime) {
2742 		curtime = gethrtime_unscaled();
2743 	}
2744 
2745 	i = 0;
2746 	do {
2747 		switch (state = t->t_mstate) {
2748 		case LMS_SLEEP:
2749 			/*
2750 			 * Update the timer for the current sleep state.
2751 			 */
2752 			switch (state = ms->ms_prev) {
2753 			case LMS_TFAULT:
2754 			case LMS_DFAULT:
2755 			case LMS_KFAULT:
2756 			case LMS_USER_LOCK:
2757 				break;
2758 			default:
2759 				state = LMS_SLEEP;
2760 				break;
2761 			}
2762 			break;
2763 		case LMS_TFAULT:
2764 		case LMS_DFAULT:
2765 		case LMS_KFAULT:
2766 		case LMS_USER_LOCK:
2767 			state = LMS_SYSTEM;
2768 			break;
2769 		}
2770 		switch (state) {
2771 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2772 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2773 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2774 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2775 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2776 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2777 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2778 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2779 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2780 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2781 		default:		panic("prgetusage: unknown microstate");
2782 		}
2783 		tmp1 = curtime - ms->ms_state_start;
2784 		if (tmp1 < 0) {
2785 			curtime = gethrtime_unscaled();
2786 			i++;
2787 			continue;
2788 		}
2789 		scalehrtime(&tmp1);
2790 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2791 
2792 	*mstimep += tmp1;
2793 
2794 	/* update pup timestamp */
2795 	pup->pr_tstamp = curtime;
2796 	scalehrtime(&pup->pr_tstamp);
2797 
2798 	/*
2799 	 * Resource usage counters.
2800 	 */
2801 	pup->pr_minf  = lwp->lwp_ru.minflt;
2802 	pup->pr_majf  = lwp->lwp_ru.majflt;
2803 	pup->pr_nswap = lwp->lwp_ru.nswap;
2804 	pup->pr_inblk = lwp->lwp_ru.inblock;
2805 	pup->pr_oublk = lwp->lwp_ru.oublock;
2806 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
2807 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
2808 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
2809 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
2810 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
2811 	pup->pr_sysc  = lwp->lwp_ru.sysc;
2812 	pup->pr_ioch  = lwp->lwp_ru.ioch;
2813 }
2814 
2815 /*
2816  * Convert ms_acct stats from unscaled high-res time to nanoseconds
2817  */
2818 void
2819 prscaleusage(prhusage_t *usg)
2820 {
2821 	scalehrtime(&usg->pr_utime);
2822 	scalehrtime(&usg->pr_stime);
2823 	scalehrtime(&usg->pr_ttime);
2824 	scalehrtime(&usg->pr_tftime);
2825 	scalehrtime(&usg->pr_dftime);
2826 	scalehrtime(&usg->pr_kftime);
2827 	scalehrtime(&usg->pr_ltime);
2828 	scalehrtime(&usg->pr_slptime);
2829 	scalehrtime(&usg->pr_wtime);
2830 	scalehrtime(&usg->pr_stoptime);
2831 }
2832 
2833 
2834 /*
2835  * Sum resource usage information.
2836  */
2837 void
2838 praddusage(kthread_t *t, prhusage_t *pup)
2839 {
2840 	klwp_t *lwp = ttolwp(t);
2841 	hrtime_t *mstimep;
2842 	struct mstate *ms = &lwp->lwp_mstate;
2843 	int state;
2844 	int i;
2845 	hrtime_t curtime;
2846 	hrtime_t waitrq;
2847 	hrtime_t tmp;
2848 	prhusage_t conv;
2849 
2850 	curtime = gethrtime_unscaled();
2851 
2852 	if (ms->ms_term == 0) {
2853 		tmp = curtime - ms->ms_start;
2854 		scalehrtime(&tmp);
2855 		pup->pr_rtime += tmp;
2856 	} else {
2857 		tmp = ms->ms_term - ms->ms_start;
2858 		scalehrtime(&tmp);
2859 		pup->pr_rtime += tmp;
2860 	}
2861 
2862 	conv.pr_utime = ms->ms_acct[LMS_USER];
2863 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2864 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2865 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2866 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2867 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2868 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2869 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2870 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2871 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2872 
2873 	prscaleusage(&conv);
2874 
2875 	pup->pr_utime	+= conv.pr_utime;
2876 	pup->pr_stime	+= conv.pr_stime;
2877 	pup->pr_ttime	+= conv.pr_ttime;
2878 	pup->pr_tftime	+= conv.pr_tftime;
2879 	pup->pr_dftime	+= conv.pr_dftime;
2880 	pup->pr_kftime	+= conv.pr_kftime;
2881 	pup->pr_ltime	+= conv.pr_ltime;
2882 	pup->pr_slptime	+= conv.pr_slptime;
2883 	pup->pr_wtime	+= conv.pr_wtime;
2884 	pup->pr_stoptime += conv.pr_stoptime;
2885 
2886 	/*
2887 	 * Adjust for time waiting in the dispatcher queue.
2888 	 */
2889 	waitrq = t->t_waitrq;	/* hopefully atomic */
2890 	if (waitrq != 0) {
2891 		tmp = curtime - waitrq;
2892 		scalehrtime(&tmp);
2893 		pup->pr_wtime += tmp;
2894 		curtime = waitrq;
2895 	}
2896 
2897 	/*
2898 	 * Adjust for time spent in current microstate.
2899 	 */
2900 	if (ms->ms_state_start > curtime) {
2901 		curtime = gethrtime_unscaled();
2902 	}
2903 
2904 	i = 0;
2905 	do {
2906 		switch (state = t->t_mstate) {
2907 		case LMS_SLEEP:
2908 			/*
2909 			 * Update the timer for the current sleep state.
2910 			 */
2911 			switch (state = ms->ms_prev) {
2912 			case LMS_TFAULT:
2913 			case LMS_DFAULT:
2914 			case LMS_KFAULT:
2915 			case LMS_USER_LOCK:
2916 				break;
2917 			default:
2918 				state = LMS_SLEEP;
2919 				break;
2920 			}
2921 			break;
2922 		case LMS_TFAULT:
2923 		case LMS_DFAULT:
2924 		case LMS_KFAULT:
2925 		case LMS_USER_LOCK:
2926 			state = LMS_SYSTEM;
2927 			break;
2928 		}
2929 		switch (state) {
2930 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2931 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2932 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2933 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2934 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2935 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2936 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2937 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2938 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2939 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2940 		default:		panic("praddusage: unknown microstate");
2941 		}
2942 		tmp = curtime - ms->ms_state_start;
2943 		if (tmp < 0) {
2944 			curtime = gethrtime_unscaled();
2945 			i++;
2946 			continue;
2947 		}
2948 		scalehrtime(&tmp);
2949 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
2950 
2951 	*mstimep += tmp;
2952 
2953 	/* update pup timestamp */
2954 	pup->pr_tstamp = curtime;
2955 	scalehrtime(&pup->pr_tstamp);
2956 
2957 	/*
2958 	 * Resource usage counters.
2959 	 */
2960 	pup->pr_minf  += lwp->lwp_ru.minflt;
2961 	pup->pr_majf  += lwp->lwp_ru.majflt;
2962 	pup->pr_nswap += lwp->lwp_ru.nswap;
2963 	pup->pr_inblk += lwp->lwp_ru.inblock;
2964 	pup->pr_oublk += lwp->lwp_ru.oublock;
2965 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
2966 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
2967 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
2968 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
2969 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
2970 	pup->pr_sysc  += lwp->lwp_ru.sysc;
2971 	pup->pr_ioch  += lwp->lwp_ru.ioch;
2972 }
2973 
2974 /*
2975  * Convert a prhusage_t to a prusage_t.
2976  * This means convert each hrtime_t to a timestruc_t
2977  * and copy the count fields uint64_t => ulong_t.
2978  */
2979 void
2980 prcvtusage(prhusage_t *pup, prusage_t *upup)
2981 {
2982 	uint64_t *ullp;
2983 	ulong_t *ulp;
2984 	int i;
2985 
2986 	upup->pr_lwpid = pup->pr_lwpid;
2987 	upup->pr_count = pup->pr_count;
2988 
2989 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
2990 	hrt2ts(pup->pr_create,	&upup->pr_create);
2991 	hrt2ts(pup->pr_term,	&upup->pr_term);
2992 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
2993 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
2994 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
2995 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
2996 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
2997 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
2998 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
2999 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3000 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3001 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3002 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3003 	bzero(upup->filltime, sizeof (upup->filltime));
3004 
3005 	ullp = &pup->pr_minf;
3006 	ulp = &upup->pr_minf;
3007 	for (i = 0; i < 22; i++)
3008 		*ulp++ = (ulong_t)*ullp++;
3009 }
3010 
#ifdef _SYSCALL32_IMPL
/*
 * Convert a prhusage_t to a prusage32_t.
 * Every hrtime_t becomes a 32-bit timestruc via hrt2ts32(), and the
 * count fields are narrowed from uint64_t to uint32_t.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *src;
	uint32_t *dst;
	int i;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	/* timestamps and per-microstate times */
	hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
	hrt2ts32(pup->pr_create, &upup->pr_create);
	hrt2ts32(pup->pr_term, &upup->pr_term);
	hrt2ts32(pup->pr_rtime, &upup->pr_rtime);
	hrt2ts32(pup->pr_utime, &upup->pr_utime);
	hrt2ts32(pup->pr_stime, &upup->pr_stime);
	hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
	hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
	hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
	hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
	hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
	hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
	hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy 22 consecutive words starting at pr_minf.  This treats the
	 * counters as an array in both structures; the count presumably
	 * covers the named counters plus trailing filler -- confirm
	 * against the structure definitions before changing either.
	 */
	src = &pup->pr_minf;
	dst = &upup->pr_minf;
	for (i = 0; i < 22; i++)
		dst[i] = (uint32_t)src[i];
}
#endif	/* _SYSCALL32_IMPL */
3044 
/*
 * Determine whether a set is empty.
 * The set is the n 32-bit words starting at sp; returns 1 if every
 * word is zero (trivially so when n is 0) and 0 otherwise.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t i;

	for (i = 0; i < n; i++) {
		if (sp[i] != 0)
			return (0);
	}
	return (1);
}
3056 
3057 /*
3058  * Utility routine for establishing a watched area in the process.
3059  * Keep the list of watched areas sorted by virtual address.
3060  */
3061 int
3062 set_watched_area(proc_t *p, struct watched_area *pwa)
3063 {
3064 	caddr_t vaddr = pwa->wa_vaddr;
3065 	caddr_t eaddr = pwa->wa_eaddr;
3066 	ulong_t flags = pwa->wa_flags;
3067 	struct watched_area *target;
3068 	avl_index_t where;
3069 	int error = 0;
3070 
3071 	/* we must not be holding p->p_lock, but the process must be locked */
3072 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3073 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3074 
3075 	/*
3076 	 * If this is our first watchpoint, enable watchpoints for the process.
3077 	 */
3078 	if (!pr_watch_active(p)) {
3079 		kthread_t *t;
3080 
3081 		mutex_enter(&p->p_lock);
3082 		if ((t = p->p_tlist) != NULL) {
3083 			do {
3084 				watch_enable(t);
3085 			} while ((t = t->t_forw) != p->p_tlist);
3086 		}
3087 		mutex_exit(&p->p_lock);
3088 	}
3089 
3090 	target = pr_find_watched_area(p, pwa, &where);
3091 	if (target != NULL) {
3092 		/*
3093 		 * We discovered an existing, overlapping watched area.
3094 		 * Allow it only if it is an exact match.
3095 		 */
3096 		if (target->wa_vaddr != vaddr ||
3097 		    target->wa_eaddr != eaddr)
3098 			error = EINVAL;
3099 		else if (target->wa_flags != flags) {
3100 			error = set_watched_page(p, vaddr, eaddr,
3101 			    flags, target->wa_flags);
3102 			target->wa_flags = flags;
3103 		}
3104 		kmem_free(pwa, sizeof (struct watched_area));
3105 	} else {
3106 		avl_insert(&p->p_warea, pwa, where);
3107 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3108 	}
3109 
3110 	return (error);
3111 }
3112 
3113 /*
3114  * Utility routine for clearing a watched area in the process.
3115  * Must be an exact match of the virtual address.
3116  * size and flags don't matter.
3117  */
3118 int
3119 clear_watched_area(proc_t *p, struct watched_area *pwa)
3120 {
3121 	struct watched_area *found;
3122 
3123 	/* we must not be holding p->p_lock, but the process must be locked */
3124 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3125 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3126 
3127 
3128 	if (!pr_watch_active(p)) {
3129 		kmem_free(pwa, sizeof (struct watched_area));
3130 		return (0);
3131 	}
3132 
3133 	/*
3134 	 * Look for a matching address in the watched areas.  If a match is
3135 	 * found, clear the old watched area and adjust the watched page(s).  It
3136 	 * is not an error if there is no match.
3137 	 */
3138 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3139 	    found->wa_vaddr == pwa->wa_vaddr) {
3140 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3141 		    found->wa_flags);
3142 		avl_remove(&p->p_warea, found);
3143 		kmem_free(found, sizeof (struct watched_area));
3144 	}
3145 
3146 	kmem_free(pwa, sizeof (struct watched_area));
3147 
3148 	/*
3149 	 * If we removed the last watched area from the process, disable
3150 	 * watchpoints.
3151 	 */
3152 	if (!pr_watch_active(p)) {
3153 		kthread_t *t;
3154 
3155 		mutex_enter(&p->p_lock);
3156 		if ((t = p->p_tlist) != NULL) {
3157 			do {
3158 				watch_disable(t);
3159 			} while ((t = t->t_forw) != p->p_tlist);
3160 		}
3161 		mutex_exit(&p->p_lock);
3162 	}
3163 
3164 	return (0);
3165 }
3166 
3167 /*
3168  * Frees all the watched_area structures
3169  */
3170 void
3171 pr_free_watchpoints(proc_t *p)
3172 {
3173 	struct watched_area *delp;
3174 	void *cookie;
3175 
3176 	cookie = NULL;
3177 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3178 		kmem_free(delp, sizeof (struct watched_area));
3179 
3180 	avl_destroy(&p->p_warea);
3181 }
3182 
3183 /*
3184  * This one is called by the traced process to unwatch all the
3185  * pages while deallocating the list of watched_page structs.
3186  */
3187 void
3188 pr_free_watched_pages(proc_t *p)
3189 {
3190 	struct as *as = p->p_as;
3191 	struct watched_page *pwp;
3192 	uint_t prot;
3193 	int    retrycnt, err;
3194 	void *cookie;
3195 
3196 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3197 		return;
3198 
3199 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3200 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3201 
3202 	pwp = avl_first(&as->a_wpage);
3203 
3204 	cookie = NULL;
3205 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3206 		retrycnt = 0;
3207 		if ((prot = pwp->wp_oprot) != 0) {
3208 			caddr_t addr = pwp->wp_vaddr;
3209 			struct seg *seg;
3210 		retry:
3211 
3212 			if ((pwp->wp_prot != prot ||
3213 			    (pwp->wp_flags & WP_NOWATCH)) &&
3214 			    (seg = as_segat(as, addr)) != NULL) {
3215 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3216 				if (err == IE_RETRY) {
3217 					ASSERT(retrycnt == 0);
3218 					retrycnt++;
3219 					goto retry;
3220 				}
3221 			}
3222 		}
3223 		kmem_free(pwp, sizeof (struct watched_page));
3224 	}
3225 
3226 	avl_destroy(&as->a_wpage);
3227 	p->p_wprot = NULL;
3228 
3229 	AS_LOCK_EXIT(as, &as->a_lock);
3230 }
3231 
3232 /*
3233  * Insert a watched area into the list of watched pages.
3234  * If oflags is zero then we are adding a new watched area.
3235  * Otherwise we are changing the flags of an existing watched area.
3236  */
3237 static int
3238 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3239 	ulong_t flags, ulong_t oflags)
3240 {
3241 	struct as *as = p->p_as;
3242 	avl_tree_t *pwp_tree;
3243 	struct watched_page *pwp, *newpwp;
3244 	struct watched_page tpw;
3245 	avl_index_t where;
3246 	struct seg *seg;
3247 	uint_t prot;
3248 	caddr_t addr;
3249 
3250 	/*
3251 	 * We need to pre-allocate a list of structures before we grab the
3252 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3253 	 * held.
3254 	 */
3255 	newpwp = NULL;
3256 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3257 	    addr < eaddr; addr += PAGESIZE) {
3258 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3259 		pwp->wp_list = newpwp;
3260 		newpwp = pwp;
3261 	}
3262 
3263 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3264 
3265 	/*
3266 	 * Search for an existing watched page to contain the watched area.
3267 	 * If none is found, grab a new one from the available list
3268 	 * and insert it in the active list, keeping the list sorted
3269 	 * by user-level virtual address.
3270 	 */
3271 	if (p->p_flag & SVFWAIT)
3272 		pwp_tree = &p->p_wpage;
3273 	else
3274 		pwp_tree = &as->a_wpage;
3275 
3276 again:
3277 	if (avl_numnodes(pwp_tree) > prnwatch) {
3278 		AS_LOCK_EXIT(as, &as->a_lock);
3279 		while (newpwp != NULL) {
3280 			pwp = newpwp->wp_list;
3281 			kmem_free(newpwp, sizeof (struct watched_page));
3282 			newpwp = pwp;
3283 		}
3284 		return (E2BIG);
3285 	}
3286 
3287 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3288 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3289 		pwp = newpwp;
3290 		newpwp = newpwp->wp_list;
3291 		pwp->wp_list = NULL;
3292 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3293 		    (uintptr_t)PAGEMASK);
3294 		avl_insert(pwp_tree, pwp, where);
3295 	}
3296 
3297 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3298 
3299 	if (oflags & WA_READ)
3300 		pwp->wp_read--;
3301 	if (oflags & WA_WRITE)
3302 		pwp->wp_write--;
3303 	if (oflags & WA_EXEC)
3304 		pwp->wp_exec--;
3305 
3306 	ASSERT(pwp->wp_read >= 0);
3307 	ASSERT(pwp->wp_write >= 0);
3308 	ASSERT(pwp->wp_exec >= 0);
3309 
3310 	if (flags & WA_READ)
3311 		pwp->wp_read++;
3312 	if (flags & WA_WRITE)
3313 		pwp->wp_write++;
3314 	if (flags & WA_EXEC)
3315 		pwp->wp_exec++;
3316 
3317 	if (!(p->p_flag & SVFWAIT)) {
3318 		vaddr = pwp->wp_vaddr;
3319 		if (pwp->wp_oprot == 0 &&
3320 		    (seg = as_segat(as, vaddr)) != NULL) {
3321 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
3322 			pwp->wp_oprot = (uchar_t)prot;
3323 			pwp->wp_prot = (uchar_t)prot;
3324 		}
3325 		if (pwp->wp_oprot != 0) {
3326 			prot = pwp->wp_oprot;
3327 			if (pwp->wp_read)
3328 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3329 			if (pwp->wp_write)
3330 				prot &= ~PROT_WRITE;
3331 			if (pwp->wp_exec)
3332 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3333 			if (!(pwp->wp_flags & WP_NOWATCH) &&
3334 			    pwp->wp_prot != prot &&
3335 			    (pwp->wp_flags & WP_SETPROT) == 0) {
3336 				pwp->wp_flags |= WP_SETPROT;
3337 				pwp->wp_list = p->p_wprot;
3338 				p->p_wprot = pwp;
3339 			}
3340 			pwp->wp_prot = (uchar_t)prot;
3341 		}
3342 	}
3343 
3344 	/*
3345 	 * If the watched area extends into the next page then do
3346 	 * it over again with the virtual address of the next page.
3347 	 */
3348 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3349 		goto again;
3350 
3351 	AS_LOCK_EXIT(as, &as->a_lock);
3352 
3353 	/*
3354 	 * Free any pages we may have over-allocated
3355 	 */
3356 	while (newpwp != NULL) {
3357 		pwp = newpwp->wp_list;
3358 		kmem_free(newpwp, sizeof (struct watched_page));
3359 		newpwp = pwp;
3360 	}
3361 
3362 	return (0);
3363 }
3364 
3365 /*
3366  * Remove a watched area from the list of watched pages.
3367  * A watched area may extend over more than one page.
3368  */
3369 static void
3370 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3371 {
3372 	struct as *as = p->p_as;
3373 	struct watched_page *pwp;
3374 	struct watched_page tpw;
3375 	avl_tree_t *tree;
3376 	avl_index_t where;
3377 
3378 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3379 
3380 	if (p->p_flag & SVFWAIT)
3381 		tree = &p->p_wpage;
3382 	else
3383 		tree = &as->a_wpage;
3384 
3385 	tpw.wp_vaddr = vaddr =
3386 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3387 	pwp = avl_find(tree, &tpw, &where);
3388 	if (pwp == NULL)
3389 		pwp = avl_nearest(tree, where, AVL_AFTER);
3390 
3391 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3392 		ASSERT(vaddr <=  pwp->wp_vaddr);
3393 
3394 		if (flags & WA_READ)
3395 			pwp->wp_read--;
3396 		if (flags & WA_WRITE)
3397 			pwp->wp_write--;
3398 		if (flags & WA_EXEC)
3399 			pwp->wp_exec--;
3400 
3401 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3402 			/*
3403 			 * Reset the hat layer's protections on this page.
3404 			 */
3405 			if (pwp->wp_oprot != 0) {
3406 				uint_t prot = pwp->wp_oprot;
3407 
3408 				if (pwp->wp_read)
3409 					prot &=
3410 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3411 				if (pwp->wp_write)
3412 					prot &= ~PROT_WRITE;
3413 				if (pwp->wp_exec)
3414 					prot &=
3415 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3416 				if (!(pwp->wp_flags & WP_NOWATCH) &&
3417 				    pwp->wp_prot != prot &&
3418 				    (pwp->wp_flags & WP_SETPROT) == 0) {
3419 					pwp->wp_flags |= WP_SETPROT;
3420 					pwp->wp_list = p->p_wprot;
3421 					p->p_wprot = pwp;
3422 				}
3423 				pwp->wp_prot = (uchar_t)prot;
3424 			}
3425 		} else {
3426 			/*
3427 			 * No watched areas remain in this page.
3428 			 * Reset everything to normal.
3429 			 */
3430 			if (pwp->wp_oprot != 0) {
3431 				pwp->wp_prot = pwp->wp_oprot;
3432 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
3433 					pwp->wp_flags |= WP_SETPROT;
3434 					pwp->wp_list = p->p_wprot;
3435 					p->p_wprot = pwp;
3436 				}
3437 			}
3438 		}
3439 
3440 		pwp = AVL_NEXT(tree, pwp);
3441 	}
3442 
3443 	AS_LOCK_EXIT(as, &as->a_lock);
3444 }
3445 
3446 /*
3447  * Return the original protections for the specified page.
3448  */
3449 static void
3450 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3451 {
3452 	struct watched_page *pwp;
3453 	struct watched_page tpw;
3454 
3455 	ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3456 
3457 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3458 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3459 		*prot = pwp->wp_oprot;
3460 }
3461 
3462 static prpagev_t *
3463 pr_pagev_create(struct seg *seg, int check_noreserve)
3464 {
3465 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3466 	size_t total_pages = seg_pages(seg);
3467 
3468 	/*
3469 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
3470 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
3471 	 * to about a megabyte of kernel heap by default.
3472 	 */
3473 	pagev->pg_npages = MIN(total_pages, pagev_lim);
3474 	pagev->pg_pnbase = 0;
3475 
3476 	pagev->pg_protv =
3477 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3478 
3479 	if (check_noreserve)
3480 		pagev->pg_incore =
3481 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3482 	else
3483 		pagev->pg_incore = NULL;
3484 
3485 	return (pagev);
3486 }
3487 
3488 static void
3489 pr_pagev_destroy(prpagev_t *pagev)
3490 {
3491 	if (pagev->pg_incore != NULL)
3492 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3493 
3494 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3495 	kmem_free(pagev, sizeof (prpagev_t));
3496 }
3497 
3498 static caddr_t
3499 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3500 {
3501 	ulong_t lastpg = seg_page(seg, eaddr - 1);
3502 	ulong_t pn, pnlim;
3503 	caddr_t saddr;
3504 	size_t len;
3505 
3506 	ASSERT(addr >= seg->s_base && addr <= eaddr);
3507 
3508 	if (addr == eaddr)
3509 		return (eaddr);
3510 
3511 refill:
3512 	ASSERT(addr < eaddr);
3513 	pagev->pg_pnbase = seg_page(seg, addr);
3514 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
3515 	saddr = addr;
3516 
3517 	if (lastpg < pnlim)
3518 		len = (size_t)(eaddr - addr);
3519 	else
3520 		len = pagev->pg_npages * PAGESIZE;
3521 
3522 	if (pagev->pg_incore != NULL) {
3523 		/*
3524 		 * INCORE cleverly has different semantics than GETPROT:
3525 		 * it returns info on pages up to but NOT including addr + len.
3526 		 */
3527 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3528 		pn = pagev->pg_pnbase;
3529 
3530 		do {
3531 			/*
3532 			 * Guilty knowledge here:  We know that segvn_incore
3533 			 * returns more than just the low-order bit that
3534 			 * indicates the page is actually in memory.  If any
3535 			 * bits are set, then the page has backing store.
3536 			 */
3537 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3538 				goto out;
3539 
3540 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3541 
3542 		/*
3543 		 * If we examined all the pages in the vector but we're not
3544 		 * at the end of the segment, take another lap.
3545 		 */
3546 		if (addr < eaddr)
3547 			goto refill;
3548 	}
3549 
3550 	/*
3551 	 * Need to take len - 1 because addr + len is the address of the
3552 	 * first byte of the page just past the end of what we want.
3553 	 */
3554 out:
3555 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3556 	return (addr);
3557 }
3558 
3559 static caddr_t
3560 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3561     caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3562 {
3563 	/*
3564 	 * Our starting address is either the specified address, or the base
3565 	 * address from the start of the pagev.  If the latter is greater,
3566 	 * this means a previous call to pr_pagev_fill has already scanned
3567 	 * further than the end of the previous mapping.
3568 	 */
3569 	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3570 	caddr_t addr = MAX(*saddrp, base);
3571 	ulong_t pn = seg_page(seg, addr);
3572 	uint_t prot, nprot;
3573 
3574 	/*
3575 	 * If we're dealing with noreserve pages, then advance addr to
3576 	 * the address of the next page which has backing store.
3577 	 */
3578 	if (pagev->pg_incore != NULL) {
3579 		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3580 			if ((addr += PAGESIZE) == eaddr) {
3581 				*saddrp = addr;
3582 				prot = 0;
3583 				goto out;
3584 			}
3585 			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3586 				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3587 				if (addr == eaddr) {
3588 					*saddrp = addr;
3589 					prot = 0;
3590 					goto out;
3591 				}
3592 				pn = seg_page(seg, addr);
3593 			}
3594 		}
3595 	}
3596 
3597 	/*
3598 	 * Get the protections on the page corresponding to addr.
3599 	 */
3600 	pn = seg_page(seg, addr);
3601 	ASSERT(pn >= pagev->pg_pnbase);
3602 	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3603 
3604 	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3605 	getwatchprot(seg->s_as, addr, &prot);
3606 	*saddrp = addr;
3607 
3608 	/*
3609 	 * Now loop until we find a backed page with different protections
3610 	 * or we reach the end of this segment.
3611 	 */
3612 	while ((addr += PAGESIZE) < eaddr) {
3613 		/*
3614 		 * If pn has advanced to the page number following what we
3615 		 * have information on, refill the page vector and reset
3616 		 * addr and pn.  If pr_pagev_fill does not return the
3617 		 * address of the next page, we have a discontiguity and
3618 		 * thus have reached the end of the current mapping.
3619 		 */
3620 		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3621 			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3622 			if (naddr != addr)
3623 				goto out;
3624 			pn = seg_page(seg, addr);
3625 		}
3626 
3627 		/*
3628 		 * The previous page's protections are in prot, and it has
3629 		 * backing.  If this page is MAP_NORESERVE and has no backing,
3630 		 * then end this mapping and return the previous protections.
3631 		 */
3632 		if (pagev->pg_incore != NULL &&
3633 		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3634 			break;
3635 
3636 		/*
3637 		 * Otherwise end the mapping if this page's protections (nprot)
3638 		 * are different than those in the previous page (prot).
3639 		 */
3640 		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3641 		getwatchprot(seg->s_as, addr, &nprot);
3642 
3643 		if (nprot != prot)
3644 			break;
3645 	}
3646 
3647 out:
3648 	*protp = prot;
3649 	return (addr);
3650 }
3651 
3652 size_t
3653 pr_getsegsize(struct seg *seg, int reserved)
3654 {
3655 	size_t size = seg->s_size;
3656 
3657 	/*
3658 	 * If we're interested in the reserved space, return the size of the
3659 	 * segment itself.  Everything else in this function is a special case
3660 	 * to determine the actual underlying size of various segment types.
3661 	 */
3662 	if (reserved)
3663 		return (size);
3664 
3665 	/*
3666 	 * If this is a segvn mapping of a regular file, return the smaller
3667 	 * of the segment size and the remaining size of the file beyond
3668 	 * the file offset corresponding to seg->s_base.
3669 	 */
3670 	if (seg->s_ops == &segvn_ops) {
3671 		vattr_t vattr;
3672 		vnode_t *vp;
3673 
3674 		vattr.va_mask = AT_SIZE;
3675 
3676 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3677 		    vp != NULL && vp->v_type == VREG &&
3678 		    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
3679 
3680 			u_offset_t fsize = vattr.va_size;
3681 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3682 
3683 			if (fsize < offset)
3684 				fsize = 0;
3685 			else
3686 				fsize -= offset;
3687 
3688 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3689 
3690 			if (fsize < (u_offset_t)size)
3691 				size = (size_t)fsize;
3692 		}
3693 
3694 		return (size);
3695 	}
3696 
3697 	/*
3698 	 * If this is an ISM shared segment, don't include pages that are
3699 	 * beyond the real size of the spt segment that backs it.
3700 	 */
3701 	if (seg->s_ops == &segspt_shmops)
3702 		return (MIN(spt_realsize(seg), size));
3703 
3704 	/*
3705 	 * If this is segment is a mapping from /dev/null, then this is a
3706 	 * reservation of virtual address space and has no actual size.
3707 	 * Such segments are backed by segdev and have type set to neither
3708 	 * MAP_SHARED nor MAP_PRIVATE.
3709 	 */
3710 	if (seg->s_ops == &segdev_ops &&
3711 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
3712 		(MAP_SHARED | MAP_PRIVATE)) == 0))
3713 		return (0);
3714 
3715 	/*
3716 	 * If this segment doesn't match one of the special types we handle,
3717 	 * just return the size of the segment itself.
3718 	 */
3719 	return (size);
3720 }
3721 
3722 uint_t
3723 pr_getprot(struct seg *seg, int reserved, void **tmp,
3724 	caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3725 {
3726 	struct as *as = seg->s_as;
3727 
3728 	caddr_t saddr = *saddrp;
3729 	caddr_t naddr;
3730 
3731 	int check_noreserve;
3732 	uint_t prot;
3733 
3734 	union {
3735 		struct segvn_data *svd;
3736 		struct segdev_data *sdp;
3737 		void *data;
3738 	} s;
3739 
3740 	s.data = seg->s_data;
3741 
3742 	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3743 	ASSERT(saddr >= seg->s_base && saddr < eaddr);
3744 	ASSERT(eaddr <= seg->s_base + seg->s_size);
3745 
3746 	/*
3747 	 * Don't include MAP_NORESERVE pages in the address range
3748 	 * unless their mappings have actually materialized.
3749 	 * We cheat by knowing that segvn is the only segment
3750 	 * driver that supports MAP_NORESERVE.
3751 	 */
3752 	check_noreserve =
3753 	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3754 	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3755 	    (s.svd->flags & MAP_NORESERVE));
3756 
3757 	/*
3758 	 * Examine every page only as a last resort.  We use guilty knowledge
3759 	 * of segvn and segdev to avoid this: if there are no per-page
3760 	 * protections present in the segment and we don't care about
3761 	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3762 	 */
3763 	if (!check_noreserve && saddr == seg->s_base &&
3764 	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3765 		prot = s.svd->prot;
3766 		getwatchprot(as, saddr, &prot);
3767 		naddr = eaddr;
3768 
3769 	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3770 	    s.sdp != NULL && s.sdp->pageprot == 0) {
3771 		prot = s.sdp->prot;
3772 		getwatchprot(as, saddr, &prot);
3773 		naddr = eaddr;
3774 
3775 	} else {
3776 		prpagev_t *pagev;
3777 
3778 		/*
3779 		 * If addr is sitting at the start of the segment, then
3780 		 * create a page vector to store protection and incore
3781 		 * information for pages in the segment, and fill it.
3782 		 * Otherwise, we expect *tmp to address the prpagev_t
3783 		 * allocated by a previous call to this function.
3784 		 */
3785 		if (saddr == seg->s_base) {
3786 			pagev = pr_pagev_create(seg, check_noreserve);
3787 			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3788 
3789 			ASSERT(*tmp == NULL);
3790 			*tmp = pagev;
3791 
3792 			ASSERT(saddr <= eaddr);
3793 			*saddrp = saddr;
3794 
3795 			if (saddr == eaddr) {
3796 				naddr = saddr;
3797 				prot = 0;
3798 				goto out;
3799 			}
3800 
3801 		} else {
3802 			ASSERT(*tmp != NULL);
3803 			pagev = (prpagev_t *)*tmp;
3804 		}
3805 
3806 		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3807 		ASSERT(naddr <= eaddr);
3808 	}
3809 
3810 out:
3811 	if (naddr == eaddr)
3812 		pr_getprot_done(tmp);
3813 	*naddrp = naddr;
3814 	return (prot);
3815 }
3816 
3817 void
3818 pr_getprot_done(void **tmp)
3819 {
3820 	if (*tmp != NULL) {
3821 		pr_pagev_destroy((prpagev_t *)*tmp);
3822 		*tmp = NULL;
3823 	}
3824 }
3825 
3826 /*
3827  * Return true iff the vnode is a /proc file from the object directory.
3828  */
3829 int
3830 pr_isobject(vnode_t *vp)
3831 {
3832 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3833 }
3834 
3835 /*
3836  * Return true iff the vnode is a /proc file opened by the process itself.
3837  */
3838 int
3839 pr_isself(vnode_t *vp)
3840 {
3841 	/*
3842 	 * XXX: To retain binary compatibility with the old
3843 	 * ioctl()-based version of /proc, we exempt self-opens
3844 	 * of /proc/<pid> from being marked close-on-exec.
3845 	 */
3846 	return (vn_matchops(vp, prvnodeops) &&
3847 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
3848 	    VTOP(vp)->pr_type != PR_PIDDIR);
3849 }
3850 
3851 static ssize_t
3852 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3853 {
3854 	ssize_t pagesize, hatsize;
3855 
3856 	ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
3857 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3858 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3859 	ASSERT(saddr < eaddr);
3860 
3861 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3862 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3863 	ASSERT(pagesize != 0);
3864 
3865 	if (pagesize == -1)
3866 		pagesize = PAGESIZE;
3867 
3868 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3869 
3870 	while (saddr < eaddr) {
3871 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3872 			break;
3873 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
3874 		saddr += pagesize;
3875 	}
3876 
3877 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
3878 	return (hatsize);
3879 }
3880 
3881 /*
3882  * Return an array of structures with extended memory map information.
3883  * We allocate here; the caller must deallocate.
3884  */
3885 int
3886 prgetxmap(proc_t *p, list_t *iolhead)
3887 {
3888 	struct as *as = p->p_as;
3889 	prxmap_t *mp;
3890 	struct seg *seg;
3891 	struct seg *brkseg, *stkseg;
3892 	struct vnode *vp;
3893 	struct vattr vattr;
3894 	uint_t prot;
3895 
3896 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
3897 
3898 	/*
3899 	 * Request an initial buffer size that doesn't waste memory
3900 	 * if the address space has only a small number of segments.
3901 	 */
3902 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
3903 
3904 	if ((seg = AS_SEGFIRST(as)) == NULL)
3905 		return (0);
3906 
3907 	brkseg = break_seg(p);
3908 	stkseg = as_segat(as, prgetstackbase(p));
3909 
3910 	do {
3911 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
3912 		caddr_t saddr, naddr, baddr;
3913 		void *tmp = NULL;
3914 		ssize_t psz;
3915 		char *parr;
3916 		uint64_t npages;
3917 		uint64_t pagenum;
3918 
3919 		/*
3920 		 * Segment loop part one: iterate from the base of the segment
3921 		 * to its end, pausing at each address boundary (baddr) between
3922 		 * ranges that have different virtual memory protections.
3923 		 */
3924 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
3925 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
3926 			ASSERT(baddr >= saddr && baddr <= eaddr);
3927 
3928 			/*
3929 			 * Segment loop part two: iterate from the current
3930 			 * position to the end of the protection boundary,
3931 			 * pausing at each address boundary (naddr) between
3932 			 * ranges that have different underlying page sizes.
3933 			 */
3934 			for (; saddr < baddr; saddr = naddr) {
3935 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
3936 				ASSERT(naddr >= saddr && naddr <= baddr);
3937 
3938 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
3939 
3940 				mp->pr_vaddr = (uintptr_t)saddr;
3941 				mp->pr_size = naddr - saddr;
3942 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
3943 				mp->pr_mflags = 0;
3944 				if (prot & PROT_READ)
3945 					mp->pr_mflags |= MA_READ;
3946 				if (prot & PROT_WRITE)
3947 					mp->pr_mflags |= MA_WRITE;
3948 				if (prot & PROT_EXEC)
3949 					mp->pr_mflags |= MA_EXEC;
3950 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
3951 					mp->pr_mflags |= MA_SHARED;
3952 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
3953 					mp->pr_mflags |= MA_NORESERVE;
3954 				if (seg->s_ops == &segspt_shmops ||
3955 				    (seg->s_ops == &segvn_ops &&
3956 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
3957 				    vp == NULL)))
3958 					mp->pr_mflags |= MA_ANON;
3959 				if (seg == brkseg)
3960 					mp->pr_mflags |= MA_BREAK;
3961 				else if (seg == stkseg)
3962 					mp->pr_mflags |= MA_STACK;
3963 				if (seg->s_ops == &segspt_shmops)
3964 					mp->pr_mflags |= MA_ISM | MA_SHM;
3965 
3966 				mp->pr_pagesize = PAGESIZE;
3967 				if (psz == -1) {
3968 					mp->pr_hatpagesize = 0;
3969 				} else {
3970 					mp->pr_hatpagesize = psz;
3971 				}
3972 
3973 				/*
3974 				 * Manufacture a filename for the "object" dir.
3975 				 */
3976 				mp->pr_dev = PRNODEV;
3977 				vattr.va_mask = AT_FSID|AT_NODEID;
3978 				if (seg->s_ops == &segvn_ops &&
3979 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
3980 				    vp != NULL && vp->v_type == VREG &&
3981 				    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
3982 					mp->pr_dev = vattr.va_fsid;
3983 					mp->pr_ino = vattr.va_nodeid;
3984 					if (vp == p->p_exec)
3985 						(void) strcpy(mp->pr_mapname,
3986 						    "a.out");
3987 					else
3988 						pr_object_name(mp->pr_mapname,
3989 						    vp, &vattr);
3990 				}
3991 
3992 				/*
3993 				 * Get the SysV shared memory id, if any.
3994 				 */
3995 				if ((mp->pr_mflags & MA_SHARED) &&
3996 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
3997 				    seg->s_base)) != SHMID_NONE) {
3998 					if (mp->pr_shmid == SHMID_FREE)
3999 						mp->pr_shmid = -1;
4000 
4001 					mp->pr_mflags |= MA_SHM;
4002 				} else {
4003 					mp->pr_shmid = -1;
4004 				}
4005 
4006 				npages = ((uintptr_t)(naddr - saddr)) >>
4007 				    PAGESHIFT;
4008 				parr = kmem_zalloc(npages, KM_SLEEP);
4009 
4010 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4011 
4012 				for (pagenum = 0; pagenum < npages; pagenum++) {
4013 					if (parr[pagenum] & SEG_PAGE_INCORE)
4014 						mp->pr_rss++;
4015 					if (parr[pagenum] & SEG_PAGE_ANON)
4016 						mp->pr_anon++;
4017 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4018 						mp->pr_locked++;
4019 				}
4020 				kmem_free(parr, npages);
4021 			}
4022 		}
4023 		ASSERT(tmp == NULL);
4024 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4025 
4026 	return (0);
4027 }
4028 
4029 /*
4030  * Return the process's credentials.  We don't need a 32-bit equivalent of
4031  * this function because prcred_t and prcred32_t are actually the same.
4032  */
4033 void
4034 prgetcred(proc_t *p, prcred_t *pcrp)
4035 {
4036 	mutex_enter(&p->p_crlock);
4037 	cred2prcred(p->p_cred, pcrp);
4038 	mutex_exit(&p->p_crlock);
4039 }
4040 
4041 /*
4042  * Compute actual size of the prpriv_t structure.
4043  */
4044 
4045 size_t
4046 prgetprivsize(void)
4047 {
4048 	return (priv_prgetprivsize(NULL));
4049 }
4050 
4051 /*
4052  * Return the process's privileges.  We don't need a 32-bit equivalent of
4053  * this function because prpriv_t and prpriv32_t are actually the same.
4054  */
4055 void
4056 prgetpriv(proc_t *p, prpriv_t *pprp)
4057 {
4058 	mutex_enter(&p->p_crlock);
4059 	cred2prpriv(p->p_cred, pprp);
4060 	mutex_exit(&p->p_crlock);
4061 }
4062 
4063 #ifdef _SYSCALL32_IMPL
4064 /*
4065  * Return an array of structures with HAT memory map information.
4066  * We allocate here; the caller must deallocate.
4067  */
4068 int
4069 prgetxmap32(proc_t *p, list_t *iolhead)
4070 {
4071 	struct as *as = p->p_as;
4072 	prxmap32_t *mp;
4073 	struct seg *seg;
4074 	struct seg *brkseg, *stkseg;
4075 	struct vnode *vp;
4076 	struct vattr vattr;
4077 	uint_t prot;
4078 
4079 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4080 
4081 	/*
4082 	 * Request an initial buffer size that doesn't waste memory
4083 	 * if the address space has only a small number of segments.
4084 	 */
4085 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4086 
4087 	if ((seg = AS_SEGFIRST(as)) == NULL)
4088 		return (0);
4089 
4090 	brkseg = break_seg(p);
4091 	stkseg = as_segat(as, prgetstackbase(p));
4092 
4093 	do {
4094 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4095 		caddr_t saddr, naddr, baddr;
4096 		void *tmp = NULL;
4097 		ssize_t psz;
4098 		char *parr;
4099 		uint64_t npages;
4100 		uint64_t pagenum;
4101 
4102 		/*
4103 		 * Segment loop part one: iterate from the base of the segment
4104 		 * to its end, pausing at each address boundary (baddr) between
4105 		 * ranges that have different virtual memory protections.
4106 		 */
4107 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4108 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4109 			ASSERT(baddr >= saddr && baddr <= eaddr);
4110 
4111 			/*
4112 			 * Segment loop part two: iterate from the current
4113 			 * position to the end of the protection boundary,
4114 			 * pausing at each address boundary (naddr) between
4115 			 * ranges that have different underlying page sizes.
4116 			 */
4117 			for (; saddr < baddr; saddr = naddr) {
4118 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4119 				ASSERT(naddr >= saddr && naddr <= baddr);
4120 
4121 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4122 
4123 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4124 				mp->pr_size = (size32_t)(naddr - saddr);
4125 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4126 				mp->pr_mflags = 0;
4127 				if (prot & PROT_READ)
4128 					mp->pr_mflags |= MA_READ;
4129 				if (prot & PROT_WRITE)
4130 					mp->pr_mflags |= MA_WRITE;
4131 				if (prot & PROT_EXEC)
4132 					mp->pr_mflags |= MA_EXEC;
4133 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4134 					mp->pr_mflags |= MA_SHARED;
4135 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4136 					mp->pr_mflags |= MA_NORESERVE;
4137 				if (seg->s_ops == &segspt_shmops ||
4138 				    (seg->s_ops == &segvn_ops &&
4139 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4140 				    vp == NULL)))
4141 					mp->pr_mflags |= MA_ANON;
4142 				if (seg == brkseg)
4143 					mp->pr_mflags |= MA_BREAK;
4144 				else if (seg == stkseg)
4145 					mp->pr_mflags |= MA_STACK;
4146 				if (seg->s_ops == &segspt_shmops)
4147 					mp->pr_mflags |= MA_ISM | MA_SHM;
4148 
4149 				mp->pr_pagesize = PAGESIZE;
4150 				if (psz == -1) {
4151 					mp->pr_hatpagesize = 0;
4152 				} else {
4153 					mp->pr_hatpagesize = psz;
4154 				}
4155 
4156 				/*
4157 				 * Manufacture a filename for the "object" dir.
4158 				 */
4159 				mp->pr_dev = PRNODEV32;
4160 				vattr.va_mask = AT_FSID|AT_NODEID;
4161 				if (seg->s_ops == &segvn_ops &&
4162 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4163 				    vp != NULL && vp->v_type == VREG &&
4164 				    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
4165 					(void) cmpldev(&mp->pr_dev,
4166 					    vattr.va_fsid);
4167 					mp->pr_ino = vattr.va_nodeid;
4168 					if (vp == p->p_exec)
4169 						(void) strcpy(mp->pr_mapname,
4170 						    "a.out");
4171 					else
4172 						pr_object_name(mp->pr_mapname,
4173 						    vp, &vattr);
4174 				}
4175 
4176 				/*
4177 				 * Get the SysV shared memory id, if any.
4178 				 */
4179 				if ((mp->pr_mflags & MA_SHARED) &&
4180 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4181 				    seg->s_base)) != SHMID_NONE) {
4182 					if (mp->pr_shmid == SHMID_FREE)
4183 						mp->pr_shmid = -1;
4184 
4185 					mp->pr_mflags |= MA_SHM;
4186 				} else {
4187 					mp->pr_shmid = -1;
4188 				}
4189 
4190 				npages = ((uintptr_t)(naddr - saddr)) >>
4191 				    PAGESHIFT;
4192 				parr = kmem_zalloc(npages, KM_SLEEP);
4193 
4194 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4195 
4196 				for (pagenum = 0; pagenum < npages; pagenum++) {
4197 					if (parr[pagenum] & SEG_PAGE_INCORE)
4198 						mp->pr_rss++;
4199 					if (parr[pagenum] & SEG_PAGE_ANON)
4200 						mp->pr_anon++;
4201 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4202 						mp->pr_locked++;
4203 				}
4204 				kmem_free(parr, npages);
4205 			}
4206 		}
4207 		ASSERT(tmp == NULL);
4208 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4209 
4210 	return (0);
4211 }
4212 #endif	/* _SYSCALL32_IMPL */
4213