xref: /titanic_44/usr/src/uts/common/fs/proc/prsubr.c (revision 15a2c75300554c829663ead6bc3489d86c83cde2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/t_lock.h>
34 #include <sys/param.h>
35 #include <sys/cmn_err.h>
36 #include <sys/cred.h>
37 #include <sys/priv.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/inline.h>
41 #include <sys/kmem.h>
42 #include <sys/mman.h>
43 #include <sys/proc.h>
44 #include <sys/sobject.h>
45 #include <sys/sysmacros.h>
46 #include <sys/systm.h>
47 #include <sys/uio.h>
48 #include <sys/var.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/session.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/disp.h>
56 #include <sys/class.h>
57 #include <sys/ts.h>
58 #include <sys/bitmap.h>
59 #include <sys/poll.h>
60 #include <sys/shm_impl.h>
61 #include <sys/fault.h>
62 #include <sys/syscall.h>
63 #include <sys/procfs.h>
64 #include <sys/processor.h>
65 #include <sys/cpuvar.h>
66 #include <sys/copyops.h>
67 #include <sys/time.h>
68 #include <sys/msacct.h>
69 #include <vm/as.h>
70 #include <vm/rm.h>
71 #include <vm/seg.h>
72 #include <vm/seg_vn.h>
73 #include <vm/seg_dev.h>
74 #include <vm/seg_spt.h>
75 #include <vm/page.h>
76 #include <sys/vmparam.h>
77 #include <sys/swap.h>
78 #include <fs/proc/prdata.h>
79 #include <sys/task.h>
80 #include <sys/project.h>
81 #include <sys/contract_impl.h>
82 #include <sys/contract/process.h>
83 #include <sys/contract/process_impl.h>
84 #include <sys/schedctl.h>
85 #include <sys/pool.h>
86 #include <sys/zone.h>
87 #include <sys/atomic.h>
88 #include <sys/sdt.h>
89 
90 #define	MAX_ITERS_SPIN	5
91 
92 typedef struct prpagev {
93 	uint_t *pg_protv;	/* vector of page permissions */
94 	char *pg_incore;	/* vector of incore flags */
95 	size_t pg_npages;	/* number of pages in protv and incore */
96 	ulong_t pg_pnbase;	/* pn within segment of first protv element */
97 } prpagev_t;
98 
99 size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
100 
101 extern struct seg_ops segdev_ops;	/* needs a header file */
102 extern struct seg_ops segspt_shmops;	/* needs a header file */
103 
104 static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
105 static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
106 
107 /*
108  * Choose an lwp from the complete set of lwps for the process.
109  * This is called for any operation applied to the process
110  * file descriptor that requires an lwp to operate upon.
111  *
112  * Returns a pointer to the thread for the selected LWP,
113  * and with the dispatcher lock held for the thread.
114  *
115  * The algorithm for choosing an lwp is critical for /proc semantics;
116  * don't touch this code unless you know all of the implications.
117  */
118 kthread_t *
119 prchoose(proc_t *p)
120 {
121 	kthread_t *t;
122 	kthread_t *t_onproc = NULL;	/* running on processor */
123 	kthread_t *t_run = NULL;	/* runnable, on disp queue */
124 	kthread_t *t_sleep = NULL;	/* sleeping */
125 	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
126 	kthread_t *t_susp = NULL;	/* suspended stop */
127 	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
128 	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
129 	kthread_t *t_req = NULL;	/* requested stop */
130 	kthread_t *t_istop = NULL;	/* event-of-interest stop */
131 
132 	ASSERT(MUTEX_HELD(&p->p_lock));
133 
134 	/*
135 	 * If the agent lwp exists, it takes precedence over all others.
136 	 */
137 	if ((t = p->p_agenttp) != NULL) {
138 		thread_lock(t);
139 		return (t);
140 	}
141 
142 	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
143 		return (t);
144 	do {		/* for eacn lwp in the process */
145 		if (VSTOPPED(t)) {	/* virtually stopped */
146 			if (t_req == NULL)
147 				t_req = t;
148 			continue;
149 		}
150 
151 		thread_lock(t);		/* make sure thread is in good state */
152 		switch (t->t_state) {
153 		default:
154 			panic("prchoose: bad thread state %d, thread 0x%p",
155 			    t->t_state, (void *)t);
156 			/*NOTREACHED*/
157 		case TS_SLEEP:
158 			/* this is filthy */
159 			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
160 			    t->t_wchan0 == NULL) {
161 				if (t_hold == NULL)
162 					t_hold = t;
163 			} else {
164 				if (t_sleep == NULL)
165 					t_sleep = t;
166 			}
167 			break;
168 		case TS_RUN:
169 		case TS_WAIT:
170 			if (t_run == NULL)
171 				t_run = t;
172 			break;
173 		case TS_ONPROC:
174 			if (t_onproc == NULL)
175 				t_onproc = t;
176 			break;
177 		case TS_ZOMB:		/* last possible choice */
178 			break;
179 		case TS_STOPPED:
180 			switch (t->t_whystop) {
181 			case PR_SUSPENDED:
182 				if (t_susp == NULL)
183 					t_susp = t;
184 				break;
185 			case PR_JOBCONTROL:
186 				if (t->t_proc_flag & TP_PRSTOP) {
187 					if (t_jdstop == NULL)
188 						t_jdstop = t;
189 				} else {
190 					if (t_jstop == NULL)
191 						t_jstop = t;
192 				}
193 				break;
194 			case PR_REQUESTED:
195 				if (t_req == NULL)
196 					t_req = t;
197 				break;
198 			case PR_SYSENTRY:
199 			case PR_SYSEXIT:
200 			case PR_SIGNALLED:
201 			case PR_FAULTED:
202 				/*
203 				 * Make an lwp calling exit() be the
204 				 * last lwp seen in the process.
205 				 */
206 				if (t_istop == NULL ||
207 				    (t_istop->t_whystop == PR_SYSENTRY &&
208 				    t_istop->t_whatstop == SYS_exit))
209 					t_istop = t;
210 				break;
211 			case PR_CHECKPOINT:	/* can't happen? */
212 				break;
213 			default:
214 				panic("prchoose: bad t_whystop %d, thread 0x%p",
215 				    t->t_whystop, (void *)t);
216 				/*NOTREACHED*/
217 			}
218 			break;
219 		}
220 		thread_unlock(t);
221 	} while ((t = t->t_forw) != p->p_tlist);
222 
223 	if (t_onproc)
224 		t = t_onproc;
225 	else if (t_run)
226 		t = t_run;
227 	else if (t_sleep)
228 		t = t_sleep;
229 	else if (t_jstop)
230 		t = t_jstop;
231 	else if (t_jdstop)
232 		t = t_jdstop;
233 	else if (t_istop)
234 		t = t_istop;
235 	else if (t_req)
236 		t = t_req;
237 	else if (t_hold)
238 		t = t_hold;
239 	else if (t_susp)
240 		t = t_susp;
241 	else			/* TS_ZOMB */
242 		t = p->p_tlist;
243 
244 	if (t != NULL)
245 		thread_lock(t);
246 	return (t);
247 }
248 
249 /*
250  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
251  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
252  * on the /proc file descriptor.  Called from stop() when a traced
253  * process stops on an event of interest.  Also called from exit()
254  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
255  */
256 void
257 prnotify(struct vnode *vp)
258 {
259 	prcommon_t *pcp = VTOP(vp)->pr_common;
260 
261 	mutex_enter(&pcp->prc_mutex);
262 	cv_broadcast(&pcp->prc_wait);
263 	mutex_exit(&pcp->prc_mutex);
264 	if (pcp->prc_flags & PRC_POLL) {
265 		/*
266 		 * We call pollwakeup() with POLLHUP to ensure that
267 		 * the pollers are awakened even if they are polling
268 		 * for nothing (i.e., waiting for the process to exit).
269 		 * This enables the use of the PRC_POLL flag for optimization
270 		 * (we can turn off PRC_POLL only if we know no pollers remain).
271 		 */
272 		pcp->prc_flags &= ~PRC_POLL;
273 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
274 	}
275 }
276 
277 /* called immediately below, in prfree() */
278 static void
279 prfreenotify(vnode_t *vp)
280 {
281 	prnode_t *pnp;
282 	prcommon_t *pcp;
283 
284 	while (vp != NULL) {
285 		pnp = VTOP(vp);
286 		pcp = pnp->pr_common;
287 		ASSERT(pcp->prc_thread == NULL);
288 		pcp->prc_proc = NULL;
289 		/*
290 		 * We can't call prnotify() here because we are holding
291 		 * pidlock.  We assert that there is no need to.
292 		 */
293 		mutex_enter(&pcp->prc_mutex);
294 		cv_broadcast(&pcp->prc_wait);
295 		mutex_exit(&pcp->prc_mutex);
296 		ASSERT(!(pcp->prc_flags & PRC_POLL));
297 
298 		vp = pnp->pr_next;
299 		pnp->pr_next = NULL;
300 	}
301 }
302 
303 /*
304  * Called from a hook in freeproc() when a traced process is removed
305  * from the process table.  The proc-table pointers of all associated
306  * /proc vnodes are cleared to indicate that the process has gone away.
307  */
308 void
309 prfree(proc_t *p)
310 {
311 	uint_t slot = p->p_slot;
312 
313 	ASSERT(MUTEX_HELD(&pidlock));
314 
315 	/*
316 	 * Block the process against /proc so it can be freed.
317 	 * It cannot be freed while locked by some controlling process.
318 	 * Lock ordering:
319 	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
320 	 */
321 	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
322 	mutex_enter(&p->p_lock);
323 	while (p->p_proc_flag & P_PR_LOCK) {
324 		mutex_exit(&pr_pidlock);
325 		cv_wait(&pr_pid_cv[slot], &p->p_lock);
326 		mutex_exit(&p->p_lock);
327 		mutex_enter(&pr_pidlock);
328 		mutex_enter(&p->p_lock);
329 	}
330 
331 	ASSERT(p->p_tlist == NULL);
332 
333 	prfreenotify(p->p_plist);
334 	p->p_plist = NULL;
335 
336 	prfreenotify(p->p_trace);
337 	p->p_trace = NULL;
338 
339 	/*
340 	 * We broadcast to wake up everyone waiting for this process.
341 	 * No one can reach this process from this point on.
342 	 */
343 	cv_broadcast(&pr_pid_cv[slot]);
344 
345 	mutex_exit(&p->p_lock);
346 	mutex_exit(&pr_pidlock);
347 }
348 
349 /*
350  * Called from a hook in exit() when a traced process is becoming a zombie.
351  */
352 void
353 prexit(proc_t *p)
354 {
355 	ASSERT(MUTEX_HELD(&p->p_lock));
356 
357 	if (pr_watch_active(p)) {
358 		pr_free_watchpoints(p);
359 		watch_disable(curthread);
360 	}
361 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
362 	if (p->p_trace) {
363 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
364 		prnotify(p->p_trace);
365 	}
366 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
367 }
368 
369 /*
370  * Called when a thread calls lwp_exit().
371  */
372 void
373 prlwpexit(kthread_t *t)
374 {
375 	vnode_t *vp;
376 	prnode_t *pnp;
377 	prcommon_t *pcp;
378 	proc_t *p = ttoproc(t);
379 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
380 
381 	ASSERT(t == curthread);
382 	ASSERT(MUTEX_HELD(&p->p_lock));
383 
384 	/*
385 	 * The process must be blocked against /proc to do this safely.
386 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
387 	 * It is the caller's responsibility to have called prbarrier(p).
388 	 */
389 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
390 
391 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
392 		pnp = VTOP(vp);
393 		pcp = pnp->pr_common;
394 		if (pcp->prc_thread == t) {
395 			pcp->prc_thread = NULL;
396 			pcp->prc_flags |= PRC_DESTROY;
397 		}
398 	}
399 
400 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
401 		pnp = VTOP(vp);
402 		pcp = pnp->pr_common;
403 		pcp->prc_thread = NULL;
404 		pcp->prc_flags |= PRC_DESTROY;
405 		prnotify(vp);
406 	}
407 
408 	if (p->p_trace)
409 		prnotify(p->p_trace);
410 }
411 
412 /*
413  * Called when a zombie thread is joined or when a
414  * detached lwp exits.  Called from lwp_hash_out().
415  */
416 void
417 prlwpfree(proc_t *p, lwpent_t *lep)
418 {
419 	vnode_t *vp;
420 	prnode_t *pnp;
421 	prcommon_t *pcp;
422 
423 	ASSERT(MUTEX_HELD(&p->p_lock));
424 
425 	/*
426 	 * The process must be blocked against /proc to do this safely.
427 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
428 	 * It is the caller's responsibility to have called prbarrier(p).
429 	 */
430 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
431 
432 	vp = lep->le_trace;
433 	lep->le_trace = NULL;
434 	while (vp) {
435 		prnotify(vp);
436 		pnp = VTOP(vp);
437 		pcp = pnp->pr_common;
438 		ASSERT(pcp->prc_thread == NULL &&
439 		    (pcp->prc_flags & PRC_DESTROY));
440 		pcp->prc_tslot = -1;
441 		vp = pnp->pr_next;
442 		pnp->pr_next = NULL;
443 	}
444 
445 	if (p->p_trace)
446 		prnotify(p->p_trace);
447 }
448 
449 /*
450  * Called from a hook in exec() when a thread starts exec().
451  */
452 void
453 prexecstart(void)
454 {
455 	proc_t *p = ttoproc(curthread);
456 	klwp_t *lwp = ttolwp(curthread);
457 
458 	/*
459 	 * The P_PR_EXEC flag blocks /proc operations for
460 	 * the duration of the exec().
461 	 * We can't start exec() while the process is
462 	 * locked by /proc, so we call prbarrier().
463 	 * lwp_nostop keeps the process from being stopped
464 	 * via job control for the duration of the exec().
465 	 */
466 
467 	ASSERT(MUTEX_HELD(&p->p_lock));
468 	prbarrier(p);
469 	lwp->lwp_nostop++;
470 	p->p_proc_flag |= P_PR_EXEC;
471 }
472 
473 /*
474  * Called from a hook in exec() when a thread finishes exec().
475  * The thread may or may not have succeeded.  Some other thread
476  * may have beat it to the punch.
477  */
478 void
479 prexecend(void)
480 {
481 	proc_t *p = ttoproc(curthread);
482 	klwp_t *lwp = ttolwp(curthread);
483 	vnode_t *vp;
484 	prnode_t *pnp;
485 	prcommon_t *pcp;
486 	model_t model = p->p_model;
487 	id_t tid = curthread->t_tid;
488 	int tslot = curthread->t_dslot;
489 
490 	ASSERT(MUTEX_HELD(&p->p_lock));
491 
492 	lwp->lwp_nostop--;
493 	if (p->p_flag & SEXITLWPS) {
494 		/*
495 		 * We are on our way to exiting because some
496 		 * other thread beat us in the race to exec().
497 		 * Don't clear the P_PR_EXEC flag in this case.
498 		 */
499 		return;
500 	}
501 
502 	/*
503 	 * Wake up anyone waiting in /proc for the process to complete exec().
504 	 */
505 	p->p_proc_flag &= ~P_PR_EXEC;
506 	if ((vp = p->p_trace) != NULL) {
507 		pcp = VTOP(vp)->pr_common;
508 		mutex_enter(&pcp->prc_mutex);
509 		cv_broadcast(&pcp->prc_wait);
510 		mutex_exit(&pcp->prc_mutex);
511 		for (; vp != NULL; vp = pnp->pr_next) {
512 			pnp = VTOP(vp);
513 			pnp->pr_common->prc_datamodel = model;
514 		}
515 	}
516 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
517 		/*
518 		 * We dealt with the process common above.
519 		 */
520 		ASSERT(p->p_trace != NULL);
521 		pcp = VTOP(vp)->pr_common;
522 		mutex_enter(&pcp->prc_mutex);
523 		cv_broadcast(&pcp->prc_wait);
524 		mutex_exit(&pcp->prc_mutex);
525 		for (; vp != NULL; vp = pnp->pr_next) {
526 			pnp = VTOP(vp);
527 			pcp = pnp->pr_common;
528 			pcp->prc_datamodel = model;
529 			pcp->prc_tid = tid;
530 			pcp->prc_tslot = tslot;
531 		}
532 	}
533 }
534 
535 /*
536  * Called from a hook in relvm() just before freeing the address space.
537  * We free all the watched areas now.
538  */
539 void
540 prrelvm(void)
541 {
542 	proc_t *p = ttoproc(curthread);
543 
544 	mutex_enter(&p->p_lock);
545 	prbarrier(p);	/* block all other /proc operations */
546 	if (pr_watch_active(p)) {
547 		pr_free_watchpoints(p);
548 		watch_disable(curthread);
549 	}
550 	mutex_exit(&p->p_lock);
551 	pr_free_watched_pages(p);
552 }
553 
554 /*
555  * Called from hooks in exec-related code when a traced process
556  * attempts to exec(2) a setuid/setgid program or an unreadable
557  * file.  Rather than fail the exec we invalidate the associated
558  * /proc vnodes so that subsequent attempts to use them will fail.
559  *
560  * All /proc vnodes, except directory vnodes, are retained on a linked
561  * list (rooted at p_plist in the process structure) until last close.
562  *
563  * A controlling process must re-open the /proc files in order to
564  * regain control.
565  */
566 void
567 prinvalidate(struct user *up)
568 {
569 	kthread_t *t = curthread;
570 	proc_t *p = ttoproc(t);
571 	vnode_t *vp;
572 	prnode_t *pnp;
573 	int writers = 0;
574 
575 	mutex_enter(&p->p_lock);
576 	prbarrier(p);	/* block all other /proc operations */
577 
578 	/*
579 	 * At this moment, there can be only one lwp in the process.
580 	 */
581 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
582 
583 	/*
584 	 * Invalidate any currently active /proc vnodes.
585 	 */
586 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
587 		pnp = VTOP(vp);
588 		switch (pnp->pr_type) {
589 		case PR_PSINFO:		/* these files can read by anyone */
590 		case PR_LPSINFO:
591 		case PR_LWPSINFO:
592 		case PR_LWPDIR:
593 		case PR_LWPIDDIR:
594 		case PR_USAGE:
595 		case PR_LUSAGE:
596 		case PR_LWPUSAGE:
597 			break;
598 		default:
599 			pnp->pr_flags |= PR_INVAL;
600 			break;
601 		}
602 	}
603 	/*
604 	 * Wake up anyone waiting for the process or lwp.
605 	 * p->p_trace is guaranteed to be non-NULL if there
606 	 * are any open /proc files for this process.
607 	 */
608 	if ((vp = p->p_trace) != NULL) {
609 		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
610 
611 		prnotify(vp);
612 		/*
613 		 * Are there any writers?
614 		 */
615 		if ((writers = pcp->prc_writers) != 0) {
616 			/*
617 			 * Clear the exclusive open flag (old /proc interface).
618 			 * Set prc_selfopens equal to prc_writers so that
619 			 * the next O_EXCL|O_WRITE open will succeed
620 			 * even with existing (though invalid) writers.
621 			 * prclose() must decrement prc_selfopens when
622 			 * the invalid files are closed.
623 			 */
624 			pcp->prc_flags &= ~PRC_EXCL;
625 			ASSERT(pcp->prc_selfopens <= writers);
626 			pcp->prc_selfopens = writers;
627 		}
628 	}
629 	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
630 	while (vp != NULL) {
631 		/*
632 		 * We should not invalidate the lwpiddir vnodes,
633 		 * but the necessities of maintaining the old
634 		 * ioctl()-based version of /proc require it.
635 		 */
636 		pnp = VTOP(vp);
637 		pnp->pr_flags |= PR_INVAL;
638 		prnotify(vp);
639 		vp = pnp->pr_next;
640 	}
641 
642 	/*
643 	 * If any tracing flags are in effect and any vnodes are open for
644 	 * writing then set the requested-stop and run-on-last-close flags.
645 	 * Otherwise, clear all tracing flags.
646 	 */
647 	t->t_proc_flag &= ~TP_PAUSE;
648 	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
649 		t->t_proc_flag |= TP_PRSTOP;
650 		aston(t);		/* so ISSIG will see the flag */
651 		p->p_proc_flag |= P_PR_RUNLCL;
652 	} else {
653 		premptyset(&up->u_entrymask);		/* syscalls */
654 		premptyset(&up->u_exitmask);
655 		up->u_systrap = 0;
656 		premptyset(&p->p_sigmask);		/* signals */
657 		premptyset(&p->p_fltmask);		/* faults */
658 		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
659 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
660 		prnostep(ttolwp(t));
661 	}
662 
663 	mutex_exit(&p->p_lock);
664 }
665 
666 /*
667  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
668  * Return with pr_pidlock held in all cases.
669  * Return with p_lock held if the the process still exists.
670  * Return value is the process pointer if the process still exists, else NULL.
671  * If we lock the process, give ourself kernel priority to avoid deadlocks;
672  * this is undone in prunlock().
673  */
674 proc_t *
675 pr_p_lock(prnode_t *pnp)
676 {
677 	proc_t *p;
678 	prcommon_t *pcp;
679 
680 	mutex_enter(&pr_pidlock);
681 	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
682 		return (NULL);
683 	mutex_enter(&p->p_lock);
684 	while (p->p_proc_flag & P_PR_LOCK) {
685 		/*
686 		 * This cv/mutex pair is persistent even if
687 		 * the process disappears while we sleep.
688 		 */
689 		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
690 		kmutex_t *mp = &p->p_lock;
691 
692 		mutex_exit(&pr_pidlock);
693 		cv_wait(cv, mp);
694 		mutex_exit(mp);
695 		mutex_enter(&pr_pidlock);
696 		if (pcp->prc_proc == NULL)
697 			return (NULL);
698 		ASSERT(p == pcp->prc_proc);
699 		mutex_enter(&p->p_lock);
700 	}
701 	p->p_proc_flag |= P_PR_LOCK;
702 	THREAD_KPRI_REQUEST();
703 	return (p);
704 }
705 
706 /*
707  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
708  * This prevents any lwp of the process from disappearing and
709  * blocks most operations that a process can perform on itself.
710  * Returns 0 on success, a non-zero error number on failure.
711  *
712  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
713  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
714  *
715  * error returns:
716  *	ENOENT: process or lwp has disappeared or process is exiting
717  *		(or has become a zombie and zdisp == ZNO).
718  *	EAGAIN: procfs vnode has become invalid.
719  *	EINTR:  signal arrived while waiting for exec to complete.
720  */
721 int
722 prlock(prnode_t *pnp, int zdisp)
723 {
724 	prcommon_t *pcp;
725 	proc_t *p;
726 
727 again:
728 	pcp = pnp->pr_common;
729 	p = pr_p_lock(pnp);
730 	mutex_exit(&pr_pidlock);
731 
732 	/*
733 	 * Return ENOENT immediately if there is no process.
734 	 */
735 	if (p == NULL)
736 		return (ENOENT);
737 
738 	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
739 
740 	/*
741 	 * Return ENOENT if process entered zombie state or is exiting
742 	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
743 	 */
744 	if (zdisp == ZNO &&
745 	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
746 		prunlock(pnp);
747 		return (ENOENT);
748 	}
749 
750 	/*
751 	 * If lwp-specific, check to see if lwp has disappeared.
752 	 */
753 	if (pcp->prc_flags & PRC_LWP) {
754 		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
755 		    pcp->prc_tslot == -1) {
756 			prunlock(pnp);
757 			return (ENOENT);
758 		}
759 	}
760 
761 	/*
762 	 * Return EAGAIN if we have encountered a security violation.
763 	 * (The process exec'd a set-id or unreadable executable file.)
764 	 */
765 	if (pnp->pr_flags & PR_INVAL) {
766 		prunlock(pnp);
767 		return (EAGAIN);
768 	}
769 
770 	/*
771 	 * If process is undergoing an exec(), wait for
772 	 * completion and then start all over again.
773 	 */
774 	if (p->p_proc_flag & P_PR_EXEC) {
775 		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
776 		mutex_enter(&pcp->prc_mutex);
777 		prunlock(pnp);
778 		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
779 			mutex_exit(&pcp->prc_mutex);
780 			return (EINTR);
781 		}
782 		mutex_exit(&pcp->prc_mutex);
783 		goto again;
784 	}
785 
786 	/*
787 	 * We return holding p->p_lock.
788 	 */
789 	return (0);
790 }
791 
792 /*
793  * Undo prlock() and pr_p_lock().
794  * p->p_lock is still held; pr_pidlock is no longer held.
795  *
796  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
797  * if any, waiting for the flag to be dropped; it retains p->p_lock.
798  *
799  * prunlock() calls prunmark() and then drops p->p_lock.
800  */
801 void
802 prunmark(proc_t *p)
803 {
804 	ASSERT(p->p_proc_flag & P_PR_LOCK);
805 	ASSERT(MUTEX_HELD(&p->p_lock));
806 
807 	cv_signal(&pr_pid_cv[p->p_slot]);
808 	p->p_proc_flag &= ~P_PR_LOCK;
809 	THREAD_KPRI_RELEASE();
810 }
811 
812 void
813 prunlock(prnode_t *pnp)
814 {
815 	prcommon_t *pcp = pnp->pr_common;
816 	proc_t *p = pcp->prc_proc;
817 
818 	/*
819 	 * If we (or someone) gave it a SIGKILL, and it is not
820 	 * already a zombie, set it running unconditionally.
821 	 */
822 	if ((p->p_flag & SKILLED) &&
823 	    !(p->p_flag & SEXITING) &&
824 	    !(pcp->prc_flags & PRC_DESTROY) &&
825 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
826 		(void) pr_setrun(pnp, 0);
827 	prunmark(p);
828 	mutex_exit(&p->p_lock);
829 }
830 
831 /*
832  * Called while holding p->p_lock to delay until the process is unlocked.
833  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
834  * The process cannot become locked again until p->p_lock is dropped.
835  */
836 void
837 prbarrier(proc_t *p)
838 {
839 	ASSERT(MUTEX_HELD(&p->p_lock));
840 
841 	if (p->p_proc_flag & P_PR_LOCK) {
842 		/* The process is locked; delay until not locked */
843 		uint_t slot = p->p_slot;
844 
845 		while (p->p_proc_flag & P_PR_LOCK)
846 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
847 		cv_signal(&pr_pid_cv[slot]);
848 	}
849 }
850 
851 /*
852  * Return process/lwp status.
853  * The u-block is mapped in by this routine and unmapped at the end.
854  */
855 void
856 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
857 {
858 	kthread_t *t;
859 
860 	ASSERT(MUTEX_HELD(&p->p_lock));
861 
862 	t = prchoose(p);	/* returns locked thread */
863 	ASSERT(t != NULL);
864 	thread_unlock(t);
865 
866 	/* just bzero the process part, prgetlwpstatus() does the rest */
867 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
868 	sp->pr_nlwp = p->p_lwpcnt;
869 	sp->pr_nzomb = p->p_zombcnt;
870 	prassignset(&sp->pr_sigpend, &p->p_sig);
871 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
872 	sp->pr_brksize = p->p_brksize;
873 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
874 	sp->pr_stksize = p->p_stksize;
875 	sp->pr_pid = p->p_pid;
876 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
877 	    (p->p_flag & SZONETOP)) {
878 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
879 		/*
880 		 * Inside local zones, fake zsched's pid as parent pids for
881 		 * processes which reference processes outside of the zone.
882 		 */
883 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
884 	} else {
885 		sp->pr_ppid = p->p_ppid;
886 	}
887 	sp->pr_pgid  = p->p_pgrp;
888 	sp->pr_sid   = p->p_sessp->s_sid;
889 	sp->pr_taskid = p->p_task->tk_tkid;
890 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
891 	sp->pr_zoneid = p->p_zone->zone_id;
892 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
893 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
894 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
895 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
896 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
897 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
898 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
899 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
900 	switch (p->p_model) {
901 	case DATAMODEL_ILP32:
902 		sp->pr_dmodel = PR_MODEL_ILP32;
903 		break;
904 	case DATAMODEL_LP64:
905 		sp->pr_dmodel = PR_MODEL_LP64;
906 		break;
907 	}
908 	if (p->p_agenttp)
909 		sp->pr_agentid = p->p_agenttp->t_tid;
910 
911 	/* get the chosen lwp's status */
912 	prgetlwpstatus(t, &sp->pr_lwp, zp);
913 
914 	/* replicate the flags */
915 	sp->pr_flags = sp->pr_lwp.pr_flags;
916 }
917 
918 #ifdef _SYSCALL32_IMPL
919 void
920 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
921 {
922 	proc_t *p = ttoproc(t);
923 	klwp_t *lwp = ttolwp(t);
924 	struct mstate *ms = &lwp->lwp_mstate;
925 	hrtime_t usr, sys;
926 	int flags;
927 	ulong_t instr;
928 
929 	ASSERT(MUTEX_HELD(&p->p_lock));
930 
931 	bzero(sp, sizeof (*sp));
932 	flags = 0L;
933 	if (t->t_state == TS_STOPPED) {
934 		flags |= PR_STOPPED;
935 		if ((t->t_schedflag & TS_PSTART) == 0)
936 			flags |= PR_ISTOP;
937 	} else if (VSTOPPED(t)) {
938 		flags |= PR_STOPPED|PR_ISTOP;
939 	}
940 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
941 		flags |= PR_DSTOP;
942 	if (lwp->lwp_asleep)
943 		flags |= PR_ASLEEP;
944 	if (t == p->p_agenttp)
945 		flags |= PR_AGENT;
946 	if (!(t->t_proc_flag & TP_TWAIT))
947 		flags |= PR_DETACH;
948 	if (t->t_proc_flag & TP_DAEMON)
949 		flags |= PR_DAEMON;
950 	if (p->p_proc_flag & P_PR_FORK)
951 		flags |= PR_FORK;
952 	if (p->p_proc_flag & P_PR_RUNLCL)
953 		flags |= PR_RLC;
954 	if (p->p_proc_flag & P_PR_KILLCL)
955 		flags |= PR_KLC;
956 	if (p->p_proc_flag & P_PR_ASYNC)
957 		flags |= PR_ASYNC;
958 	if (p->p_proc_flag & P_PR_BPTADJ)
959 		flags |= PR_BPTADJ;
960 	if (p->p_proc_flag & P_PR_PTRACE)
961 		flags |= PR_PTRACE;
962 	if (p->p_flag & SMSACCT)
963 		flags |= PR_MSACCT;
964 	if (p->p_flag & SMSFORK)
965 		flags |= PR_MSFORK;
966 	if (p->p_flag & SVFWAIT)
967 		flags |= PR_VFORKP;
968 	sp->pr_flags = flags;
969 	if (VSTOPPED(t)) {
970 		sp->pr_why   = PR_REQUESTED;
971 		sp->pr_what  = 0;
972 	} else {
973 		sp->pr_why   = t->t_whystop;
974 		sp->pr_what  = t->t_whatstop;
975 	}
976 	sp->pr_lwpid = t->t_tid;
977 	sp->pr_cursig  = lwp->lwp_cursig;
978 	prassignset(&sp->pr_lwppend, &t->t_sig);
979 	schedctl_finish_sigblock(t);
980 	prassignset(&sp->pr_lwphold, &t->t_hold);
981 	if (t->t_whystop == PR_FAULTED) {
982 		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
983 		if (t->t_whatstop == FLTPAGE)
984 			sp->pr_info.si_addr =
985 			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
986 	} else if (lwp->lwp_curinfo)
987 		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
988 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
989 	    sp->pr_info.si_zoneid != zp->zone_id) {
990 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
991 		sp->pr_info.si_uid = 0;
992 		sp->pr_info.si_ctid = -1;
993 		sp->pr_info.si_zoneid = zp->zone_id;
994 	}
995 	sp->pr_altstack.ss_sp =
996 	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
997 	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
998 	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
999 	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1000 	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1001 	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1002 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1003 	    sizeof (sp->pr_clname) - 1);
1004 	if (flags & PR_STOPPED)
1005 		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1006 	usr = ms->ms_acct[LMS_USER];
1007 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1008 	scalehrtime(&usr);
1009 	scalehrtime(&sys);
1010 	hrt2ts32(usr, &sp->pr_utime);
1011 	hrt2ts32(sys, &sp->pr_stime);
1012 
1013 	/*
1014 	 * Fetch the current instruction, if not a system process.
1015 	 * We don't attempt this unless the lwp is stopped.
1016 	 */
1017 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1018 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1019 	else if (!(flags & PR_STOPPED))
1020 		sp->pr_flags |= PR_PCINVAL;
1021 	else if (!prfetchinstr(lwp, &instr))
1022 		sp->pr_flags |= PR_PCINVAL;
1023 	else
1024 		sp->pr_instr = (uint32_t)instr;
1025 
1026 	/*
1027 	 * Drop p_lock while touching the lwp's stack.
1028 	 */
1029 	mutex_exit(&p->p_lock);
1030 	if (prisstep(lwp))
1031 		sp->pr_flags |= PR_STEP;
1032 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1033 		int i;
1034 
1035 		sp->pr_syscall = get_syscall32_args(lwp,
1036 		    (int *)sp->pr_sysarg, &i);
1037 		sp->pr_nsysarg = (ushort_t)i;
1038 	}
1039 	if ((flags & PR_STOPPED) || t == curthread)
1040 		prgetprregs32(lwp, sp->pr_reg);
1041 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1042 	    (flags & PR_VFORKP)) {
1043 		long r1, r2;
1044 		user_t *up;
1045 		auxv_t *auxp;
1046 		int i;
1047 
1048 		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1049 		if (sp->pr_errno == 0) {
1050 			sp->pr_rval1 = (int32_t)r1;
1051 			sp->pr_rval2 = (int32_t)r2;
1052 			sp->pr_errpriv = PRIV_NONE;
1053 		} else
1054 			sp->pr_errpriv = lwp->lwp_badpriv;
1055 
1056 		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
1057 			up = PTOU(p);
1058 			sp->pr_sysarg[0] = 0;
1059 			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1060 			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1061 			for (i = 0, auxp = up->u_auxv;
1062 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1063 			    i++, auxp++) {
1064 				if (auxp->a_type == AT_SUN_EXECNAME) {
1065 					sp->pr_sysarg[0] =
1066 					    (caddr32_t)
1067 					    (uintptr_t)auxp->a_un.a_ptr;
1068 					break;
1069 				}
1070 			}
1071 		}
1072 	}
1073 	if (prhasfp())
1074 		prgetprfpregs32(lwp, &sp->pr_fpreg);
1075 	mutex_enter(&p->p_lock);
1076 }
1077 
1078 void
1079 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1080 {
1081 	kthread_t *t;
1082 
1083 	ASSERT(MUTEX_HELD(&p->p_lock));
1084 
1085 	t = prchoose(p);	/* returns locked thread */
1086 	ASSERT(t != NULL);
1087 	thread_unlock(t);
1088 
1089 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1090 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1091 	sp->pr_nlwp = p->p_lwpcnt;
1092 	sp->pr_nzomb = p->p_zombcnt;
1093 	prassignset(&sp->pr_sigpend, &p->p_sig);
1094 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1095 	sp->pr_brksize = (uint32_t)p->p_brksize;
1096 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1097 	sp->pr_stksize = (uint32_t)p->p_stksize;
1098 	sp->pr_pid   = p->p_pid;
1099 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1100 	    (p->p_flag & SZONETOP)) {
1101 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1102 		/*
1103 		 * Inside local zones, fake zsched's pid as parent pids for
1104 		 * processes which reference processes outside of the zone.
1105 		 */
1106 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1107 	} else {
1108 		sp->pr_ppid = p->p_ppid;
1109 	}
1110 	sp->pr_pgid  = p->p_pgrp;
1111 	sp->pr_sid   = p->p_sessp->s_sid;
1112 	sp->pr_taskid = p->p_task->tk_tkid;
1113 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1114 	sp->pr_zoneid = p->p_zone->zone_id;
1115 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1116 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1117 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1118 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1119 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1120 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1121 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1122 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1123 	switch (p->p_model) {
1124 	case DATAMODEL_ILP32:
1125 		sp->pr_dmodel = PR_MODEL_ILP32;
1126 		break;
1127 	case DATAMODEL_LP64:
1128 		sp->pr_dmodel = PR_MODEL_LP64;
1129 		break;
1130 	}
1131 	if (p->p_agenttp)
1132 		sp->pr_agentid = p->p_agenttp->t_tid;
1133 
1134 	/* get the chosen lwp's status */
1135 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1136 
1137 	/* replicate the flags */
1138 	sp->pr_flags = sp->pr_lwp.pr_flags;
1139 }
1140 #endif	/* _SYSCALL32_IMPL */
1141 
/*
 * Return lwp status.
 *
 * Fills *sp with the status of the lwp associated with thread t.
 *
 *	t	thread whose lwp is being reported
 *	sp	output structure; completely zeroed before being filled in
 *	zp	zone on whose behalf the status is reported; used to rewrite
 *		signal-sender information that originates in another zone
 *
 * The caller must hold p->p_lock (asserted).  The lock is dropped and
 * re-acquired inside this function while the lwp's registers, system
 * call arguments and return values are fetched, so state protected by
 * p_lock may have changed by the time this function returns.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));

	/*
	 * Translate thread, lwp and process state bits into the PR_*
	 * status flags reported in pr_flags.
	 */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	/* PR_DSTOP is reported only while PR_ISTOP is not set */
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;

	/* a VSTOPPED() lwp is always reported as a PR_REQUESTED stop */
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	/* called before t_hold is sampled below */
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);

	/*
	 * Signal information: the lwp's saved siginfo when stopped on a
	 * fault, otherwise the queued info of the current signal, if any.
	 * With neither, pr_info stays zeroed from the bzero() above.
	 */
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	/*
	 * For a non-global zone zp, siginfo recorded with a different zone
	 * id is rewritten so the sender appears to be zp's zsched process
	 * with uid 0 and no contract.
	 * NOTE(review): SI_FROMUSER presumably tests for a user-generated
	 * signal, and it is applied to lwp_siginfo rather than to the
	 * pr_info just copied -- confirm this is intended.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	/*
	 * Copy at most size-1 bytes of the class name; the final byte of
	 * pr_clname stays zero from the bzero() above, so the result is
	 * always NUL-terminated.
	 */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* system time is the sum of the LMS_SYSTEM and LMS_TRAP states */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 * PR_PCINVAL is set whenever pr_instr could not be filled in.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	/* syscall number and arguments, only if stopped or asleep in one */
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	/* registers are fetched only for a stopped lwp or the caller itself */
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	/*
	 * Report system call return values when stopped on syscall exit
	 * or when the process has PR_VFORKP set (SVFWAIT).
	 */
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For exec, the reported arguments are replaced with values
		 * taken from the u-area: the exec name found in the aux
		 * vector (0 if there is no AT_SUN_EXECNAME entry), u_argv
		 * and u_envp.
		 */
		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	/* re-acquire p_lock so the caller sees it held on return */
	mutex_enter(&p->p_lock);
}
1301 
1302 /*
1303  * Get the sigaction structure for the specified signal.  The u-block
1304  * must already have been mapped in by the caller.
1305  */
1306 void
1307 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1308 {
1309 	bzero(sp, sizeof (*sp));
1310 
1311 	if (sig != 0 && (unsigned)sig < NSIG) {
1312 		sp->sa_handler = up->u_signal[sig-1];
1313 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1314 		if (sigismember(&up->u_sigonstack, sig))
1315 			sp->sa_flags |= SA_ONSTACK;
1316 		if (sigismember(&up->u_sigresethand, sig))
1317 			sp->sa_flags |= SA_RESETHAND;
1318 		if (sigismember(&up->u_sigrestart, sig))
1319 			sp->sa_flags |= SA_RESTART;
1320 		if (sigismember(&p->p_siginfo, sig))
1321 			sp->sa_flags |= SA_SIGINFO;
1322 		if (sigismember(&up->u_signodefer, sig))
1323 			sp->sa_flags |= SA_NODEFER;
1324 		if (sig == SIGCLD) {
1325 			if (p->p_flag & SNOWAIT)
1326 				sp->sa_flags |= SA_NOCLDWAIT;
1327 			if ((p->p_flag & SJCTL) == 0)
1328 				sp->sa_flags |= SA_NOCLDSTOP;
1329 		}
1330 	}
1331 }
1332 
1333 #ifdef _SYSCALL32_IMPL
1334 void
1335 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1336 {
1337 	bzero(sp, sizeof (*sp));
1338 
1339 	if (sig != 0 && (unsigned)sig < NSIG) {
1340 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1341 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1342 		if (sigismember(&up->u_sigonstack, sig))
1343 			sp->sa_flags |= SA_ONSTACK;
1344 		if (sigismember(&up->u_sigresethand, sig))
1345 			sp->sa_flags |= SA_RESETHAND;
1346 		if (sigismember(&up->u_sigrestart, sig))
1347 			sp->sa_flags |= SA_RESTART;
1348 		if (sigismember(&p->p_siginfo, sig))
1349 			sp->sa_flags |= SA_SIGINFO;
1350 		if (sigismember(&up->u_signodefer, sig))
1351 			sp->sa_flags |= SA_NODEFER;
1352 		if (sig == SIGCLD) {
1353 			if (p->p_flag & SNOWAIT)
1354 				sp->sa_flags |= SA_NOCLDWAIT;
1355 			if ((p->p_flag & SJCTL) == 0)
1356 				sp->sa_flags |= SA_NOCLDSTOP;
1357 		}
1358 	}
1359 }
1360 #endif	/* _SYSCALL32_IMPL */
1361 
1362 /*
1363  * Count the number of segments in this process's address space.
1364  */
1365 int
1366 prnsegs(struct as *as, int reserved)
1367 {
1368 	int n = 0;
1369 	struct seg *seg;
1370 
1371 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1372 
1373 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1374 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1375 		caddr_t saddr, naddr;
1376 		void *tmp = NULL;
1377 
1378 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1379 			(void) pr_getprot(seg, reserved, &tmp,
1380 			    &saddr, &naddr, eaddr);
1381 			if (saddr != naddr)
1382 				n++;
1383 		}
1384 
1385 		ASSERT(tmp == NULL);
1386 	}
1387 
1388 	return (n);
1389 }
1390 
/*
 * Render a uint32_t as decimal text without leading zeros.
 * No terminating null is written unless 'len' exceeds the number of
 * digits, in which case the output is padded with null characters up
 * to 'len' bytes.  Returns the number of digits written.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* a 32-bit value has at most 10 digits */
	char *pad_end = s + len;
	int ndigits = 0;
	int i;

	/* peel off digits, least significant first */
	do {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* emit them most significant first */
	for (i = ndigits; i > 0; i--)
		*s++ = digits[i - 1];

	/* optional pad */
	while (s < pad_end)
		*s++ = '\0';

	return (ndigits);
}
1419 
/*
 * Render a uint64_t as decimal text without leading zeros.
 * No terminating null is written.  Returns the number of digits written.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* a 64-bit value has at most 20 digits */
	int ndigits = 0;
	int i;

	/* peel off digits, least significant first */
	do {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* emit them most significant first */
	for (i = ndigits; i > 0; i--)
		*s++ = digits[i - 1];

	return (ndigits);
}
1444 
1445 void
1446 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1447 {
1448 	char *s = name;
1449 	struct vfs *vfsp;
1450 	struct vfssw *vfsswp;
1451 
1452 	if ((vfsp = vp->v_vfsp) != NULL &&
1453 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1454 	    *vfsswp->vsw_name) {
1455 		(void) strcpy(s, vfsswp->vsw_name);
1456 		s += strlen(s);
1457 		*s++ = '.';
1458 	}
1459 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1460 	*s++ = '.';
1461 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1462 	*s++ = '.';
1463 	s += pr_u64tos(vattr->va_nodeid, s);
1464 	*s++ = '\0';
1465 }
1466 
1467 struct seg *
1468 break_seg(proc_t *p)
1469 {
1470 	caddr_t addr = p->p_brkbase;
1471 	struct seg *seg;
1472 	struct vnode *vp;
1473 
1474 	if (p->p_brksize != 0)
1475 		addr += p->p_brksize - 1;
1476 	seg = as_segat(p->p_as, addr);
1477 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1478 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1479 		return (seg);
1480 	return (NULL);
1481 }
1482 
/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 *
 * Each buffer is a single allocation made of a piol_t header immediately
 * followed by its data area.  Buffers are linked on a list_t through
 * piol_link.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

/*
 * MAPSIZE is the size of each buffer added once the chain has to grow,
 * and the upper bound on the size of the initial buffer.
 */
#define	MAPSIZE	(64 * 1024)
/* start of a buffer's data area: the first byte past its piol_t header */
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1495 
1496 void
1497 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1498 {
1499 	piol_t	*iol;
1500 	size_t	initial_size = MIN(1, n) * itemsize;
1501 
1502 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1503 
1504 	ASSERT(list_head(iolhead) == NULL);
1505 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1506 	ASSERT(initial_size > 0);
1507 
1508 	/*
1509 	 * Someone creating chained copyout buffers may ask for less than
1510 	 * MAPSIZE if the amount of data to be buffered is known to be
1511 	 * smaller than that.
1512 	 * But in order to prevent involuntary self-denial of service,
1513 	 * the requested input size is clamped at MAPSIZE.
1514 	 */
1515 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1516 	iol = kmem_alloc(initial_size, KM_SLEEP);
1517 	list_insert_head(iolhead, iol);
1518 	iol->piol_usedsize = 0;
1519 	iol->piol_size = initial_size;
1520 }
1521 
1522 void *
1523 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1524 {
1525 	piol_t	*iol;
1526 	char	*new;
1527 
1528 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1529 	ASSERT(list_head(iolhead) != NULL);
1530 
1531 	iol = (piol_t *)list_tail(iolhead);
1532 
1533 	if (iol->piol_size <
1534 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1535 		/*
1536 		 * Out of space in the current buffer. Allocate more.
1537 		 */
1538 		piol_t *newiol;
1539 
1540 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1541 		newiol->piol_size = MAPSIZE;
1542 		newiol->piol_usedsize = 0;
1543 
1544 		list_insert_after(iolhead, iol, newiol);
1545 		iol = list_next(iolhead, iol);
1546 		ASSERT(iol == newiol);
1547 	}
1548 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1549 	iol->piol_usedsize += itemsize;
1550 	bzero(new, itemsize);
1551 	return (new);
1552 }
1553 
1554 int
1555 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1556 {
1557 	int error = errin;
1558 	piol_t	*iol;
1559 
1560 	while ((iol = list_head(iolhead)) != NULL) {
1561 		list_remove(iolhead, iol);
1562 		if (!error) {
1563 			if (copyout(PIOL_DATABUF(iol), *tgt,
1564 			    iol->piol_usedsize))
1565 				error = EFAULT;
1566 			*tgt += iol->piol_usedsize;
1567 		}
1568 		kmem_free(iol, iol->piol_size);
1569 	}
1570 	list_destroy(iolhead);
1571 
1572 	return (error);
1573 }
1574 
1575 int
1576 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1577 {
1578 	offset_t	off = uiop->uio_offset;
1579 	char		*base;
1580 	size_t		size;
1581 	piol_t		*iol;
1582 	int		error = errin;
1583 
1584 	while ((iol = list_head(iolhead)) != NULL) {
1585 		list_remove(iolhead, iol);
1586 		base = PIOL_DATABUF(iol);
1587 		size = iol->piol_usedsize;
1588 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1589 			error = uiomove(base + off, size - off,
1590 			    UIO_READ, uiop);
1591 		off = MAX(0, off - (offset_t)size);
1592 		kmem_free(iol, iol->piol_size);
1593 	}
1594 	list_destroy(iolhead);
1595 
1596 	return (error);
1597 }
1598 
/*
 * Return memory map information for process p as a chained list of
 * prmap_t structures, one per non-empty address range reported by
 * pr_getprot().
 * We allocate here; the caller must deallocate.
 *
 *	p		process whose address space is described
 *	reserved	passed through to pr_getsegsize() and pr_getprot();
 *			when non-zero, the stack mapping is additionally
 *			reported with an extent derived from p_stk_ctl
 *	iolhead		list head initialized here with pr_iol_initlist();
 *			the entries are appended with pr_iol_newbuf()
 *
 * The address space must not be kas and must be write-locked (asserted).
 * Always returns 0.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	/* an empty address space yields an initialized list with no entries */
	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* remember the break and stack segments so they can be flagged below */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/*
		 * Walk the segment one range at a time; each non-empty
		 * range [saddr, naddr) becomes one prmap_t entry.
		 * NOTE(review): pr_getprot() presumably returns the
		 * protections common to that range -- confirm against its
		 * definition.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			/* the new entry is returned zero-filled */
			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/*
			 * Anonymous: a segspt_shmops segment, or a segvn
			 * segment with no vnode behind this address.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the stack from its lowest
					 * permitted address: p_stk_ctl
					 * rounded up to a page boundary,
					 * measured back from the current
					 * end of the stack.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 * Only regular files mapped through segvn get a
			 * name; pr_mapname otherwise stays empty.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * pr_shmid is -1 when there is none, and also when
			 * shmgetid() reports SHMID_FREE; in the latter case
			 * MA_SHM is still set.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		/* tmp is expected to be NULL again once the walk completes */
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1712 
1713 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit variant of prgetmap(): return memory map information for
 * process p as a chained list of prmap32_t structures, one per non-empty
 * address range reported by pr_getprot().  Addresses and sizes are
 * narrowed to caddr32_t and size32_t.
 * We allocate here; the caller must deallocate.
 *
 *	p		process whose address space is described
 *	reserved	passed through to pr_getsegsize() and pr_getprot();
 *			when non-zero, the stack mapping is additionally
 *			reported with an extent derived from p_stk_ctl
 *	iolhead		list head initialized here with pr_iol_initlist();
 *			the entries are appended with pr_iol_newbuf()
 *
 * The address space must not be kas and must be write-locked (asserted).
 * Always returns 0.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	/* an empty address space yields an initialized list with no entries */
	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* remember the break and stack segments so they can be flagged below */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/*
		 * Walk the segment one range at a time; each non-empty
		 * range [saddr, naddr) becomes one prmap32_t entry.
		 * NOTE(review): pr_getprot() presumably returns the
		 * protections common to that range -- confirm against its
		 * definition.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			/* the new entry is returned zero-filled */
			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/*
			 * Anonymous: a segspt_shmops segment, or a segvn
			 * segment with no vnode behind this address.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the stack from its lowest
					 * permitted address: p_stk_ctl
					 * rounded up to a page boundary,
					 * measured back from the current
					 * end of the stack.  The address is
					 * computed at full width and only
					 * then narrowed.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 * Only regular files mapped through segvn get a
			 * name; pr_mapname otherwise stays empty.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * pr_shmid is -1 when there is none, and also when
			 * shmgetid() reports SHMID_FREE; in the latter case
			 * MA_SHM is still set.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		/* tmp is expected to be NULL again once the walk completes */
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1824 #endif	/* _SYSCALL32_IMPL */
1825 
1826 /*
1827  * Return the size of the /proc page data file.
1828  */
1829 size_t
1830 prpdsize(struct as *as)
1831 {
1832 	struct seg *seg;
1833 	size_t size;
1834 
1835 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1836 
1837 	if ((seg = AS_SEGFIRST(as)) == NULL)
1838 		return (0);
1839 
1840 	size = sizeof (prpageheader_t);
1841 	do {
1842 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1843 		caddr_t saddr, naddr;
1844 		void *tmp = NULL;
1845 		size_t npage;
1846 
1847 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1848 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1849 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1850 				size += sizeof (prasmap_t) + round8(npage);
1851 		}
1852 		ASSERT(tmp == NULL);
1853 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1854 
1855 	return (size);
1856 }
1857 
1858 #ifdef _SYSCALL32_IMPL
1859 size_t
1860 prpdsize32(struct as *as)
1861 {
1862 	struct seg *seg;
1863 	size_t size;
1864 
1865 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1866 
1867 	if ((seg = AS_SEGFIRST(as)) == NULL)
1868 		return (0);
1869 
1870 	size = sizeof (prpageheader32_t);
1871 	do {
1872 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1873 		caddr_t saddr, naddr;
1874 		void *tmp = NULL;
1875 		size_t npage;
1876 
1877 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1878 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1879 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1880 				size += sizeof (prasmap32_t) + round8(npage);
1881 		}
1882 		ASSERT(tmp == NULL);
1883 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1884 
1885 	return (size);
1886 }
1887 #endif	/* _SYSCALL32_IMPL */
1888 
/*
 * Read page data information.
 *
 * Builds, in a temporary kernel buffer, a prpageheader_t followed by one
 * prasmap_t plus per-page data bytes (filled in by hat_getstat()) for
 * each non-empty address range of the process, and transfers the result
 * to uiop.
 *
 *	p	process whose address space is examined
 *	hatid	passed through to hat_getstat()
 *	uiop	destination of the data
 *
 * Takes and releases the address space lock as writer itself.
 * Returns 0 if the address space is empty (nothing is transferred),
 * E2BIG if uiop has less room than prpdsize() computes, EINTR if a
 * signal is pending when the scan has to be retried, otherwise the
 * result of uiomove().
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

	/* restart point: the buffer is re-sized and the scan begins again */
again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	/* the caller must have room for the whole snapshot */
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	/* header first, then the mapping records packed back to back */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the record after this one would begin */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/*
			 * Anonymous: a segspt_shmops segment, or a segvn
			 * segment with no vnode behind this address.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 * Only regular files mapped through segvn get a
			 * name; pr_mapname otherwise stays empty.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * pr_shmid is -1 when there is none, and also when
			 * shmgetid() reports SHMID_FREE; in the latter case
			 * MA_SHM is still set.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/*
			 * The per-page data goes directly after the
			 * prasmap_t; then step to the next record.
			 */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		/* tmp is expected to be NULL again once the walk completes */
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	/*
	 * Only the part of the buffer actually filled in is transferred;
	 * it may be smaller than the size computed up front.
	 */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2037 
2038 #ifdef _SYSCALL32_IMPL
2039 int
2040 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2041 {
2042 	struct as *as = p->p_as;
2043 	caddr_t buf;
2044 	size_t size;
2045 	prpageheader32_t *php;
2046 	prasmap32_t *pmp;
2047 	struct seg *seg;
2048 	int error;
2049 
2050 again:
2051 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2052 
2053 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2054 		AS_LOCK_EXIT(as, &as->a_lock);
2055 		return (0);
2056 	}
2057 	size = prpdsize32(as);
2058 	if (uiop->uio_resid < size) {
2059 		AS_LOCK_EXIT(as, &as->a_lock);
2060 		return (E2BIG);
2061 	}
2062 
2063 	buf = kmem_zalloc(size, KM_SLEEP);
2064 	php = (prpageheader32_t *)buf;
2065 	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2066 
2067 	hrt2ts32(gethrtime(), &php->pr_tstamp);
2068 	php->pr_nmap = 0;
2069 	php->pr_npage = 0;
2070 	do {
2071 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2072 		caddr_t saddr, naddr;
2073 		void *tmp = NULL;
2074 
2075 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2076 			struct vnode *vp;
2077 			struct vattr vattr;
2078 			size_t len;
2079 			size_t npage;
2080 			uint_t prot;
2081 			uintptr_t next;
2082 
2083 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2084 			if ((len = (size_t)(naddr - saddr)) == 0)
2085 				continue;
2086 			npage = len / PAGESIZE;
2087 			next = (uintptr_t)(pmp + 1) + round8(npage);
2088 			/*
2089 			 * It's possible that the address space can change
2090 			 * subtlely even though we're holding as->a_lock
2091 			 * due to the nondeterminism of page_exists() in
2092 			 * the presence of asychronously flushed pages or
2093 			 * mapped files whose sizes are changing.
2094 			 * page_exists() may be called indirectly from
2095 			 * pr_getprot() by a SEGOP_INCORE() routine.
2096 			 * If this happens we need to make sure we don't
2097 			 * overrun the buffer whose size we computed based
2098 			 * on the initial iteration through the segments.
2099 			 * Once we've detected an overflow, we need to clean
2100 			 * up the temporary memory allocated in pr_getprot()
2101 			 * and retry. If there's a pending signal, we return
2102 			 * EINTR so that this thread can be dislodged if
2103 			 * a latent bug causes us to spin indefinitely.
2104 			 */
2105 			if (next > (uintptr_t)buf + size) {
2106 				pr_getprot_done(&tmp);
2107 				AS_LOCK_EXIT(as, &as->a_lock);
2108 
2109 				kmem_free(buf, size);
2110 
2111 				if (ISSIG(curthread, JUSTLOOKING))
2112 					return (EINTR);
2113 
2114 				goto again;
2115 			}
2116 
2117 			php->pr_nmap++;
2118 			php->pr_npage += npage;
2119 			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2120 			pmp->pr_npage = (size32_t)npage;
2121 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2122 			pmp->pr_mflags = 0;
2123 			if (prot & PROT_READ)
2124 				pmp->pr_mflags |= MA_READ;
2125 			if (prot & PROT_WRITE)
2126 				pmp->pr_mflags |= MA_WRITE;
2127 			if (prot & PROT_EXEC)
2128 				pmp->pr_mflags |= MA_EXEC;
2129 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2130 				pmp->pr_mflags |= MA_SHARED;
2131 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2132 				pmp->pr_mflags |= MA_NORESERVE;
2133 			if (seg->s_ops == &segspt_shmops ||
2134 			    (seg->s_ops == &segvn_ops &&
2135 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2136 				pmp->pr_mflags |= MA_ANON;
2137 			if (seg->s_ops == &segspt_shmops)
2138 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2139 			pmp->pr_pagesize = PAGESIZE;
2140 			/*
2141 			 * Manufacture a filename for the "object" directory.
2142 			 */
2143 			vattr.va_mask = AT_FSID|AT_NODEID;
2144 			if (seg->s_ops == &segvn_ops &&
2145 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2146 			    vp != NULL && vp->v_type == VREG &&
2147 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2148 				if (vp == p->p_exec)
2149 					(void) strcpy(pmp->pr_mapname, "a.out");
2150 				else
2151 					pr_object_name(pmp->pr_mapname,
2152 					    vp, &vattr);
2153 			}
2154 
2155 			/*
2156 			 * Get the SysV shared memory id, if any.
2157 			 */
2158 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2159 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2160 			    SHMID_NONE) {
2161 				if (pmp->pr_shmid == SHMID_FREE)
2162 					pmp->pr_shmid = -1;
2163 
2164 				pmp->pr_mflags |= MA_SHM;
2165 			} else {
2166 				pmp->pr_shmid = -1;
2167 			}
2168 
2169 			hat_getstat(as, saddr, len, hatid,
2170 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2171 			pmp = (prasmap32_t *)next;
2172 		}
2173 		ASSERT(tmp == NULL);
2174 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2175 
2176 	AS_LOCK_EXIT(as, &as->a_lock);
2177 
2178 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2179 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2180 	kmem_free(buf, size);
2181 
2182 	return (error);
2183 }
2184 #endif	/* _SYSCALL32_IMPL */
2185 
2186 ushort_t
2187 prgetpctcpu(uint64_t pct)
2188 {
2189 	/*
2190 	 * The value returned will be relevant in the zone of the examiner,
2191 	 * which may not be the same as the zone which performed the procfs
2192 	 * mount.
2193 	 */
2194 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2195 
2196 	/*
2197 	 * Prorate over online cpus so we don't exceed 100%
2198 	 */
2199 	if (nonline > 1)
2200 		pct /= nonline;
2201 	pct >>= 16;		/* convert to 16-bit scaled integer */
2202 	if (pct > 0x8000)	/* might happen, due to rounding */
2203 		pct = 0x8000;
2204 	return ((ushort_t)pct);
2205 }
2206 
2207 /*
2208  * Return information used by ps(1).
2209  */
2210 void
2211 prgetpsinfo(proc_t *p, psinfo_t *psp)
2212 {
2213 	kthread_t *t;
2214 	struct cred *cred;
2215 	hrtime_t hrutime, hrstime;
2216 
2217 	ASSERT(MUTEX_HELD(&p->p_lock));
2218 
2219 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2220 		bzero(psp, sizeof (*psp));
2221 	else {
2222 		thread_unlock(t);
2223 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2224 	}
2225 
2226 	/*
2227 	 * only export SSYS and SMSACCT; everything else is off-limits to
2228 	 * userland apps.
2229 	 */
2230 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2231 	psp->pr_nlwp = p->p_lwpcnt;
2232 	psp->pr_nzomb = p->p_zombcnt;
2233 	mutex_enter(&p->p_crlock);
2234 	cred = p->p_cred;
2235 	psp->pr_uid = crgetruid(cred);
2236 	psp->pr_euid = crgetuid(cred);
2237 	psp->pr_gid = crgetrgid(cred);
2238 	psp->pr_egid = crgetgid(cred);
2239 	mutex_exit(&p->p_crlock);
2240 	psp->pr_pid = p->p_pid;
2241 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2242 	    (p->p_flag & SZONETOP)) {
2243 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2244 		/*
2245 		 * Inside local zones, fake zsched's pid as parent pids for
2246 		 * processes which reference processes outside of the zone.
2247 		 */
2248 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2249 	} else {
2250 		psp->pr_ppid = p->p_ppid;
2251 	}
2252 	psp->pr_pgid = p->p_pgrp;
2253 	psp->pr_sid = p->p_sessp->s_sid;
2254 	psp->pr_taskid = p->p_task->tk_tkid;
2255 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2256 	psp->pr_poolid = p->p_pool->pool_id;
2257 	psp->pr_zoneid = p->p_zone->zone_id;
2258 	if ((psp->pr_contract = PRCTID(p)) == 0)
2259 		psp->pr_contract = -1;
2260 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2261 	switch (p->p_model) {
2262 	case DATAMODEL_ILP32:
2263 		psp->pr_dmodel = PR_MODEL_ILP32;
2264 		break;
2265 	case DATAMODEL_LP64:
2266 		psp->pr_dmodel = PR_MODEL_LP64;
2267 		break;
2268 	}
2269 	hrutime = mstate_aggr_state(p, LMS_USER);
2270 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2271 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2272 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2273 
2274 	if (t == NULL) {
2275 		int wcode = p->p_wcode;		/* must be atomic read */
2276 
2277 		if (wcode)
2278 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2279 		psp->pr_ttydev = PRNODEV;
2280 		psp->pr_lwp.pr_state = SZOMB;
2281 		psp->pr_lwp.pr_sname = 'Z';
2282 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2283 		psp->pr_lwp.pr_bindpset = PS_NONE;
2284 	} else {
2285 		user_t *up = PTOU(p);
2286 		struct as *as;
2287 		dev_t d;
2288 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2289 
2290 		d = cttydev(p);
2291 		/*
2292 		 * If the controlling terminal is the real
2293 		 * or workstation console device, map to what the
2294 		 * user thinks is the console device. Handle case when
2295 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2296 		 */
2297 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2298 			d = uconsdev;
2299 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2300 		psp->pr_start = up->u_start;
2301 		bcopy(up->u_comm, psp->pr_fname,
2302 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2303 		bcopy(up->u_psargs, psp->pr_psargs,
2304 		    MIN(PRARGSZ-1, PSARGSZ));
2305 		psp->pr_argc = up->u_argc;
2306 		psp->pr_argv = up->u_argv;
2307 		psp->pr_envp = up->u_envp;
2308 
2309 		/* get the chosen lwp's lwpsinfo */
2310 		prgetlwpsinfo(t, &psp->pr_lwp);
2311 
2312 		/* compute %cpu for the process */
2313 		if (p->p_lwpcnt == 1)
2314 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2315 		else {
2316 			uint64_t pct = 0;
2317 			hrtime_t cur_time = gethrtime_unscaled();
2318 
2319 			t = p->p_tlist;
2320 			do {
2321 				pct += cpu_update_pct(t, cur_time);
2322 			} while ((t = t->t_forw) != p->p_tlist);
2323 
2324 			psp->pr_pctcpu = prgetpctcpu(pct);
2325 		}
2326 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2327 			psp->pr_size = 0;
2328 			psp->pr_rssize = 0;
2329 		} else {
2330 			mutex_exit(&p->p_lock);
2331 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2332 			psp->pr_size = btopr(rm_assize(as)) * (PAGESIZE / 1024);
2333 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2334 			psp->pr_pctmem = rm_pctmemory(as);
2335 			AS_LOCK_EXIT(as, &as->a_lock);
2336 			mutex_enter(&p->p_lock);
2337 		}
2338 	}
2339 }
2340 
2341 #ifdef _SYSCALL32_IMPL
2342 void
2343 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2344 {
2345 	kthread_t *t;
2346 	struct cred *cred;
2347 	hrtime_t hrutime, hrstime;
2348 
2349 	ASSERT(MUTEX_HELD(&p->p_lock));
2350 
2351 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2352 		bzero(psp, sizeof (*psp));
2353 	else {
2354 		thread_unlock(t);
2355 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2356 	}
2357 
2358 	/*
2359 	 * only export SSYS and SMSACCT; everything else is off-limits to
2360 	 * userland apps.
2361 	 */
2362 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2363 	psp->pr_nlwp = p->p_lwpcnt;
2364 	psp->pr_nzomb = p->p_zombcnt;
2365 	mutex_enter(&p->p_crlock);
2366 	cred = p->p_cred;
2367 	psp->pr_uid = crgetruid(cred);
2368 	psp->pr_euid = crgetuid(cred);
2369 	psp->pr_gid = crgetrgid(cred);
2370 	psp->pr_egid = crgetgid(cred);
2371 	mutex_exit(&p->p_crlock);
2372 	psp->pr_pid = p->p_pid;
2373 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2374 	    (p->p_flag & SZONETOP)) {
2375 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2376 		/*
2377 		 * Inside local zones, fake zsched's pid as parent pids for
2378 		 * processes which reference processes outside of the zone.
2379 		 */
2380 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2381 	} else {
2382 		psp->pr_ppid = p->p_ppid;
2383 	}
2384 	psp->pr_pgid = p->p_pgrp;
2385 	psp->pr_sid = p->p_sessp->s_sid;
2386 	psp->pr_taskid = p->p_task->tk_tkid;
2387 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2388 	psp->pr_poolid = p->p_pool->pool_id;
2389 	psp->pr_zoneid = p->p_zone->zone_id;
2390 	if ((psp->pr_contract = PRCTID(p)) == 0)
2391 		psp->pr_contract = -1;
2392 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2393 	switch (p->p_model) {
2394 	case DATAMODEL_ILP32:
2395 		psp->pr_dmodel = PR_MODEL_ILP32;
2396 		break;
2397 	case DATAMODEL_LP64:
2398 		psp->pr_dmodel = PR_MODEL_LP64;
2399 		break;
2400 	}
2401 	hrutime = mstate_aggr_state(p, LMS_USER);
2402 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2403 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2404 	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2405 
2406 	if (t == NULL) {
2407 		extern int wstat(int, int);	/* needs a header file */
2408 		int wcode = p->p_wcode;		/* must be atomic read */
2409 
2410 		if (wcode)
2411 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2412 		psp->pr_ttydev = PRNODEV32;
2413 		psp->pr_lwp.pr_state = SZOMB;
2414 		psp->pr_lwp.pr_sname = 'Z';
2415 	} else {
2416 		user_t *up = PTOU(p);
2417 		struct as *as;
2418 		dev_t d;
2419 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2420 
2421 		d = cttydev(p);
2422 		/*
2423 		 * If the controlling terminal is the real
2424 		 * or workstation console device, map to what the
2425 		 * user thinks is the console device. Handle case when
2426 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2427 		 */
2428 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2429 			d = uconsdev;
2430 		(void) cmpldev(&psp->pr_ttydev, d);
2431 		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2432 		bcopy(up->u_comm, psp->pr_fname,
2433 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2434 		bcopy(up->u_psargs, psp->pr_psargs,
2435 		    MIN(PRARGSZ-1, PSARGSZ));
2436 		psp->pr_argc = up->u_argc;
2437 		psp->pr_argv = (caddr32_t)up->u_argv;
2438 		psp->pr_envp = (caddr32_t)up->u_envp;
2439 
2440 		/* get the chosen lwp's lwpsinfo */
2441 		prgetlwpsinfo32(t, &psp->pr_lwp);
2442 
2443 		/* compute %cpu for the process */
2444 		if (p->p_lwpcnt == 1)
2445 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2446 		else {
2447 			uint64_t pct = 0;
2448 			hrtime_t cur_time;
2449 
2450 			t = p->p_tlist;
2451 			cur_time = gethrtime_unscaled();
2452 			do {
2453 				pct += cpu_update_pct(t, cur_time);
2454 			} while ((t = t->t_forw) != p->p_tlist);
2455 
2456 			psp->pr_pctcpu = prgetpctcpu(pct);
2457 		}
2458 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2459 			psp->pr_size = 0;
2460 			psp->pr_rssize = 0;
2461 		} else {
2462 			mutex_exit(&p->p_lock);
2463 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2464 			psp->pr_size = (size32_t)
2465 			    (btopr(rm_assize(as)) * (PAGESIZE / 1024));
2466 			psp->pr_rssize = (size32_t)
2467 			    (rm_asrss(as) * (PAGESIZE / 1024));
2468 			psp->pr_pctmem = rm_pctmemory(as);
2469 			AS_LOCK_EXIT(as, &as->a_lock);
2470 			mutex_enter(&p->p_lock);
2471 		}
2472 	}
2473 
2474 	/*
2475 	 * If we are looking at an LP64 process, zero out
2476 	 * the fields that cannot be represented in ILP32.
2477 	 */
2478 	if (p->p_model != DATAMODEL_ILP32) {
2479 		psp->pr_size = 0;
2480 		psp->pr_rssize = 0;
2481 		psp->pr_argv = 0;
2482 		psp->pr_envp = 0;
2483 	}
2484 }
2485 #endif	/* _SYSCALL32_IMPL */
2486 
2487 void
2488 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2489 {
2490 	klwp_t *lwp = ttolwp(t);
2491 	sobj_ops_t *sobj;
2492 	char c, state;
2493 	uint64_t pct;
2494 	int retval, niceval;
2495 	hrtime_t hrutime, hrstime;
2496 
2497 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2498 
2499 	bzero(psp, sizeof (*psp));
2500 
2501 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2502 	psp->pr_lwpid = t->t_tid;
2503 	psp->pr_addr = (uintptr_t)t;
2504 	psp->pr_wchan = (uintptr_t)t->t_wchan;
2505 
2506 	/* map the thread state enum into a process state enum */
2507 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2508 	switch (state) {
2509 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2510 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2511 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2512 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2513 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2514 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2515 	default:		state = 0;		c = '?';	break;
2516 	}
2517 	psp->pr_state = state;
2518 	psp->pr_sname = c;
2519 	if ((sobj = t->t_sobj_ops) != NULL)
2520 		psp->pr_stype = SOBJ_TYPE(sobj);
2521 	retval = CL_DONICE(t, NULL, 0, &niceval);
2522 	if (retval == 0) {
2523 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2524 		psp->pr_nice = niceval + NZERO;
2525 	}
2526 	psp->pr_syscall = t->t_sysnum;
2527 	psp->pr_pri = t->t_pri;
2528 	psp->pr_start.tv_sec = t->t_start;
2529 	psp->pr_start.tv_nsec = 0L;
2530 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2531 	scalehrtime(&hrutime);
2532 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2533 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2534 	scalehrtime(&hrstime);
2535 	hrt2ts(hrutime + hrstime, &psp->pr_time);
2536 	/* compute %cpu for the lwp */
2537 	pct = cpu_update_pct(t, gethrtime_unscaled());
2538 	psp->pr_pctcpu = prgetpctcpu(pct);
2539 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2540 	if (psp->pr_cpu > 99)
2541 		psp->pr_cpu = 99;
2542 
2543 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2544 	    sizeof (psp->pr_clname) - 1);
2545 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2546 	psp->pr_onpro = t->t_cpu->cpu_id;
2547 	psp->pr_bindpro = t->t_bind_cpu;
2548 	psp->pr_bindpset = t->t_bind_pset;
2549 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2550 }
2551 
2552 #ifdef _SYSCALL32_IMPL
2553 void
2554 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2555 {
2556 	proc_t *p = ttoproc(t);
2557 	klwp_t *lwp = ttolwp(t);
2558 	sobj_ops_t *sobj;
2559 	char c, state;
2560 	uint64_t pct;
2561 	int retval, niceval;
2562 	hrtime_t hrutime, hrstime;
2563 
2564 	ASSERT(MUTEX_HELD(&p->p_lock));
2565 
2566 	bzero(psp, sizeof (*psp));
2567 
2568 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2569 	psp->pr_lwpid = t->t_tid;
2570 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2571 	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
2572 
2573 	/* map the thread state enum into a process state enum */
2574 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2575 	switch (state) {
2576 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2577 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2578 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2579 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2580 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2581 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2582 	default:		state = 0;		c = '?';	break;
2583 	}
2584 	psp->pr_state = state;
2585 	psp->pr_sname = c;
2586 	if ((sobj = t->t_sobj_ops) != NULL)
2587 		psp->pr_stype = SOBJ_TYPE(sobj);
2588 	retval = CL_DONICE(t, NULL, 0, &niceval);
2589 	if (retval == 0) {
2590 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2591 		psp->pr_nice = niceval + NZERO;
2592 	} else {
2593 		psp->pr_oldpri = 0;
2594 		psp->pr_nice = 0;
2595 	}
2596 	psp->pr_syscall = t->t_sysnum;
2597 	psp->pr_pri = t->t_pri;
2598 	psp->pr_start.tv_sec = (time32_t)t->t_start;
2599 	psp->pr_start.tv_nsec = 0L;
2600 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2601 	scalehrtime(&hrutime);
2602 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2603 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2604 	scalehrtime(&hrstime);
2605 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2606 	/* compute %cpu for the lwp */
2607 	pct = cpu_update_pct(t, gethrtime_unscaled());
2608 	psp->pr_pctcpu = prgetpctcpu(pct);
2609 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2610 	if (psp->pr_cpu > 99)
2611 		psp->pr_cpu = 99;
2612 
2613 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2614 	    sizeof (psp->pr_clname) - 1);
2615 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2616 	psp->pr_onpro = t->t_cpu->cpu_id;
2617 	psp->pr_bindpro = t->t_bind_cpu;
2618 	psp->pr_bindpset = t->t_bind_pset;
2619 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2620 }
2621 #endif	/* _SYSCALL32_IMPL */
2622 
2623 /*
2624  * This used to get called when microstate accounting was disabled but
2625  * microstate information was requested.  Since Microstate accounting is on
2626  * regardless of the proc flags, this simply makes it appear to procfs that
2627  * microstate accounting is on.  This is relatively meaningless since you
2628  * can't turn it off, but this is here for the sake of appearances.
2629  */
2630 
2631 /*ARGSUSED*/
2632 void
2633 estimate_msacct(kthread_t *t, hrtime_t curtime)
2634 {
2635 	proc_t *p;
2636 
2637 	if (t == NULL)
2638 		return;
2639 
2640 	p = ttoproc(t);
2641 	ASSERT(MUTEX_HELD(&p->p_lock));
2642 
2643 	/*
2644 	 * A system process (p0) could be referenced if the thread is
2645 	 * in the process of exiting.  Don't turn on microstate accounting
2646 	 * in that case.
2647 	 */
2648 	if (p->p_flag & SSYS)
2649 		return;
2650 
2651 	/*
2652 	 * Loop through all the LWPs (kernel threads) in the process.
2653 	 */
2654 	t = p->p_tlist;
2655 	do {
2656 		t->t_proc_flag |= TP_MSACCT;
2657 	} while ((t = t->t_forw) != p->p_tlist);
2658 
2659 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
2660 }
2661 
2662 /*
2663  * It's not really possible to disable microstate accounting anymore.
2664  * However, this routine simply turns off the ms accounting flags in a process
2665  * This way procfs can still pretend to turn microstate accounting on and
2666  * off for a process, but it actually doesn't do anything.  This is
2667  * a neutered form of preemptive idiot-proofing.
2668  */
2669 void
2670 disable_msacct(proc_t *p)
2671 {
2672 	kthread_t *t;
2673 
2674 	ASSERT(MUTEX_HELD(&p->p_lock));
2675 
2676 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
2677 	/*
2678 	 * Loop through all the LWPs (kernel threads) in the process.
2679 	 */
2680 	if ((t = p->p_tlist) != NULL) {
2681 		do {
2682 			/* clear per-thread flag */
2683 			t->t_proc_flag &= ~TP_MSACCT;
2684 		} while ((t = t->t_forw) != p->p_tlist);
2685 	}
2686 }
2687 
2688 /*
2689  * Return resource usage information.
2690  */
2691 void
2692 prgetusage(kthread_t *t, prhusage_t *pup)
2693 {
2694 	klwp_t *lwp = ttolwp(t);
2695 	hrtime_t *mstimep;
2696 	struct mstate *ms = &lwp->lwp_mstate;
2697 	int state;
2698 	int i;
2699 	hrtime_t curtime;
2700 	hrtime_t waitrq;
2701 	hrtime_t tmp1;
2702 
2703 	curtime = gethrtime_unscaled();
2704 
2705 	pup->pr_lwpid	= t->t_tid;
2706 	pup->pr_count	= 1;
2707 	pup->pr_create	= ms->ms_start;
2708 	pup->pr_term    = ms->ms_term;
2709 	scalehrtime(&pup->pr_create);
2710 	scalehrtime(&pup->pr_term);
2711 	if (ms->ms_term == 0) {
2712 		pup->pr_rtime = curtime - ms->ms_start;
2713 		scalehrtime(&pup->pr_rtime);
2714 	} else {
2715 		pup->pr_rtime = ms->ms_term - ms->ms_start;
2716 		scalehrtime(&pup->pr_rtime);
2717 	}
2718 
2719 
2720 	pup->pr_utime    = ms->ms_acct[LMS_USER];
2721 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
2722 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
2723 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
2724 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
2725 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
2726 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
2727 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
2728 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
2729 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2730 
2731 	prscaleusage(pup);
2732 
2733 	/*
2734 	 * Adjust for time waiting in the dispatcher queue.
2735 	 */
2736 	waitrq = t->t_waitrq;	/* hopefully atomic */
2737 	if (waitrq != 0) {
2738 		tmp1 = curtime - waitrq;
2739 		scalehrtime(&tmp1);
2740 		pup->pr_wtime += tmp1;
2741 		curtime = waitrq;
2742 	}
2743 
2744 	/*
2745 	 * Adjust for time spent in current microstate.
2746 	 */
2747 	if (ms->ms_state_start > curtime) {
2748 		curtime = gethrtime_unscaled();
2749 	}
2750 
2751 	i = 0;
2752 	do {
2753 		switch (state = t->t_mstate) {
2754 		case LMS_SLEEP:
2755 			/*
2756 			 * Update the timer for the current sleep state.
2757 			 */
2758 			switch (state = ms->ms_prev) {
2759 			case LMS_TFAULT:
2760 			case LMS_DFAULT:
2761 			case LMS_KFAULT:
2762 			case LMS_USER_LOCK:
2763 				break;
2764 			default:
2765 				state = LMS_SLEEP;
2766 				break;
2767 			}
2768 			break;
2769 		case LMS_TFAULT:
2770 		case LMS_DFAULT:
2771 		case LMS_KFAULT:
2772 		case LMS_USER_LOCK:
2773 			state = LMS_SYSTEM;
2774 			break;
2775 		}
2776 		switch (state) {
2777 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2778 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2779 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2780 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2781 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2782 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2783 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2784 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2785 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2786 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2787 		default:		panic("prgetusage: unknown microstate");
2788 		}
2789 		tmp1 = curtime - ms->ms_state_start;
2790 		if (tmp1 < 0) {
2791 			curtime = gethrtime_unscaled();
2792 			i++;
2793 			continue;
2794 		}
2795 		scalehrtime(&tmp1);
2796 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2797 
2798 	*mstimep += tmp1;
2799 
2800 	/* update pup timestamp */
2801 	pup->pr_tstamp = curtime;
2802 	scalehrtime(&pup->pr_tstamp);
2803 
2804 	/*
2805 	 * Resource usage counters.
2806 	 */
2807 	pup->pr_minf  = lwp->lwp_ru.minflt;
2808 	pup->pr_majf  = lwp->lwp_ru.majflt;
2809 	pup->pr_nswap = lwp->lwp_ru.nswap;
2810 	pup->pr_inblk = lwp->lwp_ru.inblock;
2811 	pup->pr_oublk = lwp->lwp_ru.oublock;
2812 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
2813 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
2814 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
2815 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
2816 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
2817 	pup->pr_sysc  = lwp->lwp_ru.sysc;
2818 	pup->pr_ioch  = lwp->lwp_ru.ioch;
2819 }
2820 
2821 /*
2822  * Convert ms_acct stats from unscaled high-res time to nanoseconds
2823  */
2824 void
2825 prscaleusage(prhusage_t *usg)
2826 {
2827 	scalehrtime(&usg->pr_utime);
2828 	scalehrtime(&usg->pr_stime);
2829 	scalehrtime(&usg->pr_ttime);
2830 	scalehrtime(&usg->pr_tftime);
2831 	scalehrtime(&usg->pr_dftime);
2832 	scalehrtime(&usg->pr_kftime);
2833 	scalehrtime(&usg->pr_ltime);
2834 	scalehrtime(&usg->pr_slptime);
2835 	scalehrtime(&usg->pr_wtime);
2836 	scalehrtime(&usg->pr_stoptime);
2837 }
2838 
2839 
2840 /*
2841  * Sum resource usage information.
2842  */
2843 void
2844 praddusage(kthread_t *t, prhusage_t *pup)
2845 {
2846 	klwp_t *lwp = ttolwp(t);
2847 	hrtime_t *mstimep;
2848 	struct mstate *ms = &lwp->lwp_mstate;
2849 	int state;
2850 	int i;
2851 	hrtime_t curtime;
2852 	hrtime_t waitrq;
2853 	hrtime_t tmp;
2854 	prhusage_t conv;
2855 
2856 	curtime = gethrtime_unscaled();
2857 
2858 	if (ms->ms_term == 0) {
2859 		tmp = curtime - ms->ms_start;
2860 		scalehrtime(&tmp);
2861 		pup->pr_rtime += tmp;
2862 	} else {
2863 		tmp = ms->ms_term - ms->ms_start;
2864 		scalehrtime(&tmp);
2865 		pup->pr_rtime += tmp;
2866 	}
2867 
2868 	conv.pr_utime = ms->ms_acct[LMS_USER];
2869 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2870 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2871 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2872 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2873 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2874 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2875 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2876 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2877 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2878 
2879 	prscaleusage(&conv);
2880 
2881 	pup->pr_utime	+= conv.pr_utime;
2882 	pup->pr_stime	+= conv.pr_stime;
2883 	pup->pr_ttime	+= conv.pr_ttime;
2884 	pup->pr_tftime	+= conv.pr_tftime;
2885 	pup->pr_dftime	+= conv.pr_dftime;
2886 	pup->pr_kftime	+= conv.pr_kftime;
2887 	pup->pr_ltime	+= conv.pr_ltime;
2888 	pup->pr_slptime	+= conv.pr_slptime;
2889 	pup->pr_wtime	+= conv.pr_wtime;
2890 	pup->pr_stoptime += conv.pr_stoptime;
2891 
2892 	/*
2893 	 * Adjust for time waiting in the dispatcher queue.
2894 	 */
2895 	waitrq = t->t_waitrq;	/* hopefully atomic */
2896 	if (waitrq != 0) {
2897 		tmp = curtime - waitrq;
2898 		scalehrtime(&tmp);
2899 		pup->pr_wtime += tmp;
2900 		curtime = waitrq;
2901 	}
2902 
2903 	/*
2904 	 * Adjust for time spent in current microstate.
2905 	 */
2906 	if (ms->ms_state_start > curtime) {
2907 		curtime = gethrtime_unscaled();
2908 	}
2909 
2910 	i = 0;
2911 	do {
2912 		switch (state = t->t_mstate) {
2913 		case LMS_SLEEP:
2914 			/*
2915 			 * Update the timer for the current sleep state.
2916 			 */
2917 			switch (state = ms->ms_prev) {
2918 			case LMS_TFAULT:
2919 			case LMS_DFAULT:
2920 			case LMS_KFAULT:
2921 			case LMS_USER_LOCK:
2922 				break;
2923 			default:
2924 				state = LMS_SLEEP;
2925 				break;
2926 			}
2927 			break;
2928 		case LMS_TFAULT:
2929 		case LMS_DFAULT:
2930 		case LMS_KFAULT:
2931 		case LMS_USER_LOCK:
2932 			state = LMS_SYSTEM;
2933 			break;
2934 		}
2935 		switch (state) {
2936 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2937 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2938 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2939 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2940 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2941 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2942 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2943 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2944 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2945 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2946 		default:		panic("praddusage: unknown microstate");
2947 		}
2948 		tmp = curtime - ms->ms_state_start;
2949 		if (tmp < 0) {
2950 			curtime = gethrtime_unscaled();
2951 			i++;
2952 			continue;
2953 		}
2954 		scalehrtime(&tmp);
2955 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
2956 
2957 	*mstimep += tmp;
2958 
2959 	/* update pup timestamp */
2960 	pup->pr_tstamp = curtime;
2961 	scalehrtime(&pup->pr_tstamp);
2962 
2963 	/*
2964 	 * Resource usage counters.
2965 	 */
2966 	pup->pr_minf  += lwp->lwp_ru.minflt;
2967 	pup->pr_majf  += lwp->lwp_ru.majflt;
2968 	pup->pr_nswap += lwp->lwp_ru.nswap;
2969 	pup->pr_inblk += lwp->lwp_ru.inblock;
2970 	pup->pr_oublk += lwp->lwp_ru.oublock;
2971 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
2972 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
2973 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
2974 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
2975 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
2976 	pup->pr_sysc  += lwp->lwp_ru.sysc;
2977 	pup->pr_ioch  += lwp->lwp_ru.ioch;
2978 }
2979 
2980 /*
2981  * Convert a prhusage_t to a prusage_t.
2982  * This means convert each hrtime_t to a timestruc_t
2983  * and copy the count fields uint64_t => ulong_t.
2984  */
2985 void
2986 prcvtusage(prhusage_t *pup, prusage_t *upup)
2987 {
2988 	uint64_t *ullp;
2989 	ulong_t *ulp;
2990 	int i;
2991 
2992 	upup->pr_lwpid = pup->pr_lwpid;
2993 	upup->pr_count = pup->pr_count;
2994 
2995 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
2996 	hrt2ts(pup->pr_create,	&upup->pr_create);
2997 	hrt2ts(pup->pr_term,	&upup->pr_term);
2998 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
2999 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3000 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3001 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3002 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3003 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3004 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3005 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3006 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3007 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3008 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3009 	bzero(upup->filltime, sizeof (upup->filltime));
3010 
3011 	ullp = &pup->pr_minf;
3012 	ulp = &upup->pr_minf;
3013 	for (i = 0; i < 22; i++)
3014 		*ulp++ = (ulong_t)*ullp++;
3015 }
3016 
3017 #ifdef _SYSCALL32_IMPL
3018 void
3019 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3020 {
3021 	uint64_t *ullp;
3022 	uint32_t *ulp;
3023 	int i;
3024 
3025 	upup->pr_lwpid = pup->pr_lwpid;
3026 	upup->pr_count = pup->pr_count;
3027 
3028 	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3029 	hrt2ts32(pup->pr_create,	&upup->pr_create);
3030 	hrt2ts32(pup->pr_term,		&upup->pr_term);
3031 	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3032 	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3033 	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3034 	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3035 	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3036 	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3037 	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3038 	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3039 	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3040 	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3041 	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3042 	bzero(upup->filltime, sizeof (upup->filltime));
3043 
3044 	ullp = &pup->pr_minf;
3045 	ulp = &upup->pr_minf;
3046 	for (i = 0; i < 22; i++)
3047 		*ulp++ = (uint32_t)*ullp++;
3048 }
3049 #endif	/* _SYSCALL32_IMPL */
3050 
/*
 * Report whether a set is empty.
 * The set is the n 32-bit words starting at sp.  Returns 1 when every
 * word is zero (including when n is 0), and 0 as soon as a non-zero
 * word is found.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t i;

	for (i = 0; i < n; i++) {
		if (sp[i] != 0)
			return (0);
	}
	return (1);
}
3062 
3063 /*
3064  * Utility routine for establishing a watched area in the process.
3065  * Keep the list of watched areas sorted by virtual address.
3066  */
3067 int
3068 set_watched_area(proc_t *p, struct watched_area *pwa)
3069 {
3070 	caddr_t vaddr = pwa->wa_vaddr;
3071 	caddr_t eaddr = pwa->wa_eaddr;
3072 	ulong_t flags = pwa->wa_flags;
3073 	struct watched_area *target;
3074 	avl_index_t where;
3075 	int error = 0;
3076 
3077 	/* we must not be holding p->p_lock, but the process must be locked */
3078 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3079 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3080 
3081 	/*
3082 	 * If this is our first watchpoint, enable watchpoints for the process.
3083 	 */
3084 	if (!pr_watch_active(p)) {
3085 		kthread_t *t;
3086 
3087 		mutex_enter(&p->p_lock);
3088 		if ((t = p->p_tlist) != NULL) {
3089 			do {
3090 				watch_enable(t);
3091 			} while ((t = t->t_forw) != p->p_tlist);
3092 		}
3093 		mutex_exit(&p->p_lock);
3094 	}
3095 
3096 	target = pr_find_watched_area(p, pwa, &where);
3097 	if (target != NULL) {
3098 		/*
3099 		 * We discovered an existing, overlapping watched area.
3100 		 * Allow it only if it is an exact match.
3101 		 */
3102 		if (target->wa_vaddr != vaddr ||
3103 		    target->wa_eaddr != eaddr)
3104 			error = EINVAL;
3105 		else if (target->wa_flags != flags) {
3106 			error = set_watched_page(p, vaddr, eaddr,
3107 			    flags, target->wa_flags);
3108 			target->wa_flags = flags;
3109 		}
3110 		kmem_free(pwa, sizeof (struct watched_area));
3111 	} else {
3112 		avl_insert(&p->p_warea, pwa, where);
3113 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3114 	}
3115 
3116 	return (error);
3117 }
3118 
3119 /*
3120  * Utility routine for clearing a watched area in the process.
3121  * Must be an exact match of the virtual address.
3122  * size and flags don't matter.
3123  */
3124 int
3125 clear_watched_area(proc_t *p, struct watched_area *pwa)
3126 {
3127 	struct watched_area *found;
3128 
3129 	/* we must not be holding p->p_lock, but the process must be locked */
3130 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3131 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3132 
3133 
3134 	if (!pr_watch_active(p)) {
3135 		kmem_free(pwa, sizeof (struct watched_area));
3136 		return (0);
3137 	}
3138 
3139 	/*
3140 	 * Look for a matching address in the watched areas.  If a match is
3141 	 * found, clear the old watched area and adjust the watched page(s).  It
3142 	 * is not an error if there is no match.
3143 	 */
3144 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3145 	    found->wa_vaddr == pwa->wa_vaddr) {
3146 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3147 		    found->wa_flags);
3148 		avl_remove(&p->p_warea, found);
3149 		kmem_free(found, sizeof (struct watched_area));
3150 	}
3151 
3152 	kmem_free(pwa, sizeof (struct watched_area));
3153 
3154 	/*
3155 	 * If we removed the last watched area from the process, disable
3156 	 * watchpoints.
3157 	 */
3158 	if (!pr_watch_active(p)) {
3159 		kthread_t *t;
3160 
3161 		mutex_enter(&p->p_lock);
3162 		if ((t = p->p_tlist) != NULL) {
3163 			do {
3164 				watch_disable(t);
3165 			} while ((t = t->t_forw) != p->p_tlist);
3166 		}
3167 		mutex_exit(&p->p_lock);
3168 	}
3169 
3170 	return (0);
3171 }
3172 
3173 /*
3174  * Frees all the watched_area structures
3175  */
3176 void
3177 pr_free_watchpoints(proc_t *p)
3178 {
3179 	struct watched_area *delp;
3180 	void *cookie;
3181 
3182 	cookie = NULL;
3183 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3184 		kmem_free(delp, sizeof (struct watched_area));
3185 
3186 	avl_destroy(&p->p_warea);
3187 }
3188 
3189 /*
3190  * This one is called by the traced process to unwatch all the
3191  * pages while deallocating the list of watched_page structs.
3192  */
3193 void
3194 pr_free_watched_pages(proc_t *p)
3195 {
3196 	struct as *as = p->p_as;
3197 	struct watched_page *pwp;
3198 	uint_t prot;
3199 	int    retrycnt, err;
3200 	void *cookie;
3201 
3202 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3203 		return;
3204 
3205 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3206 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3207 
3208 	pwp = avl_first(&as->a_wpage);
3209 
3210 	cookie = NULL;
3211 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3212 		retrycnt = 0;
3213 		if ((prot = pwp->wp_oprot) != 0) {
3214 			caddr_t addr = pwp->wp_vaddr;
3215 			struct seg *seg;
3216 		retry:
3217 
3218 			if ((pwp->wp_prot != prot ||
3219 			    (pwp->wp_flags & WP_NOWATCH)) &&
3220 			    (seg = as_segat(as, addr)) != NULL) {
3221 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3222 				if (err == IE_RETRY) {
3223 					ASSERT(retrycnt == 0);
3224 					retrycnt++;
3225 					goto retry;
3226 				}
3227 			}
3228 		}
3229 		kmem_free(pwp, sizeof (struct watched_page));
3230 	}
3231 
3232 	avl_destroy(&as->a_wpage);
3233 	p->p_wprot = NULL;
3234 
3235 	AS_LOCK_EXIT(as, &as->a_lock);
3236 }
3237 
3238 /*
3239  * Insert a watched area into the list of watched pages.
3240  * If oflags is zero then we are adding a new watched area.
3241  * Otherwise we are changing the flags of an existing watched area.
3242  */
3243 static int
3244 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3245 	ulong_t flags, ulong_t oflags)
3246 {
3247 	struct as *as = p->p_as;
3248 	avl_tree_t *pwp_tree;
3249 	struct watched_page *pwp, *newpwp;
3250 	struct watched_page tpw;
3251 	avl_index_t where;
3252 	struct seg *seg;
3253 	uint_t prot;
3254 	caddr_t addr;
3255 
3256 	/*
3257 	 * We need to pre-allocate a list of structures before we grab the
3258 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3259 	 * held.
3260 	 */
3261 	newpwp = NULL;
3262 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3263 	    addr < eaddr; addr += PAGESIZE) {
3264 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3265 		pwp->wp_list = newpwp;
3266 		newpwp = pwp;
3267 	}
3268 
3269 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3270 
3271 	/*
3272 	 * Search for an existing watched page to contain the watched area.
3273 	 * If none is found, grab a new one from the available list
3274 	 * and insert it in the active list, keeping the list sorted
3275 	 * by user-level virtual address.
3276 	 */
3277 	if (p->p_flag & SVFWAIT)
3278 		pwp_tree = &p->p_wpage;
3279 	else
3280 		pwp_tree = &as->a_wpage;
3281 
3282 again:
3283 	if (avl_numnodes(pwp_tree) > prnwatch) {
3284 		AS_LOCK_EXIT(as, &as->a_lock);
3285 		while (newpwp != NULL) {
3286 			pwp = newpwp->wp_list;
3287 			kmem_free(newpwp, sizeof (struct watched_page));
3288 			newpwp = pwp;
3289 		}
3290 		return (E2BIG);
3291 	}
3292 
3293 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3294 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3295 		pwp = newpwp;
3296 		newpwp = newpwp->wp_list;
3297 		pwp->wp_list = NULL;
3298 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3299 		    (uintptr_t)PAGEMASK);
3300 		avl_insert(pwp_tree, pwp, where);
3301 	}
3302 
3303 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3304 
3305 	if (oflags & WA_READ)
3306 		pwp->wp_read--;
3307 	if (oflags & WA_WRITE)
3308 		pwp->wp_write--;
3309 	if (oflags & WA_EXEC)
3310 		pwp->wp_exec--;
3311 
3312 	ASSERT(pwp->wp_read >= 0);
3313 	ASSERT(pwp->wp_write >= 0);
3314 	ASSERT(pwp->wp_exec >= 0);
3315 
3316 	if (flags & WA_READ)
3317 		pwp->wp_read++;
3318 	if (flags & WA_WRITE)
3319 		pwp->wp_write++;
3320 	if (flags & WA_EXEC)
3321 		pwp->wp_exec++;
3322 
3323 	if (!(p->p_flag & SVFWAIT)) {
3324 		vaddr = pwp->wp_vaddr;
3325 		if (pwp->wp_oprot == 0 &&
3326 		    (seg = as_segat(as, vaddr)) != NULL) {
3327 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
3328 			pwp->wp_oprot = (uchar_t)prot;
3329 			pwp->wp_prot = (uchar_t)prot;
3330 		}
3331 		if (pwp->wp_oprot != 0) {
3332 			prot = pwp->wp_oprot;
3333 			if (pwp->wp_read)
3334 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3335 			if (pwp->wp_write)
3336 				prot &= ~PROT_WRITE;
3337 			if (pwp->wp_exec)
3338 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3339 			if (!(pwp->wp_flags & WP_NOWATCH) &&
3340 			    pwp->wp_prot != prot &&
3341 			    (pwp->wp_flags & WP_SETPROT) == 0) {
3342 				pwp->wp_flags |= WP_SETPROT;
3343 				pwp->wp_list = p->p_wprot;
3344 				p->p_wprot = pwp;
3345 			}
3346 			pwp->wp_prot = (uchar_t)prot;
3347 		}
3348 	}
3349 
3350 	/*
3351 	 * If the watched area extends into the next page then do
3352 	 * it over again with the virtual address of the next page.
3353 	 */
3354 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3355 		goto again;
3356 
3357 	AS_LOCK_EXIT(as, &as->a_lock);
3358 
3359 	/*
3360 	 * Free any pages we may have over-allocated
3361 	 */
3362 	while (newpwp != NULL) {
3363 		pwp = newpwp->wp_list;
3364 		kmem_free(newpwp, sizeof (struct watched_page));
3365 		newpwp = pwp;
3366 	}
3367 
3368 	return (0);
3369 }
3370 
3371 /*
3372  * Remove a watched area from the list of watched pages.
3373  * A watched area may extend over more than one page.
3374  */
3375 static void
3376 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3377 {
3378 	struct as *as = p->p_as;
3379 	struct watched_page *pwp;
3380 	struct watched_page tpw;
3381 	avl_tree_t *tree;
3382 	avl_index_t where;
3383 
3384 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3385 
3386 	if (p->p_flag & SVFWAIT)
3387 		tree = &p->p_wpage;
3388 	else
3389 		tree = &as->a_wpage;
3390 
3391 	tpw.wp_vaddr = vaddr =
3392 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3393 	pwp = avl_find(tree, &tpw, &where);
3394 	if (pwp == NULL)
3395 		pwp = avl_nearest(tree, where, AVL_AFTER);
3396 
3397 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3398 		ASSERT(vaddr <=  pwp->wp_vaddr);
3399 
3400 		if (flags & WA_READ)
3401 			pwp->wp_read--;
3402 		if (flags & WA_WRITE)
3403 			pwp->wp_write--;
3404 		if (flags & WA_EXEC)
3405 			pwp->wp_exec--;
3406 
3407 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3408 			/*
3409 			 * Reset the hat layer's protections on this page.
3410 			 */
3411 			if (pwp->wp_oprot != 0) {
3412 				uint_t prot = pwp->wp_oprot;
3413 
3414 				if (pwp->wp_read)
3415 					prot &=
3416 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3417 				if (pwp->wp_write)
3418 					prot &= ~PROT_WRITE;
3419 				if (pwp->wp_exec)
3420 					prot &=
3421 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3422 				if (!(pwp->wp_flags & WP_NOWATCH) &&
3423 				    pwp->wp_prot != prot &&
3424 				    (pwp->wp_flags & WP_SETPROT) == 0) {
3425 					pwp->wp_flags |= WP_SETPROT;
3426 					pwp->wp_list = p->p_wprot;
3427 					p->p_wprot = pwp;
3428 				}
3429 				pwp->wp_prot = (uchar_t)prot;
3430 			}
3431 		} else {
3432 			/*
3433 			 * No watched areas remain in this page.
3434 			 * Reset everything to normal.
3435 			 */
3436 			if (pwp->wp_oprot != 0) {
3437 				pwp->wp_prot = pwp->wp_oprot;
3438 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
3439 					pwp->wp_flags |= WP_SETPROT;
3440 					pwp->wp_list = p->p_wprot;
3441 					p->p_wprot = pwp;
3442 				}
3443 			}
3444 		}
3445 
3446 		pwp = AVL_NEXT(tree, pwp);
3447 	}
3448 
3449 	AS_LOCK_EXIT(as, &as->a_lock);
3450 }
3451 
3452 /*
3453  * Return the original protections for the specified page.
3454  */
3455 static void
3456 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3457 {
3458 	struct watched_page *pwp;
3459 	struct watched_page tpw;
3460 
3461 	ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3462 
3463 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3464 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3465 		*prot = pwp->wp_oprot;
3466 }
3467 
3468 static prpagev_t *
3469 pr_pagev_create(struct seg *seg, int check_noreserve)
3470 {
3471 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3472 	size_t total_pages = seg_pages(seg);
3473 
3474 	/*
3475 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
3476 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
3477 	 * to about a megabyte of kernel heap by default.
3478 	 */
3479 	pagev->pg_npages = MIN(total_pages, pagev_lim);
3480 	pagev->pg_pnbase = 0;
3481 
3482 	pagev->pg_protv =
3483 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3484 
3485 	if (check_noreserve)
3486 		pagev->pg_incore =
3487 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3488 	else
3489 		pagev->pg_incore = NULL;
3490 
3491 	return (pagev);
3492 }
3493 
3494 static void
3495 pr_pagev_destroy(prpagev_t *pagev)
3496 {
3497 	if (pagev->pg_incore != NULL)
3498 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3499 
3500 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3501 	kmem_free(pagev, sizeof (prpagev_t));
3502 }
3503 
3504 static caddr_t
3505 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3506 {
3507 	ulong_t lastpg = seg_page(seg, eaddr - 1);
3508 	ulong_t pn, pnlim;
3509 	caddr_t saddr;
3510 	size_t len;
3511 
3512 	ASSERT(addr >= seg->s_base && addr <= eaddr);
3513 
3514 	if (addr == eaddr)
3515 		return (eaddr);
3516 
3517 refill:
3518 	ASSERT(addr < eaddr);
3519 	pagev->pg_pnbase = seg_page(seg, addr);
3520 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
3521 	saddr = addr;
3522 
3523 	if (lastpg < pnlim)
3524 		len = (size_t)(eaddr - addr);
3525 	else
3526 		len = pagev->pg_npages * PAGESIZE;
3527 
3528 	if (pagev->pg_incore != NULL) {
3529 		/*
3530 		 * INCORE cleverly has different semantics than GETPROT:
3531 		 * it returns info on pages up to but NOT including addr + len.
3532 		 */
3533 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3534 		pn = pagev->pg_pnbase;
3535 
3536 		do {
3537 			/*
3538 			 * Guilty knowledge here:  We know that segvn_incore
3539 			 * returns more than just the low-order bit that
3540 			 * indicates the page is actually in memory.  If any
3541 			 * bits are set, then the page has backing store.
3542 			 */
3543 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3544 				goto out;
3545 
3546 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3547 
3548 		/*
3549 		 * If we examined all the pages in the vector but we're not
3550 		 * at the end of the segment, take another lap.
3551 		 */
3552 		if (addr < eaddr)
3553 			goto refill;
3554 	}
3555 
3556 	/*
3557 	 * Need to take len - 1 because addr + len is the address of the
3558 	 * first byte of the page just past the end of what we want.
3559 	 */
3560 out:
3561 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3562 	return (addr);
3563 }
3564 
3565 static caddr_t
3566 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3567     caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3568 {
3569 	/*
3570 	 * Our starting address is either the specified address, or the base
3571 	 * address from the start of the pagev.  If the latter is greater,
3572 	 * this means a previous call to pr_pagev_fill has already scanned
3573 	 * further than the end of the previous mapping.
3574 	 */
3575 	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3576 	caddr_t addr = MAX(*saddrp, base);
3577 	ulong_t pn = seg_page(seg, addr);
3578 	uint_t prot, nprot;
3579 
3580 	/*
3581 	 * If we're dealing with noreserve pages, then advance addr to
3582 	 * the address of the next page which has backing store.
3583 	 */
3584 	if (pagev->pg_incore != NULL) {
3585 		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3586 			if ((addr += PAGESIZE) == eaddr) {
3587 				*saddrp = addr;
3588 				prot = 0;
3589 				goto out;
3590 			}
3591 			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3592 				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3593 				if (addr == eaddr) {
3594 					*saddrp = addr;
3595 					prot = 0;
3596 					goto out;
3597 				}
3598 				pn = seg_page(seg, addr);
3599 			}
3600 		}
3601 	}
3602 
3603 	/*
3604 	 * Get the protections on the page corresponding to addr.
3605 	 */
3606 	pn = seg_page(seg, addr);
3607 	ASSERT(pn >= pagev->pg_pnbase);
3608 	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3609 
3610 	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3611 	getwatchprot(seg->s_as, addr, &prot);
3612 	*saddrp = addr;
3613 
3614 	/*
3615 	 * Now loop until we find a backed page with different protections
3616 	 * or we reach the end of this segment.
3617 	 */
3618 	while ((addr += PAGESIZE) < eaddr) {
3619 		/*
3620 		 * If pn has advanced to the page number following what we
3621 		 * have information on, refill the page vector and reset
3622 		 * addr and pn.  If pr_pagev_fill does not return the
3623 		 * address of the next page, we have a discontiguity and
3624 		 * thus have reached the end of the current mapping.
3625 		 */
3626 		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3627 			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3628 			if (naddr != addr)
3629 				goto out;
3630 			pn = seg_page(seg, addr);
3631 		}
3632 
3633 		/*
3634 		 * The previous page's protections are in prot, and it has
3635 		 * backing.  If this page is MAP_NORESERVE and has no backing,
3636 		 * then end this mapping and return the previous protections.
3637 		 */
3638 		if (pagev->pg_incore != NULL &&
3639 		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3640 			break;
3641 
3642 		/*
3643 		 * Otherwise end the mapping if this page's protections (nprot)
3644 		 * are different than those in the previous page (prot).
3645 		 */
3646 		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3647 		getwatchprot(seg->s_as, addr, &nprot);
3648 
3649 		if (nprot != prot)
3650 			break;
3651 	}
3652 
3653 out:
3654 	*protp = prot;
3655 	return (addr);
3656 }
3657 
3658 size_t
3659 pr_getsegsize(struct seg *seg, int reserved)
3660 {
3661 	size_t size = seg->s_size;
3662 
3663 	/*
3664 	 * If we're interested in the reserved space, return the size of the
3665 	 * segment itself.  Everything else in this function is a special case
3666 	 * to determine the actual underlying size of various segment types.
3667 	 */
3668 	if (reserved)
3669 		return (size);
3670 
3671 	/*
3672 	 * If this is a segvn mapping of a regular file, return the smaller
3673 	 * of the segment size and the remaining size of the file beyond
3674 	 * the file offset corresponding to seg->s_base.
3675 	 */
3676 	if (seg->s_ops == &segvn_ops) {
3677 		vattr_t vattr;
3678 		vnode_t *vp;
3679 
3680 		vattr.va_mask = AT_SIZE;
3681 
3682 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3683 		    vp != NULL && vp->v_type == VREG &&
3684 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3685 
3686 			u_offset_t fsize = vattr.va_size;
3687 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3688 
3689 			if (fsize < offset)
3690 				fsize = 0;
3691 			else
3692 				fsize -= offset;
3693 
3694 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3695 
3696 			if (fsize < (u_offset_t)size)
3697 				size = (size_t)fsize;
3698 		}
3699 
3700 		return (size);
3701 	}
3702 
3703 	/*
3704 	 * If this is an ISM shared segment, don't include pages that are
3705 	 * beyond the real size of the spt segment that backs it.
3706 	 */
3707 	if (seg->s_ops == &segspt_shmops)
3708 		return (MIN(spt_realsize(seg), size));
3709 
3710 	/*
3711 	 * If this is segment is a mapping from /dev/null, then this is a
3712 	 * reservation of virtual address space and has no actual size.
3713 	 * Such segments are backed by segdev and have type set to neither
3714 	 * MAP_SHARED nor MAP_PRIVATE.
3715 	 */
3716 	if (seg->s_ops == &segdev_ops &&
3717 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
3718 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
3719 		return (0);
3720 
3721 	/*
3722 	 * If this segment doesn't match one of the special types we handle,
3723 	 * just return the size of the segment itself.
3724 	 */
3725 	return (size);
3726 }
3727 
3728 uint_t
3729 pr_getprot(struct seg *seg, int reserved, void **tmp,
3730 	caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3731 {
3732 	struct as *as = seg->s_as;
3733 
3734 	caddr_t saddr = *saddrp;
3735 	caddr_t naddr;
3736 
3737 	int check_noreserve;
3738 	uint_t prot;
3739 
3740 	union {
3741 		struct segvn_data *svd;
3742 		struct segdev_data *sdp;
3743 		void *data;
3744 	} s;
3745 
3746 	s.data = seg->s_data;
3747 
3748 	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3749 	ASSERT(saddr >= seg->s_base && saddr < eaddr);
3750 	ASSERT(eaddr <= seg->s_base + seg->s_size);
3751 
3752 	/*
3753 	 * Don't include MAP_NORESERVE pages in the address range
3754 	 * unless their mappings have actually materialized.
3755 	 * We cheat by knowing that segvn is the only segment
3756 	 * driver that supports MAP_NORESERVE.
3757 	 */
3758 	check_noreserve =
3759 	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3760 	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3761 	    (s.svd->flags & MAP_NORESERVE));
3762 
3763 	/*
3764 	 * Examine every page only as a last resort.  We use guilty knowledge
3765 	 * of segvn and segdev to avoid this: if there are no per-page
3766 	 * protections present in the segment and we don't care about
3767 	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3768 	 */
3769 	if (!check_noreserve && saddr == seg->s_base &&
3770 	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3771 		prot = s.svd->prot;
3772 		getwatchprot(as, saddr, &prot);
3773 		naddr = eaddr;
3774 
3775 	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3776 	    s.sdp != NULL && s.sdp->pageprot == 0) {
3777 		prot = s.sdp->prot;
3778 		getwatchprot(as, saddr, &prot);
3779 		naddr = eaddr;
3780 
3781 	} else {
3782 		prpagev_t *pagev;
3783 
3784 		/*
3785 		 * If addr is sitting at the start of the segment, then
3786 		 * create a page vector to store protection and incore
3787 		 * information for pages in the segment, and fill it.
3788 		 * Otherwise, we expect *tmp to address the prpagev_t
3789 		 * allocated by a previous call to this function.
3790 		 */
3791 		if (saddr == seg->s_base) {
3792 			pagev = pr_pagev_create(seg, check_noreserve);
3793 			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3794 
3795 			ASSERT(*tmp == NULL);
3796 			*tmp = pagev;
3797 
3798 			ASSERT(saddr <= eaddr);
3799 			*saddrp = saddr;
3800 
3801 			if (saddr == eaddr) {
3802 				naddr = saddr;
3803 				prot = 0;
3804 				goto out;
3805 			}
3806 
3807 		} else {
3808 			ASSERT(*tmp != NULL);
3809 			pagev = (prpagev_t *)*tmp;
3810 		}
3811 
3812 		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3813 		ASSERT(naddr <= eaddr);
3814 	}
3815 
3816 out:
3817 	if (naddr == eaddr)
3818 		pr_getprot_done(tmp);
3819 	*naddrp = naddr;
3820 	return (prot);
3821 }
3822 
3823 void
3824 pr_getprot_done(void **tmp)
3825 {
3826 	if (*tmp != NULL) {
3827 		pr_pagev_destroy((prpagev_t *)*tmp);
3828 		*tmp = NULL;
3829 	}
3830 }
3831 
3832 /*
3833  * Return true iff the vnode is a /proc file from the object directory.
3834  */
3835 int
3836 pr_isobject(vnode_t *vp)
3837 {
3838 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3839 }
3840 
3841 /*
3842  * Return true iff the vnode is a /proc file opened by the process itself.
3843  */
3844 int
3845 pr_isself(vnode_t *vp)
3846 {
3847 	/*
3848 	 * XXX: To retain binary compatibility with the old
3849 	 * ioctl()-based version of /proc, we exempt self-opens
3850 	 * of /proc/<pid> from being marked close-on-exec.
3851 	 */
3852 	return (vn_matchops(vp, prvnodeops) &&
3853 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
3854 	    VTOP(vp)->pr_type != PR_PIDDIR);
3855 }
3856 
3857 static ssize_t
3858 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3859 {
3860 	ssize_t pagesize, hatsize;
3861 
3862 	ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
3863 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3864 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3865 	ASSERT(saddr < eaddr);
3866 
3867 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3868 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3869 	ASSERT(pagesize != 0);
3870 
3871 	if (pagesize == -1)
3872 		pagesize = PAGESIZE;
3873 
3874 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3875 
3876 	while (saddr < eaddr) {
3877 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3878 			break;
3879 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
3880 		saddr += pagesize;
3881 	}
3882 
3883 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
3884 	return (hatsize);
3885 }
3886 
3887 /*
3888  * Return an array of structures with extended memory map information.
3889  * We allocate here; the caller must deallocate.
3890  */
3891 int
3892 prgetxmap(proc_t *p, list_t *iolhead)
3893 {
3894 	struct as *as = p->p_as;
3895 	prxmap_t *mp;
3896 	struct seg *seg;
3897 	struct seg *brkseg, *stkseg;
3898 	struct vnode *vp;
3899 	struct vattr vattr;
3900 	uint_t prot;
3901 
3902 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
3903 
3904 	/*
3905 	 * Request an initial buffer size that doesn't waste memory
3906 	 * if the address space has only a small number of segments.
3907 	 */
3908 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
3909 
3910 	if ((seg = AS_SEGFIRST(as)) == NULL)
3911 		return (0);
3912 
3913 	brkseg = break_seg(p);
3914 	stkseg = as_segat(as, prgetstackbase(p));
3915 
3916 	do {
3917 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
3918 		caddr_t saddr, naddr, baddr;
3919 		void *tmp = NULL;
3920 		ssize_t psz;
3921 		char *parr;
3922 		uint64_t npages;
3923 		uint64_t pagenum;
3924 
3925 		/*
3926 		 * Segment loop part one: iterate from the base of the segment
3927 		 * to its end, pausing at each address boundary (baddr) between
3928 		 * ranges that have different virtual memory protections.
3929 		 */
3930 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
3931 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
3932 			ASSERT(baddr >= saddr && baddr <= eaddr);
3933 
3934 			/*
3935 			 * Segment loop part two: iterate from the current
3936 			 * position to the end of the protection boundary,
3937 			 * pausing at each address boundary (naddr) between
3938 			 * ranges that have different underlying page sizes.
3939 			 */
3940 			for (; saddr < baddr; saddr = naddr) {
3941 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
3942 				ASSERT(naddr >= saddr && naddr <= baddr);
3943 
3944 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
3945 
3946 				mp->pr_vaddr = (uintptr_t)saddr;
3947 				mp->pr_size = naddr - saddr;
3948 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
3949 				mp->pr_mflags = 0;
3950 				if (prot & PROT_READ)
3951 					mp->pr_mflags |= MA_READ;
3952 				if (prot & PROT_WRITE)
3953 					mp->pr_mflags |= MA_WRITE;
3954 				if (prot & PROT_EXEC)
3955 					mp->pr_mflags |= MA_EXEC;
3956 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
3957 					mp->pr_mflags |= MA_SHARED;
3958 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
3959 					mp->pr_mflags |= MA_NORESERVE;
3960 				if (seg->s_ops == &segspt_shmops ||
3961 				    (seg->s_ops == &segvn_ops &&
3962 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
3963 				    vp == NULL)))
3964 					mp->pr_mflags |= MA_ANON;
3965 				if (seg == brkseg)
3966 					mp->pr_mflags |= MA_BREAK;
3967 				else if (seg == stkseg)
3968 					mp->pr_mflags |= MA_STACK;
3969 				if (seg->s_ops == &segspt_shmops)
3970 					mp->pr_mflags |= MA_ISM | MA_SHM;
3971 
3972 				mp->pr_pagesize = PAGESIZE;
3973 				if (psz == -1) {
3974 					mp->pr_hatpagesize = 0;
3975 				} else {
3976 					mp->pr_hatpagesize = psz;
3977 				}
3978 
3979 				/*
3980 				 * Manufacture a filename for the "object" dir.
3981 				 */
3982 				mp->pr_dev = PRNODEV;
3983 				vattr.va_mask = AT_FSID|AT_NODEID;
3984 				if (seg->s_ops == &segvn_ops &&
3985 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
3986 				    vp != NULL && vp->v_type == VREG &&
3987 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
3988 				    NULL) == 0) {
3989 					mp->pr_dev = vattr.va_fsid;
3990 					mp->pr_ino = vattr.va_nodeid;
3991 					if (vp == p->p_exec)
3992 						(void) strcpy(mp->pr_mapname,
3993 						    "a.out");
3994 					else
3995 						pr_object_name(mp->pr_mapname,
3996 						    vp, &vattr);
3997 				}
3998 
3999 				/*
4000 				 * Get the SysV shared memory id, if any.
4001 				 */
4002 				if ((mp->pr_mflags & MA_SHARED) &&
4003 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4004 				    seg->s_base)) != SHMID_NONE) {
4005 					if (mp->pr_shmid == SHMID_FREE)
4006 						mp->pr_shmid = -1;
4007 
4008 					mp->pr_mflags |= MA_SHM;
4009 				} else {
4010 					mp->pr_shmid = -1;
4011 				}
4012 
4013 				npages = ((uintptr_t)(naddr - saddr)) >>
4014 				    PAGESHIFT;
4015 				parr = kmem_zalloc(npages, KM_SLEEP);
4016 
4017 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4018 
4019 				for (pagenum = 0; pagenum < npages; pagenum++) {
4020 					if (parr[pagenum] & SEG_PAGE_INCORE)
4021 						mp->pr_rss++;
4022 					if (parr[pagenum] & SEG_PAGE_ANON)
4023 						mp->pr_anon++;
4024 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4025 						mp->pr_locked++;
4026 				}
4027 				kmem_free(parr, npages);
4028 			}
4029 		}
4030 		ASSERT(tmp == NULL);
4031 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4032 
4033 	return (0);
4034 }
4035 
4036 /*
4037  * Return the process's credentials.  We don't need a 32-bit equivalent of
4038  * this function because prcred_t and prcred32_t are actually the same.
4039  */
4040 void
4041 prgetcred(proc_t *p, prcred_t *pcrp)
4042 {
4043 	mutex_enter(&p->p_crlock);
4044 	cred2prcred(p->p_cred, pcrp);
4045 	mutex_exit(&p->p_crlock);
4046 }
4047 
4048 /*
4049  * Compute actual size of the prpriv_t structure.
4050  */
4051 
4052 size_t
4053 prgetprivsize(void)
4054 {
4055 	return (priv_prgetprivsize(NULL));
4056 }
4057 
4058 /*
4059  * Return the process's privileges.  We don't need a 32-bit equivalent of
4060  * this function because prpriv_t and prpriv32_t are actually the same.
4061  */
4062 void
4063 prgetpriv(proc_t *p, prpriv_t *pprp)
4064 {
4065 	mutex_enter(&p->p_crlock);
4066 	cred2prpriv(p->p_cred, pprp);
4067 	mutex_exit(&p->p_crlock);
4068 }
4069 
4070 #ifdef _SYSCALL32_IMPL
4071 /*
4072  * Return an array of structures with HAT memory map information.
4073  * We allocate here; the caller must deallocate.
4074  */
4075 int
4076 prgetxmap32(proc_t *p, list_t *iolhead)
4077 {
4078 	struct as *as = p->p_as;
4079 	prxmap32_t *mp;
4080 	struct seg *seg;
4081 	struct seg *brkseg, *stkseg;
4082 	struct vnode *vp;
4083 	struct vattr vattr;
4084 	uint_t prot;
4085 
4086 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4087 
4088 	/*
4089 	 * Request an initial buffer size that doesn't waste memory
4090 	 * if the address space has only a small number of segments.
4091 	 */
4092 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4093 
4094 	if ((seg = AS_SEGFIRST(as)) == NULL)
4095 		return (0);
4096 
4097 	brkseg = break_seg(p);
4098 	stkseg = as_segat(as, prgetstackbase(p));
4099 
4100 	do {
4101 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4102 		caddr_t saddr, naddr, baddr;
4103 		void *tmp = NULL;
4104 		ssize_t psz;
4105 		char *parr;
4106 		uint64_t npages;
4107 		uint64_t pagenum;
4108 
4109 		/*
4110 		 * Segment loop part one: iterate from the base of the segment
4111 		 * to its end, pausing at each address boundary (baddr) between
4112 		 * ranges that have different virtual memory protections.
4113 		 */
4114 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4115 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4116 			ASSERT(baddr >= saddr && baddr <= eaddr);
4117 
4118 			/*
4119 			 * Segment loop part two: iterate from the current
4120 			 * position to the end of the protection boundary,
4121 			 * pausing at each address boundary (naddr) between
4122 			 * ranges that have different underlying page sizes.
4123 			 */
4124 			for (; saddr < baddr; saddr = naddr) {
4125 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4126 				ASSERT(naddr >= saddr && naddr <= baddr);
4127 
4128 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4129 
4130 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4131 				mp->pr_size = (size32_t)(naddr - saddr);
4132 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4133 				mp->pr_mflags = 0;
4134 				if (prot & PROT_READ)
4135 					mp->pr_mflags |= MA_READ;
4136 				if (prot & PROT_WRITE)
4137 					mp->pr_mflags |= MA_WRITE;
4138 				if (prot & PROT_EXEC)
4139 					mp->pr_mflags |= MA_EXEC;
4140 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4141 					mp->pr_mflags |= MA_SHARED;
4142 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4143 					mp->pr_mflags |= MA_NORESERVE;
4144 				if (seg->s_ops == &segspt_shmops ||
4145 				    (seg->s_ops == &segvn_ops &&
4146 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4147 				    vp == NULL)))
4148 					mp->pr_mflags |= MA_ANON;
4149 				if (seg == brkseg)
4150 					mp->pr_mflags |= MA_BREAK;
4151 				else if (seg == stkseg)
4152 					mp->pr_mflags |= MA_STACK;
4153 				if (seg->s_ops == &segspt_shmops)
4154 					mp->pr_mflags |= MA_ISM | MA_SHM;
4155 
4156 				mp->pr_pagesize = PAGESIZE;
4157 				if (psz == -1) {
4158 					mp->pr_hatpagesize = 0;
4159 				} else {
4160 					mp->pr_hatpagesize = psz;
4161 				}
4162 
4163 				/*
4164 				 * Manufacture a filename for the "object" dir.
4165 				 */
4166 				mp->pr_dev = PRNODEV32;
4167 				vattr.va_mask = AT_FSID|AT_NODEID;
4168 				if (seg->s_ops == &segvn_ops &&
4169 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4170 				    vp != NULL && vp->v_type == VREG &&
4171 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4172 				    NULL) == 0) {
4173 					(void) cmpldev(&mp->pr_dev,
4174 					    vattr.va_fsid);
4175 					mp->pr_ino = vattr.va_nodeid;
4176 					if (vp == p->p_exec)
4177 						(void) strcpy(mp->pr_mapname,
4178 						    "a.out");
4179 					else
4180 						pr_object_name(mp->pr_mapname,
4181 						    vp, &vattr);
4182 				}
4183 
4184 				/*
4185 				 * Get the SysV shared memory id, if any.
4186 				 */
4187 				if ((mp->pr_mflags & MA_SHARED) &&
4188 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4189 				    seg->s_base)) != SHMID_NONE) {
4190 					if (mp->pr_shmid == SHMID_FREE)
4191 						mp->pr_shmid = -1;
4192 
4193 					mp->pr_mflags |= MA_SHM;
4194 				} else {
4195 					mp->pr_shmid = -1;
4196 				}
4197 
4198 				npages = ((uintptr_t)(naddr - saddr)) >>
4199 				    PAGESHIFT;
4200 				parr = kmem_zalloc(npages, KM_SLEEP);
4201 
4202 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4203 
4204 				for (pagenum = 0; pagenum < npages; pagenum++) {
4205 					if (parr[pagenum] & SEG_PAGE_INCORE)
4206 						mp->pr_rss++;
4207 					if (parr[pagenum] & SEG_PAGE_ANON)
4208 						mp->pr_anon++;
4209 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4210 						mp->pr_locked++;
4211 				}
4212 				kmem_free(parr, npages);
4213 			}
4214 		}
4215 		ASSERT(tmp == NULL);
4216 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4217 
4218 	return (0);
4219 }
4220 #endif	/* _SYSCALL32_IMPL */
4221