xref: /titanic_41/usr/src/uts/common/fs/proc/prsubr.c (revision 989f28072d20c73ae0955d6a1e3e2fc74831cb39)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #include <sys/types.h>
30 #include <sys/t_lock.h>
31 #include <sys/param.h>
32 #include <sys/cmn_err.h>
33 #include <sys/cred.h>
34 #include <sys/priv.h>
35 #include <sys/debug.h>
36 #include <sys/errno.h>
37 #include <sys/inline.h>
38 #include <sys/kmem.h>
39 #include <sys/mman.h>
40 #include <sys/proc.h>
41 #include <sys/brand.h>
42 #include <sys/sobject.h>
43 #include <sys/sysmacros.h>
44 #include <sys/systm.h>
45 #include <sys/uio.h>
46 #include <sys/var.h>
47 #include <sys/vfs.h>
48 #include <sys/vnode.h>
49 #include <sys/session.h>
50 #include <sys/pcb.h>
51 #include <sys/signal.h>
52 #include <sys/user.h>
53 #include <sys/disp.h>
54 #include <sys/class.h>
55 #include <sys/ts.h>
56 #include <sys/bitmap.h>
57 #include <sys/poll.h>
58 #include <sys/shm_impl.h>
59 #include <sys/fault.h>
60 #include <sys/syscall.h>
61 #include <sys/procfs.h>
62 #include <sys/processor.h>
63 #include <sys/cpuvar.h>
64 #include <sys/copyops.h>
65 #include <sys/time.h>
66 #include <sys/msacct.h>
67 #include <vm/as.h>
68 #include <vm/rm.h>
69 #include <vm/seg.h>
70 #include <vm/seg_vn.h>
71 #include <vm/seg_dev.h>
72 #include <vm/seg_spt.h>
73 #include <vm/page.h>
74 #include <sys/vmparam.h>
75 #include <sys/swap.h>
76 #include <fs/proc/prdata.h>
77 #include <sys/task.h>
78 #include <sys/project.h>
79 #include <sys/contract_impl.h>
80 #include <sys/contract/process.h>
81 #include <sys/contract/process_impl.h>
82 #include <sys/schedctl.h>
83 #include <sys/pool.h>
84 #include <sys/zone.h>
85 #include <sys/atomic.h>
86 #include <sys/sdt.h>
87 
88 #define	MAX_ITERS_SPIN	5
89 
90 typedef struct prpagev {
91 	uint_t *pg_protv;	/* vector of page permissions */
92 	char *pg_incore;	/* vector of incore flags */
93 	size_t pg_npages;	/* number of pages in protv and incore */
94 	ulong_t pg_pnbase;	/* pn within segment of first protv element */
95 } prpagev_t;
96 
97 size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
98 
99 extern struct seg_ops segdev_ops;	/* needs a header file */
100 extern struct seg_ops segspt_shmops;	/* needs a header file */
101 
102 static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
103 static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
104 
105 /*
106  * Choose an lwp from the complete set of lwps for the process.
107  * This is called for any operation applied to the process
108  * file descriptor that requires an lwp to operate upon.
109  *
110  * Returns a pointer to the thread for the selected LWP,
111  * and with the dispatcher lock held for the thread.
112  *
113  * The algorithm for choosing an lwp is critical for /proc semantics;
114  * don't touch this code unless you know all of the implications.
115  */
116 kthread_t *
117 prchoose(proc_t *p)
118 {
119 	kthread_t *t;
120 	kthread_t *t_onproc = NULL;	/* running on processor */
121 	kthread_t *t_run = NULL;	/* runnable, on disp queue */
122 	kthread_t *t_sleep = NULL;	/* sleeping */
123 	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
124 	kthread_t *t_susp = NULL;	/* suspended stop */
125 	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
126 	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
127 	kthread_t *t_req = NULL;	/* requested stop */
128 	kthread_t *t_istop = NULL;	/* event-of-interest stop */
129 	kthread_t *t_dtrace = NULL;	/* DTrace stop */
130 
131 	ASSERT(MUTEX_HELD(&p->p_lock));
132 
133 	/*
134 	 * If the agent lwp exists, it takes precedence over all others.
135 	 */
136 	if ((t = p->p_agenttp) != NULL) {
137 		thread_lock(t);
138 		return (t);
139 	}
140 
141 	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
142 		return (t);
143 	do {		/* for eacn lwp in the process */
144 		if (VSTOPPED(t)) {	/* virtually stopped */
145 			if (t_req == NULL)
146 				t_req = t;
147 			continue;
148 		}
149 
150 		thread_lock(t);		/* make sure thread is in good state */
151 		switch (t->t_state) {
152 		default:
153 			panic("prchoose: bad thread state %d, thread 0x%p",
154 			    t->t_state, (void *)t);
155 			/*NOTREACHED*/
156 		case TS_SLEEP:
157 			/* this is filthy */
158 			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
159 			    t->t_wchan0 == NULL) {
160 				if (t_hold == NULL)
161 					t_hold = t;
162 			} else {
163 				if (t_sleep == NULL)
164 					t_sleep = t;
165 			}
166 			break;
167 		case TS_RUN:
168 		case TS_WAIT:
169 			if (t_run == NULL)
170 				t_run = t;
171 			break;
172 		case TS_ONPROC:
173 			if (t_onproc == NULL)
174 				t_onproc = t;
175 			break;
176 		case TS_ZOMB:		/* last possible choice */
177 			break;
178 		case TS_STOPPED:
179 			switch (t->t_whystop) {
180 			case PR_SUSPENDED:
181 				if (t_susp == NULL)
182 					t_susp = t;
183 				break;
184 			case PR_JOBCONTROL:
185 				if (t->t_proc_flag & TP_PRSTOP) {
186 					if (t_jdstop == NULL)
187 						t_jdstop = t;
188 				} else {
189 					if (t_jstop == NULL)
190 						t_jstop = t;
191 				}
192 				break;
193 			case PR_REQUESTED:
194 				if (t->t_dtrace_stop && t_dtrace == NULL)
195 					t_dtrace = t;
196 				else if (t_req == NULL)
197 					t_req = t;
198 				break;
199 			case PR_SYSENTRY:
200 			case PR_SYSEXIT:
201 			case PR_SIGNALLED:
202 			case PR_FAULTED:
203 				/*
204 				 * Make an lwp calling exit() be the
205 				 * last lwp seen in the process.
206 				 */
207 				if (t_istop == NULL ||
208 				    (t_istop->t_whystop == PR_SYSENTRY &&
209 				    t_istop->t_whatstop == SYS_exit))
210 					t_istop = t;
211 				break;
212 			case PR_CHECKPOINT:	/* can't happen? */
213 				break;
214 			default:
215 				panic("prchoose: bad t_whystop %d, thread 0x%p",
216 				    t->t_whystop, (void *)t);
217 				/*NOTREACHED*/
218 			}
219 			break;
220 		}
221 		thread_unlock(t);
222 	} while ((t = t->t_forw) != p->p_tlist);
223 
224 	if (t_onproc)
225 		t = t_onproc;
226 	else if (t_run)
227 		t = t_run;
228 	else if (t_sleep)
229 		t = t_sleep;
230 	else if (t_jstop)
231 		t = t_jstop;
232 	else if (t_jdstop)
233 		t = t_jdstop;
234 	else if (t_istop)
235 		t = t_istop;
236 	else if (t_dtrace)
237 		t = t_dtrace;
238 	else if (t_req)
239 		t = t_req;
240 	else if (t_hold)
241 		t = t_hold;
242 	else if (t_susp)
243 		t = t_susp;
244 	else			/* TS_ZOMB */
245 		t = p->p_tlist;
246 
247 	if (t != NULL)
248 		thread_lock(t);
249 	return (t);
250 }
251 
252 /*
253  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
254  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
255  * on the /proc file descriptor.  Called from stop() when a traced
256  * process stops on an event of interest.  Also called from exit()
257  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
258  */
259 void
260 prnotify(struct vnode *vp)
261 {
262 	prcommon_t *pcp = VTOP(vp)->pr_common;
263 
264 	mutex_enter(&pcp->prc_mutex);
265 	cv_broadcast(&pcp->prc_wait);
266 	mutex_exit(&pcp->prc_mutex);
267 	if (pcp->prc_flags & PRC_POLL) {
268 		/*
269 		 * We call pollwakeup() with POLLHUP to ensure that
270 		 * the pollers are awakened even if they are polling
271 		 * for nothing (i.e., waiting for the process to exit).
272 		 * This enables the use of the PRC_POLL flag for optimization
273 		 * (we can turn off PRC_POLL only if we know no pollers remain).
274 		 */
275 		pcp->prc_flags &= ~PRC_POLL;
276 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
277 	}
278 }
279 
280 /* called immediately below, in prfree() */
281 static void
282 prfreenotify(vnode_t *vp)
283 {
284 	prnode_t *pnp;
285 	prcommon_t *pcp;
286 
287 	while (vp != NULL) {
288 		pnp = VTOP(vp);
289 		pcp = pnp->pr_common;
290 		ASSERT(pcp->prc_thread == NULL);
291 		pcp->prc_proc = NULL;
292 		/*
293 		 * We can't call prnotify() here because we are holding
294 		 * pidlock.  We assert that there is no need to.
295 		 */
296 		mutex_enter(&pcp->prc_mutex);
297 		cv_broadcast(&pcp->prc_wait);
298 		mutex_exit(&pcp->prc_mutex);
299 		ASSERT(!(pcp->prc_flags & PRC_POLL));
300 
301 		vp = pnp->pr_next;
302 		pnp->pr_next = NULL;
303 	}
304 }
305 
306 /*
307  * Called from a hook in freeproc() when a traced process is removed
308  * from the process table.  The proc-table pointers of all associated
309  * /proc vnodes are cleared to indicate that the process has gone away.
310  */
311 void
312 prfree(proc_t *p)
313 {
314 	uint_t slot = p->p_slot;
315 
316 	ASSERT(MUTEX_HELD(&pidlock));
317 
318 	/*
319 	 * Block the process against /proc so it can be freed.
320 	 * It cannot be freed while locked by some controlling process.
321 	 * Lock ordering:
322 	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
323 	 */
324 	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
325 	mutex_enter(&p->p_lock);
326 	while (p->p_proc_flag & P_PR_LOCK) {
327 		mutex_exit(&pr_pidlock);
328 		cv_wait(&pr_pid_cv[slot], &p->p_lock);
329 		mutex_exit(&p->p_lock);
330 		mutex_enter(&pr_pidlock);
331 		mutex_enter(&p->p_lock);
332 	}
333 
334 	ASSERT(p->p_tlist == NULL);
335 
336 	prfreenotify(p->p_plist);
337 	p->p_plist = NULL;
338 
339 	prfreenotify(p->p_trace);
340 	p->p_trace = NULL;
341 
342 	/*
343 	 * We broadcast to wake up everyone waiting for this process.
344 	 * No one can reach this process from this point on.
345 	 */
346 	cv_broadcast(&pr_pid_cv[slot]);
347 
348 	mutex_exit(&p->p_lock);
349 	mutex_exit(&pr_pidlock);
350 }
351 
352 /*
353  * Called from a hook in exit() when a traced process is becoming a zombie.
354  */
355 void
356 prexit(proc_t *p)
357 {
358 	ASSERT(MUTEX_HELD(&p->p_lock));
359 
360 	if (pr_watch_active(p)) {
361 		pr_free_watchpoints(p);
362 		watch_disable(curthread);
363 	}
364 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
365 	if (p->p_trace) {
366 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
367 		prnotify(p->p_trace);
368 	}
369 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
370 }
371 
372 /*
373  * Called when a thread calls lwp_exit().
374  */
375 void
376 prlwpexit(kthread_t *t)
377 {
378 	vnode_t *vp;
379 	prnode_t *pnp;
380 	prcommon_t *pcp;
381 	proc_t *p = ttoproc(t);
382 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
383 
384 	ASSERT(t == curthread);
385 	ASSERT(MUTEX_HELD(&p->p_lock));
386 
387 	/*
388 	 * The process must be blocked against /proc to do this safely.
389 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
390 	 * It is the caller's responsibility to have called prbarrier(p).
391 	 */
392 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
393 
394 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
395 		pnp = VTOP(vp);
396 		pcp = pnp->pr_common;
397 		if (pcp->prc_thread == t) {
398 			pcp->prc_thread = NULL;
399 			pcp->prc_flags |= PRC_DESTROY;
400 		}
401 	}
402 
403 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
404 		pnp = VTOP(vp);
405 		pcp = pnp->pr_common;
406 		pcp->prc_thread = NULL;
407 		pcp->prc_flags |= PRC_DESTROY;
408 		prnotify(vp);
409 	}
410 
411 	if (p->p_trace)
412 		prnotify(p->p_trace);
413 }
414 
415 /*
416  * Called when a zombie thread is joined or when a
417  * detached lwp exits.  Called from lwp_hash_out().
418  */
419 void
420 prlwpfree(proc_t *p, lwpent_t *lep)
421 {
422 	vnode_t *vp;
423 	prnode_t *pnp;
424 	prcommon_t *pcp;
425 
426 	ASSERT(MUTEX_HELD(&p->p_lock));
427 
428 	/*
429 	 * The process must be blocked against /proc to do this safely.
430 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
431 	 * It is the caller's responsibility to have called prbarrier(p).
432 	 */
433 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
434 
435 	vp = lep->le_trace;
436 	lep->le_trace = NULL;
437 	while (vp) {
438 		prnotify(vp);
439 		pnp = VTOP(vp);
440 		pcp = pnp->pr_common;
441 		ASSERT(pcp->prc_thread == NULL &&
442 		    (pcp->prc_flags & PRC_DESTROY));
443 		pcp->prc_tslot = -1;
444 		vp = pnp->pr_next;
445 		pnp->pr_next = NULL;
446 	}
447 
448 	if (p->p_trace)
449 		prnotify(p->p_trace);
450 }
451 
452 /*
453  * Called from a hook in exec() when a thread starts exec().
454  */
455 void
456 prexecstart(void)
457 {
458 	proc_t *p = ttoproc(curthread);
459 	klwp_t *lwp = ttolwp(curthread);
460 
461 	/*
462 	 * The P_PR_EXEC flag blocks /proc operations for
463 	 * the duration of the exec().
464 	 * We can't start exec() while the process is
465 	 * locked by /proc, so we call prbarrier().
466 	 * lwp_nostop keeps the process from being stopped
467 	 * via job control for the duration of the exec().
468 	 */
469 
470 	ASSERT(MUTEX_HELD(&p->p_lock));
471 	prbarrier(p);
472 	lwp->lwp_nostop++;
473 	p->p_proc_flag |= P_PR_EXEC;
474 }
475 
476 /*
477  * Called from a hook in exec() when a thread finishes exec().
478  * The thread may or may not have succeeded.  Some other thread
479  * may have beat it to the punch.
480  */
481 void
482 prexecend(void)
483 {
484 	proc_t *p = ttoproc(curthread);
485 	klwp_t *lwp = ttolwp(curthread);
486 	vnode_t *vp;
487 	prnode_t *pnp;
488 	prcommon_t *pcp;
489 	model_t model = p->p_model;
490 	id_t tid = curthread->t_tid;
491 	int tslot = curthread->t_dslot;
492 
493 	ASSERT(MUTEX_HELD(&p->p_lock));
494 
495 	lwp->lwp_nostop--;
496 	if (p->p_flag & SEXITLWPS) {
497 		/*
498 		 * We are on our way to exiting because some
499 		 * other thread beat us in the race to exec().
500 		 * Don't clear the P_PR_EXEC flag in this case.
501 		 */
502 		return;
503 	}
504 
505 	/*
506 	 * Wake up anyone waiting in /proc for the process to complete exec().
507 	 */
508 	p->p_proc_flag &= ~P_PR_EXEC;
509 	if ((vp = p->p_trace) != NULL) {
510 		pcp = VTOP(vp)->pr_common;
511 		mutex_enter(&pcp->prc_mutex);
512 		cv_broadcast(&pcp->prc_wait);
513 		mutex_exit(&pcp->prc_mutex);
514 		for (; vp != NULL; vp = pnp->pr_next) {
515 			pnp = VTOP(vp);
516 			pnp->pr_common->prc_datamodel = model;
517 		}
518 	}
519 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
520 		/*
521 		 * We dealt with the process common above.
522 		 */
523 		ASSERT(p->p_trace != NULL);
524 		pcp = VTOP(vp)->pr_common;
525 		mutex_enter(&pcp->prc_mutex);
526 		cv_broadcast(&pcp->prc_wait);
527 		mutex_exit(&pcp->prc_mutex);
528 		for (; vp != NULL; vp = pnp->pr_next) {
529 			pnp = VTOP(vp);
530 			pcp = pnp->pr_common;
531 			pcp->prc_datamodel = model;
532 			pcp->prc_tid = tid;
533 			pcp->prc_tslot = tslot;
534 		}
535 	}
536 }
537 
538 /*
539  * Called from a hook in relvm() just before freeing the address space.
540  * We free all the watched areas now.
541  */
542 void
543 prrelvm(void)
544 {
545 	proc_t *p = ttoproc(curthread);
546 
547 	mutex_enter(&p->p_lock);
548 	prbarrier(p);	/* block all other /proc operations */
549 	if (pr_watch_active(p)) {
550 		pr_free_watchpoints(p);
551 		watch_disable(curthread);
552 	}
553 	mutex_exit(&p->p_lock);
554 	pr_free_watched_pages(p);
555 }
556 
557 /*
558  * Called from hooks in exec-related code when a traced process
559  * attempts to exec(2) a setuid/setgid program or an unreadable
560  * file.  Rather than fail the exec we invalidate the associated
561  * /proc vnodes so that subsequent attempts to use them will fail.
562  *
563  * All /proc vnodes, except directory vnodes, are retained on a linked
564  * list (rooted at p_plist in the process structure) until last close.
565  *
566  * A controlling process must re-open the /proc files in order to
567  * regain control.
568  */
569 void
570 prinvalidate(struct user *up)
571 {
572 	kthread_t *t = curthread;
573 	proc_t *p = ttoproc(t);
574 	vnode_t *vp;
575 	prnode_t *pnp;
576 	int writers = 0;
577 
578 	mutex_enter(&p->p_lock);
579 	prbarrier(p);	/* block all other /proc operations */
580 
581 	/*
582 	 * At this moment, there can be only one lwp in the process.
583 	 */
584 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
585 
586 	/*
587 	 * Invalidate any currently active /proc vnodes.
588 	 */
589 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
590 		pnp = VTOP(vp);
591 		switch (pnp->pr_type) {
592 		case PR_PSINFO:		/* these files can read by anyone */
593 		case PR_LPSINFO:
594 		case PR_LWPSINFO:
595 		case PR_LWPDIR:
596 		case PR_LWPIDDIR:
597 		case PR_USAGE:
598 		case PR_LUSAGE:
599 		case PR_LWPUSAGE:
600 			break;
601 		default:
602 			pnp->pr_flags |= PR_INVAL;
603 			break;
604 		}
605 	}
606 	/*
607 	 * Wake up anyone waiting for the process or lwp.
608 	 * p->p_trace is guaranteed to be non-NULL if there
609 	 * are any open /proc files for this process.
610 	 */
611 	if ((vp = p->p_trace) != NULL) {
612 		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
613 
614 		prnotify(vp);
615 		/*
616 		 * Are there any writers?
617 		 */
618 		if ((writers = pcp->prc_writers) != 0) {
619 			/*
620 			 * Clear the exclusive open flag (old /proc interface).
621 			 * Set prc_selfopens equal to prc_writers so that
622 			 * the next O_EXCL|O_WRITE open will succeed
623 			 * even with existing (though invalid) writers.
624 			 * prclose() must decrement prc_selfopens when
625 			 * the invalid files are closed.
626 			 */
627 			pcp->prc_flags &= ~PRC_EXCL;
628 			ASSERT(pcp->prc_selfopens <= writers);
629 			pcp->prc_selfopens = writers;
630 		}
631 	}
632 	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
633 	while (vp != NULL) {
634 		/*
635 		 * We should not invalidate the lwpiddir vnodes,
636 		 * but the necessities of maintaining the old
637 		 * ioctl()-based version of /proc require it.
638 		 */
639 		pnp = VTOP(vp);
640 		pnp->pr_flags |= PR_INVAL;
641 		prnotify(vp);
642 		vp = pnp->pr_next;
643 	}
644 
645 	/*
646 	 * If any tracing flags are in effect and any vnodes are open for
647 	 * writing then set the requested-stop and run-on-last-close flags.
648 	 * Otherwise, clear all tracing flags.
649 	 */
650 	t->t_proc_flag &= ~TP_PAUSE;
651 	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
652 		t->t_proc_flag |= TP_PRSTOP;
653 		aston(t);		/* so ISSIG will see the flag */
654 		p->p_proc_flag |= P_PR_RUNLCL;
655 	} else {
656 		premptyset(&up->u_entrymask);		/* syscalls */
657 		premptyset(&up->u_exitmask);
658 		up->u_systrap = 0;
659 		premptyset(&p->p_sigmask);		/* signals */
660 		premptyset(&p->p_fltmask);		/* faults */
661 		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
662 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
663 		prnostep(ttolwp(t));
664 	}
665 
666 	mutex_exit(&p->p_lock);
667 }
668 
669 /*
670  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
671  * Return with pr_pidlock held in all cases.
672  * Return with p_lock held if the the process still exists.
673  * Return value is the process pointer if the process still exists, else NULL.
674  * If we lock the process, give ourself kernel priority to avoid deadlocks;
675  * this is undone in prunlock().
676  */
677 proc_t *
678 pr_p_lock(prnode_t *pnp)
679 {
680 	proc_t *p;
681 	prcommon_t *pcp;
682 
683 	mutex_enter(&pr_pidlock);
684 	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
685 		return (NULL);
686 	mutex_enter(&p->p_lock);
687 	while (p->p_proc_flag & P_PR_LOCK) {
688 		/*
689 		 * This cv/mutex pair is persistent even if
690 		 * the process disappears while we sleep.
691 		 */
692 		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
693 		kmutex_t *mp = &p->p_lock;
694 
695 		mutex_exit(&pr_pidlock);
696 		cv_wait(cv, mp);
697 		mutex_exit(mp);
698 		mutex_enter(&pr_pidlock);
699 		if (pcp->prc_proc == NULL)
700 			return (NULL);
701 		ASSERT(p == pcp->prc_proc);
702 		mutex_enter(&p->p_lock);
703 	}
704 	p->p_proc_flag |= P_PR_LOCK;
705 	THREAD_KPRI_REQUEST();
706 	return (p);
707 }
708 
709 /*
710  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
711  * This prevents any lwp of the process from disappearing and
712  * blocks most operations that a process can perform on itself.
713  * Returns 0 on success, a non-zero error number on failure.
714  *
715  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
716  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
717  *
718  * error returns:
719  *	ENOENT: process or lwp has disappeared or process is exiting
720  *		(or has become a zombie and zdisp == ZNO).
721  *	EAGAIN: procfs vnode has become invalid.
722  *	EINTR:  signal arrived while waiting for exec to complete.
723  */
724 int
725 prlock(prnode_t *pnp, int zdisp)
726 {
727 	prcommon_t *pcp;
728 	proc_t *p;
729 
730 again:
731 	pcp = pnp->pr_common;
732 	p = pr_p_lock(pnp);
733 	mutex_exit(&pr_pidlock);
734 
735 	/*
736 	 * Return ENOENT immediately if there is no process.
737 	 */
738 	if (p == NULL)
739 		return (ENOENT);
740 
741 	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
742 
743 	/*
744 	 * Return ENOENT if process entered zombie state or is exiting
745 	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
746 	 */
747 	if (zdisp == ZNO &&
748 	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
749 		prunlock(pnp);
750 		return (ENOENT);
751 	}
752 
753 	/*
754 	 * If lwp-specific, check to see if lwp has disappeared.
755 	 */
756 	if (pcp->prc_flags & PRC_LWP) {
757 		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
758 		    pcp->prc_tslot == -1) {
759 			prunlock(pnp);
760 			return (ENOENT);
761 		}
762 	}
763 
764 	/*
765 	 * Return EAGAIN if we have encountered a security violation.
766 	 * (The process exec'd a set-id or unreadable executable file.)
767 	 */
768 	if (pnp->pr_flags & PR_INVAL) {
769 		prunlock(pnp);
770 		return (EAGAIN);
771 	}
772 
773 	/*
774 	 * If process is undergoing an exec(), wait for
775 	 * completion and then start all over again.
776 	 */
777 	if (p->p_proc_flag & P_PR_EXEC) {
778 		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
779 		mutex_enter(&pcp->prc_mutex);
780 		prunlock(pnp);
781 		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
782 			mutex_exit(&pcp->prc_mutex);
783 			return (EINTR);
784 		}
785 		mutex_exit(&pcp->prc_mutex);
786 		goto again;
787 	}
788 
789 	/*
790 	 * We return holding p->p_lock.
791 	 */
792 	return (0);
793 }
794 
795 /*
796  * Undo prlock() and pr_p_lock().
797  * p->p_lock is still held; pr_pidlock is no longer held.
798  *
799  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
800  * if any, waiting for the flag to be dropped; it retains p->p_lock.
801  *
802  * prunlock() calls prunmark() and then drops p->p_lock.
803  */
804 void
805 prunmark(proc_t *p)
806 {
807 	ASSERT(p->p_proc_flag & P_PR_LOCK);
808 	ASSERT(MUTEX_HELD(&p->p_lock));
809 
810 	cv_signal(&pr_pid_cv[p->p_slot]);
811 	p->p_proc_flag &= ~P_PR_LOCK;
812 	THREAD_KPRI_RELEASE();
813 }
814 
815 void
816 prunlock(prnode_t *pnp)
817 {
818 	prcommon_t *pcp = pnp->pr_common;
819 	proc_t *p = pcp->prc_proc;
820 
821 	/*
822 	 * If we (or someone) gave it a SIGKILL, and it is not
823 	 * already a zombie, set it running unconditionally.
824 	 */
825 	if ((p->p_flag & SKILLED) &&
826 	    !(p->p_flag & SEXITING) &&
827 	    !(pcp->prc_flags & PRC_DESTROY) &&
828 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
829 		(void) pr_setrun(pnp, 0);
830 	prunmark(p);
831 	mutex_exit(&p->p_lock);
832 }
833 
834 /*
835  * Called while holding p->p_lock to delay until the process is unlocked.
836  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
837  * The process cannot become locked again until p->p_lock is dropped.
838  */
839 void
840 prbarrier(proc_t *p)
841 {
842 	ASSERT(MUTEX_HELD(&p->p_lock));
843 
844 	if (p->p_proc_flag & P_PR_LOCK) {
845 		/* The process is locked; delay until not locked */
846 		uint_t slot = p->p_slot;
847 
848 		while (p->p_proc_flag & P_PR_LOCK)
849 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
850 		cv_signal(&pr_pid_cv[slot]);
851 	}
852 }
853 
854 /*
855  * Return process/lwp status.
856  * The u-block is mapped in by this routine and unmapped at the end.
857  */
858 void
859 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
860 {
861 	kthread_t *t;
862 
863 	ASSERT(MUTEX_HELD(&p->p_lock));
864 
865 	t = prchoose(p);	/* returns locked thread */
866 	ASSERT(t != NULL);
867 	thread_unlock(t);
868 
869 	/* just bzero the process part, prgetlwpstatus() does the rest */
870 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
871 	sp->pr_nlwp = p->p_lwpcnt;
872 	sp->pr_nzomb = p->p_zombcnt;
873 	prassignset(&sp->pr_sigpend, &p->p_sig);
874 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
875 	sp->pr_brksize = p->p_brksize;
876 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
877 	sp->pr_stksize = p->p_stksize;
878 	sp->pr_pid = p->p_pid;
879 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
880 	    (p->p_flag & SZONETOP)) {
881 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
882 		/*
883 		 * Inside local zones, fake zsched's pid as parent pids for
884 		 * processes which reference processes outside of the zone.
885 		 */
886 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
887 	} else {
888 		sp->pr_ppid = p->p_ppid;
889 	}
890 	sp->pr_pgid  = p->p_pgrp;
891 	sp->pr_sid   = p->p_sessp->s_sid;
892 	sp->pr_taskid = p->p_task->tk_tkid;
893 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
894 	sp->pr_zoneid = p->p_zone->zone_id;
895 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
896 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
897 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
898 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
899 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
900 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
901 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
902 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
903 	switch (p->p_model) {
904 	case DATAMODEL_ILP32:
905 		sp->pr_dmodel = PR_MODEL_ILP32;
906 		break;
907 	case DATAMODEL_LP64:
908 		sp->pr_dmodel = PR_MODEL_LP64;
909 		break;
910 	}
911 	if (p->p_agenttp)
912 		sp->pr_agentid = p->p_agenttp->t_tid;
913 
914 	/* get the chosen lwp's status */
915 	prgetlwpstatus(t, &sp->pr_lwp, zp);
916 
917 	/* replicate the flags */
918 	sp->pr_flags = sp->pr_lwp.pr_flags;
919 }
920 
921 #ifdef _SYSCALL32_IMPL
922 void
923 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
924 {
925 	proc_t *p = ttoproc(t);
926 	klwp_t *lwp = ttolwp(t);
927 	struct mstate *ms = &lwp->lwp_mstate;
928 	hrtime_t usr, sys;
929 	int flags;
930 	ulong_t instr;
931 
932 	ASSERT(MUTEX_HELD(&p->p_lock));
933 
934 	bzero(sp, sizeof (*sp));
935 	flags = 0L;
936 	if (t->t_state == TS_STOPPED) {
937 		flags |= PR_STOPPED;
938 		if ((t->t_schedflag & TS_PSTART) == 0)
939 			flags |= PR_ISTOP;
940 	} else if (VSTOPPED(t)) {
941 		flags |= PR_STOPPED|PR_ISTOP;
942 	}
943 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
944 		flags |= PR_DSTOP;
945 	if (lwp->lwp_asleep)
946 		flags |= PR_ASLEEP;
947 	if (t == p->p_agenttp)
948 		flags |= PR_AGENT;
949 	if (!(t->t_proc_flag & TP_TWAIT))
950 		flags |= PR_DETACH;
951 	if (t->t_proc_flag & TP_DAEMON)
952 		flags |= PR_DAEMON;
953 	if (p->p_proc_flag & P_PR_FORK)
954 		flags |= PR_FORK;
955 	if (p->p_proc_flag & P_PR_RUNLCL)
956 		flags |= PR_RLC;
957 	if (p->p_proc_flag & P_PR_KILLCL)
958 		flags |= PR_KLC;
959 	if (p->p_proc_flag & P_PR_ASYNC)
960 		flags |= PR_ASYNC;
961 	if (p->p_proc_flag & P_PR_BPTADJ)
962 		flags |= PR_BPTADJ;
963 	if (p->p_proc_flag & P_PR_PTRACE)
964 		flags |= PR_PTRACE;
965 	if (p->p_flag & SMSACCT)
966 		flags |= PR_MSACCT;
967 	if (p->p_flag & SMSFORK)
968 		flags |= PR_MSFORK;
969 	if (p->p_flag & SVFWAIT)
970 		flags |= PR_VFORKP;
971 	sp->pr_flags = flags;
972 	if (VSTOPPED(t)) {
973 		sp->pr_why   = PR_REQUESTED;
974 		sp->pr_what  = 0;
975 	} else {
976 		sp->pr_why   = t->t_whystop;
977 		sp->pr_what  = t->t_whatstop;
978 	}
979 	sp->pr_lwpid = t->t_tid;
980 	sp->pr_cursig  = lwp->lwp_cursig;
981 	prassignset(&sp->pr_lwppend, &t->t_sig);
982 	schedctl_finish_sigblock(t);
983 	prassignset(&sp->pr_lwphold, &t->t_hold);
984 	if (t->t_whystop == PR_FAULTED) {
985 		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
986 		if (t->t_whatstop == FLTPAGE)
987 			sp->pr_info.si_addr =
988 			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
989 	} else if (lwp->lwp_curinfo)
990 		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
991 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
992 	    sp->pr_info.si_zoneid != zp->zone_id) {
993 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
994 		sp->pr_info.si_uid = 0;
995 		sp->pr_info.si_ctid = -1;
996 		sp->pr_info.si_zoneid = zp->zone_id;
997 	}
998 	sp->pr_altstack.ss_sp =
999 	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1000 	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1001 	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1002 	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1003 	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1004 	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1005 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1006 	    sizeof (sp->pr_clname) - 1);
1007 	if (flags & PR_STOPPED)
1008 		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1009 	usr = ms->ms_acct[LMS_USER];
1010 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1011 	scalehrtime(&usr);
1012 	scalehrtime(&sys);
1013 	hrt2ts32(usr, &sp->pr_utime);
1014 	hrt2ts32(sys, &sp->pr_stime);
1015 
1016 	/*
1017 	 * Fetch the current instruction, if not a system process.
1018 	 * We don't attempt this unless the lwp is stopped.
1019 	 */
1020 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1021 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1022 	else if (!(flags & PR_STOPPED))
1023 		sp->pr_flags |= PR_PCINVAL;
1024 	else if (!prfetchinstr(lwp, &instr))
1025 		sp->pr_flags |= PR_PCINVAL;
1026 	else
1027 		sp->pr_instr = (uint32_t)instr;
1028 
1029 	/*
1030 	 * Drop p_lock while touching the lwp's stack.
1031 	 */
1032 	mutex_exit(&p->p_lock);
1033 	if (prisstep(lwp))
1034 		sp->pr_flags |= PR_STEP;
1035 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1036 		int i;
1037 
1038 		sp->pr_syscall = get_syscall32_args(lwp,
1039 		    (int *)sp->pr_sysarg, &i);
1040 		sp->pr_nsysarg = (ushort_t)i;
1041 	}
1042 	if ((flags & PR_STOPPED) || t == curthread)
1043 		prgetprregs32(lwp, sp->pr_reg);
1044 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1045 	    (flags & PR_VFORKP)) {
1046 		long r1, r2;
1047 		user_t *up;
1048 		auxv_t *auxp;
1049 		int i;
1050 
1051 		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1052 		if (sp->pr_errno == 0) {
1053 			sp->pr_rval1 = (int32_t)r1;
1054 			sp->pr_rval2 = (int32_t)r2;
1055 			sp->pr_errpriv = PRIV_NONE;
1056 		} else
1057 			sp->pr_errpriv = lwp->lwp_badpriv;
1058 
1059 		if (t->t_sysnum == SYS_execve) {
1060 			up = PTOU(p);
1061 			sp->pr_sysarg[0] = 0;
1062 			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1063 			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1064 			for (i = 0, auxp = up->u_auxv;
1065 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1066 			    i++, auxp++) {
1067 				if (auxp->a_type == AT_SUN_EXECNAME) {
1068 					sp->pr_sysarg[0] =
1069 					    (caddr32_t)
1070 					    (uintptr_t)auxp->a_un.a_ptr;
1071 					break;
1072 				}
1073 			}
1074 		}
1075 	}
1076 	if (prhasfp())
1077 		prgetprfpregs32(lwp, &sp->pr_fpreg);
1078 	mutex_enter(&p->p_lock);
1079 }
1080 
1081 void
1082 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1083 {
1084 	kthread_t *t;
1085 
1086 	ASSERT(MUTEX_HELD(&p->p_lock));
1087 
1088 	t = prchoose(p);	/* returns locked thread */
1089 	ASSERT(t != NULL);
1090 	thread_unlock(t);
1091 
1092 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1093 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1094 	sp->pr_nlwp = p->p_lwpcnt;
1095 	sp->pr_nzomb = p->p_zombcnt;
1096 	prassignset(&sp->pr_sigpend, &p->p_sig);
1097 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1098 	sp->pr_brksize = (uint32_t)p->p_brksize;
1099 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1100 	sp->pr_stksize = (uint32_t)p->p_stksize;
1101 	sp->pr_pid   = p->p_pid;
1102 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1103 	    (p->p_flag & SZONETOP)) {
1104 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1105 		/*
1106 		 * Inside local zones, fake zsched's pid as parent pids for
1107 		 * processes which reference processes outside of the zone.
1108 		 */
1109 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1110 	} else {
1111 		sp->pr_ppid = p->p_ppid;
1112 	}
1113 	sp->pr_pgid  = p->p_pgrp;
1114 	sp->pr_sid   = p->p_sessp->s_sid;
1115 	sp->pr_taskid = p->p_task->tk_tkid;
1116 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1117 	sp->pr_zoneid = p->p_zone->zone_id;
1118 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1119 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1120 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1121 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1122 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1123 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1124 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1125 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1126 	switch (p->p_model) {
1127 	case DATAMODEL_ILP32:
1128 		sp->pr_dmodel = PR_MODEL_ILP32;
1129 		break;
1130 	case DATAMODEL_LP64:
1131 		sp->pr_dmodel = PR_MODEL_LP64;
1132 		break;
1133 	}
1134 	if (p->p_agenttp)
1135 		sp->pr_agentid = p->p_agenttp->t_tid;
1136 
1137 	/* get the chosen lwp's status */
1138 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1139 
1140 	/* replicate the flags */
1141 	sp->pr_flags = sp->pr_lwp.pr_flags;
1142 }
1143 #endif	/* _SYSCALL32_IMPL */
1144 
1145 /*
1146  * Return lwp status.
1147  */
1148 void
1149 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1150 {
1151 	proc_t *p = ttoproc(t);
1152 	klwp_t *lwp = ttolwp(t);
1153 	struct mstate *ms = &lwp->lwp_mstate;
1154 	hrtime_t usr, sys;
1155 	int flags;
1156 	ulong_t instr;
1157 
1158 	ASSERT(MUTEX_HELD(&p->p_lock));
1159 
1160 	bzero(sp, sizeof (*sp));
1161 	flags = 0L;
1162 	if (t->t_state == TS_STOPPED) {
1163 		flags |= PR_STOPPED;
1164 		if ((t->t_schedflag & TS_PSTART) == 0)
1165 			flags |= PR_ISTOP;
1166 	} else if (VSTOPPED(t)) {
1167 		flags |= PR_STOPPED|PR_ISTOP;
1168 	}
1169 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1170 		flags |= PR_DSTOP;
1171 	if (lwp->lwp_asleep)
1172 		flags |= PR_ASLEEP;
1173 	if (t == p->p_agenttp)
1174 		flags |= PR_AGENT;
1175 	if (!(t->t_proc_flag & TP_TWAIT))
1176 		flags |= PR_DETACH;
1177 	if (t->t_proc_flag & TP_DAEMON)
1178 		flags |= PR_DAEMON;
1179 	if (p->p_proc_flag & P_PR_FORK)
1180 		flags |= PR_FORK;
1181 	if (p->p_proc_flag & P_PR_RUNLCL)
1182 		flags |= PR_RLC;
1183 	if (p->p_proc_flag & P_PR_KILLCL)
1184 		flags |= PR_KLC;
1185 	if (p->p_proc_flag & P_PR_ASYNC)
1186 		flags |= PR_ASYNC;
1187 	if (p->p_proc_flag & P_PR_BPTADJ)
1188 		flags |= PR_BPTADJ;
1189 	if (p->p_proc_flag & P_PR_PTRACE)
1190 		flags |= PR_PTRACE;
1191 	if (p->p_flag & SMSACCT)
1192 		flags |= PR_MSACCT;
1193 	if (p->p_flag & SMSFORK)
1194 		flags |= PR_MSFORK;
1195 	if (p->p_flag & SVFWAIT)
1196 		flags |= PR_VFORKP;
1197 	if (p->p_pgidp->pid_pgorphaned)
1198 		flags |= PR_ORPHAN;
1199 	if (p->p_pidflag & CLDNOSIGCHLD)
1200 		flags |= PR_NOSIGCHLD;
1201 	if (p->p_pidflag & CLDWAITPID)
1202 		flags |= PR_WAITPID;
1203 	sp->pr_flags = flags;
1204 	if (VSTOPPED(t)) {
1205 		sp->pr_why   = PR_REQUESTED;
1206 		sp->pr_what  = 0;
1207 	} else {
1208 		sp->pr_why   = t->t_whystop;
1209 		sp->pr_what  = t->t_whatstop;
1210 	}
1211 	sp->pr_lwpid = t->t_tid;
1212 	sp->pr_cursig  = lwp->lwp_cursig;
1213 	prassignset(&sp->pr_lwppend, &t->t_sig);
1214 	schedctl_finish_sigblock(t);
1215 	prassignset(&sp->pr_lwphold, &t->t_hold);
1216 	if (t->t_whystop == PR_FAULTED)
1217 		bcopy(&lwp->lwp_siginfo,
1218 		    &sp->pr_info, sizeof (k_siginfo_t));
1219 	else if (lwp->lwp_curinfo)
1220 		bcopy(&lwp->lwp_curinfo->sq_info,
1221 		    &sp->pr_info, sizeof (k_siginfo_t));
1222 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1223 	    sp->pr_info.si_zoneid != zp->zone_id) {
1224 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1225 		sp->pr_info.si_uid = 0;
1226 		sp->pr_info.si_ctid = -1;
1227 		sp->pr_info.si_zoneid = zp->zone_id;
1228 	}
1229 	sp->pr_altstack = lwp->lwp_sigaltstack;
1230 	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1231 	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1232 	sp->pr_ustack = lwp->lwp_ustack;
1233 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1234 	    sizeof (sp->pr_clname) - 1);
1235 	if (flags & PR_STOPPED)
1236 		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1237 	usr = ms->ms_acct[LMS_USER];
1238 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1239 	scalehrtime(&usr);
1240 	scalehrtime(&sys);
1241 	hrt2ts(usr, &sp->pr_utime);
1242 	hrt2ts(sys, &sp->pr_stime);
1243 
1244 	/*
1245 	 * Fetch the current instruction, if not a system process.
1246 	 * We don't attempt this unless the lwp is stopped.
1247 	 */
1248 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1249 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1250 	else if (!(flags & PR_STOPPED))
1251 		sp->pr_flags |= PR_PCINVAL;
1252 	else if (!prfetchinstr(lwp, &instr))
1253 		sp->pr_flags |= PR_PCINVAL;
1254 	else
1255 		sp->pr_instr = instr;
1256 
1257 	/*
1258 	 * Drop p_lock while touching the lwp's stack.
1259 	 */
1260 	mutex_exit(&p->p_lock);
1261 	if (prisstep(lwp))
1262 		sp->pr_flags |= PR_STEP;
1263 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1264 		int i;
1265 
1266 		sp->pr_syscall = get_syscall_args(lwp,
1267 		    (long *)sp->pr_sysarg, &i);
1268 		sp->pr_nsysarg = (ushort_t)i;
1269 	}
1270 	if ((flags & PR_STOPPED) || t == curthread)
1271 		prgetprregs(lwp, sp->pr_reg);
1272 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1273 	    (flags & PR_VFORKP)) {
1274 		user_t *up;
1275 		auxv_t *auxp;
1276 		int i;
1277 
1278 		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1279 		if (sp->pr_errno == 0)
1280 			sp->pr_errpriv = PRIV_NONE;
1281 		else
1282 			sp->pr_errpriv = lwp->lwp_badpriv;
1283 
1284 		if (t->t_sysnum == SYS_execve) {
1285 			up = PTOU(p);
1286 			sp->pr_sysarg[0] = 0;
1287 			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1288 			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1289 			for (i = 0, auxp = up->u_auxv;
1290 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1291 			    i++, auxp++) {
1292 				if (auxp->a_type == AT_SUN_EXECNAME) {
1293 					sp->pr_sysarg[0] =
1294 					    (uintptr_t)auxp->a_un.a_ptr;
1295 					break;
1296 				}
1297 			}
1298 		}
1299 	}
1300 	if (prhasfp())
1301 		prgetprfpregs(lwp, &sp->pr_fpreg);
1302 	mutex_enter(&p->p_lock);
1303 }
1304 
1305 /*
1306  * Get the sigaction structure for the specified signal.  The u-block
1307  * must already have been mapped in by the caller.
1308  */
1309 void
1310 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1311 {
1312 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1313 
1314 	bzero(sp, sizeof (*sp));
1315 
1316 	if (sig != 0 && (unsigned)sig < nsig) {
1317 		sp->sa_handler = up->u_signal[sig-1];
1318 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1319 		if (sigismember(&up->u_sigonstack, sig))
1320 			sp->sa_flags |= SA_ONSTACK;
1321 		if (sigismember(&up->u_sigresethand, sig))
1322 			sp->sa_flags |= SA_RESETHAND;
1323 		if (sigismember(&up->u_sigrestart, sig))
1324 			sp->sa_flags |= SA_RESTART;
1325 		if (sigismember(&p->p_siginfo, sig))
1326 			sp->sa_flags |= SA_SIGINFO;
1327 		if (sigismember(&up->u_signodefer, sig))
1328 			sp->sa_flags |= SA_NODEFER;
1329 		if (sig == SIGCLD) {
1330 			if (p->p_flag & SNOWAIT)
1331 				sp->sa_flags |= SA_NOCLDWAIT;
1332 			if ((p->p_flag & SJCTL) == 0)
1333 				sp->sa_flags |= SA_NOCLDSTOP;
1334 		}
1335 	}
1336 }
1337 
1338 #ifdef _SYSCALL32_IMPL
1339 void
1340 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1341 {
1342 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1343 
1344 	bzero(sp, sizeof (*sp));
1345 
1346 	if (sig != 0 && (unsigned)sig < nsig) {
1347 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1348 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1349 		if (sigismember(&up->u_sigonstack, sig))
1350 			sp->sa_flags |= SA_ONSTACK;
1351 		if (sigismember(&up->u_sigresethand, sig))
1352 			sp->sa_flags |= SA_RESETHAND;
1353 		if (sigismember(&up->u_sigrestart, sig))
1354 			sp->sa_flags |= SA_RESTART;
1355 		if (sigismember(&p->p_siginfo, sig))
1356 			sp->sa_flags |= SA_SIGINFO;
1357 		if (sigismember(&up->u_signodefer, sig))
1358 			sp->sa_flags |= SA_NODEFER;
1359 		if (sig == SIGCLD) {
1360 			if (p->p_flag & SNOWAIT)
1361 				sp->sa_flags |= SA_NOCLDWAIT;
1362 			if ((p->p_flag & SJCTL) == 0)
1363 				sp->sa_flags |= SA_NOCLDSTOP;
1364 		}
1365 	}
1366 }
1367 #endif	/* _SYSCALL32_IMPL */
1368 
1369 /*
1370  * Count the number of segments in this process's address space.
1371  */
1372 int
1373 prnsegs(struct as *as, int reserved)
1374 {
1375 	int n = 0;
1376 	struct seg *seg;
1377 
1378 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1379 
1380 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1381 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1382 		caddr_t saddr, naddr;
1383 		void *tmp = NULL;
1384 
1385 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1386 			(void) pr_getprot(seg, reserved, &tmp,
1387 			    &saddr, &naddr, eaddr);
1388 			if (saddr != naddr)
1389 				n++;
1390 		}
1391 
1392 		ASSERT(tmp == NULL);
1393 	}
1394 
1395 	return (n);
1396 }
1397 
/*
 * Write 'n' in decimal, without leading zeros, starting at 's'.
 * If 'len' exceeds the number of digits written, the rest of the first
 * 'len' bytes of 's' is filled with null characters; otherwise nothing
 * is appended and the result is not null-terminated.
 * Returns the number of digits written.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* a uint32_t has at most 10 decimal digits */
	char *limit = s + len;
	int ndigits = 0;
	int i;

	/* produce the digits least-significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them in the opposite order */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	/* optional null padding out to 'len' bytes */
	while (s < limit)
		*s++ = '\0';

	return (ndigits);
}
1426 
/*
 * Write 'n' in decimal, without leading zeros, starting at 's'.
 * No terminating null is stored.
 * Returns the number of digits written.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* a uint64_t has at most 20 decimal digits */
	int ndigits = 0;
	int i;

	/* produce the digits least-significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them in the opposite order */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	return (ndigits);
}
1451 
1452 void
1453 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1454 {
1455 	char *s = name;
1456 	struct vfs *vfsp;
1457 	struct vfssw *vfsswp;
1458 
1459 	if ((vfsp = vp->v_vfsp) != NULL &&
1460 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1461 	    *vfsswp->vsw_name) {
1462 		(void) strcpy(s, vfsswp->vsw_name);
1463 		s += strlen(s);
1464 		*s++ = '.';
1465 	}
1466 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1467 	*s++ = '.';
1468 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1469 	*s++ = '.';
1470 	s += pr_u64tos(vattr->va_nodeid, s);
1471 	*s++ = '\0';
1472 }
1473 
1474 struct seg *
1475 break_seg(proc_t *p)
1476 {
1477 	caddr_t addr = p->p_brkbase;
1478 	struct seg *seg;
1479 	struct vnode *vp;
1480 
1481 	if (p->p_brksize != 0)
1482 		addr += p->p_brksize - 1;
1483 	seg = as_segat(p->p_as, addr);
1484 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1485 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1486 		return (seg);
1487 	return (NULL);
1488 }
1489 
1490 /*
1491  * Implementation of service functions to handle procfs generic chained
1492  * copyout buffers.
1493  */
1494 typedef struct pr_iobuf_list {
1495 	list_node_t	piol_link;	/* buffer linkage */
1496 	size_t		piol_size;	/* total size (header + data) */
1497 	size_t		piol_usedsize;	/* amount to copy out from this buf */
1498 } piol_t;
1499 
1500 #define	MAPSIZE	(64 * 1024)
1501 #define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1502 
1503 void
1504 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1505 {
1506 	piol_t	*iol;
1507 	size_t	initial_size = MIN(1, n) * itemsize;
1508 
1509 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1510 
1511 	ASSERT(list_head(iolhead) == NULL);
1512 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1513 	ASSERT(initial_size > 0);
1514 
1515 	/*
1516 	 * Someone creating chained copyout buffers may ask for less than
1517 	 * MAPSIZE if the amount of data to be buffered is known to be
1518 	 * smaller than that.
1519 	 * But in order to prevent involuntary self-denial of service,
1520 	 * the requested input size is clamped at MAPSIZE.
1521 	 */
1522 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1523 	iol = kmem_alloc(initial_size, KM_SLEEP);
1524 	list_insert_head(iolhead, iol);
1525 	iol->piol_usedsize = 0;
1526 	iol->piol_size = initial_size;
1527 }
1528 
1529 void *
1530 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1531 {
1532 	piol_t	*iol;
1533 	char	*new;
1534 
1535 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1536 	ASSERT(list_head(iolhead) != NULL);
1537 
1538 	iol = (piol_t *)list_tail(iolhead);
1539 
1540 	if (iol->piol_size <
1541 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1542 		/*
1543 		 * Out of space in the current buffer. Allocate more.
1544 		 */
1545 		piol_t *newiol;
1546 
1547 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1548 		newiol->piol_size = MAPSIZE;
1549 		newiol->piol_usedsize = 0;
1550 
1551 		list_insert_after(iolhead, iol, newiol);
1552 		iol = list_next(iolhead, iol);
1553 		ASSERT(iol == newiol);
1554 	}
1555 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1556 	iol->piol_usedsize += itemsize;
1557 	bzero(new, itemsize);
1558 	return (new);
1559 }
1560 
1561 int
1562 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1563 {
1564 	int error = errin;
1565 	piol_t	*iol;
1566 
1567 	while ((iol = list_head(iolhead)) != NULL) {
1568 		list_remove(iolhead, iol);
1569 		if (!error) {
1570 			if (copyout(PIOL_DATABUF(iol), *tgt,
1571 			    iol->piol_usedsize))
1572 				error = EFAULT;
1573 			*tgt += iol->piol_usedsize;
1574 		}
1575 		kmem_free(iol, iol->piol_size);
1576 	}
1577 	list_destroy(iolhead);
1578 
1579 	return (error);
1580 }
1581 
1582 int
1583 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1584 {
1585 	offset_t	off = uiop->uio_offset;
1586 	char		*base;
1587 	size_t		size;
1588 	piol_t		*iol;
1589 	int		error = errin;
1590 
1591 	while ((iol = list_head(iolhead)) != NULL) {
1592 		list_remove(iolhead, iol);
1593 		base = PIOL_DATABUF(iol);
1594 		size = iol->piol_usedsize;
1595 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1596 			error = uiomove(base + off, size - off,
1597 			    UIO_READ, uiop);
1598 		off = MAX(0, off - (offset_t)size);
1599 		kmem_free(iol, iol->piol_size);
1600 	}
1601 	list_destroy(iolhead);
1602 
1603 	return (error);
1604 }
1605 
1606 /*
1607  * Return an array of structures with memory map information.
1608  * We allocate here; the caller must deallocate.
1609  */
1610 int
1611 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1612 {
1613 	struct as *as = p->p_as;
1614 	prmap_t *mp;
1615 	struct seg *seg;
1616 	struct seg *brkseg, *stkseg;
1617 	struct vnode *vp;
1618 	struct vattr vattr;
1619 	uint_t prot;
1620 
1621 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1622 
1623 	/*
1624 	 * Request an initial buffer size that doesn't waste memory
1625 	 * if the address space has only a small number of segments.
1626 	 */
1627 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1628 
1629 	if ((seg = AS_SEGFIRST(as)) == NULL)
1630 		return (0);
1631 
1632 	brkseg = break_seg(p);
1633 	stkseg = as_segat(as, prgetstackbase(p));
1634 
1635 	do {
1636 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1637 		caddr_t saddr, naddr;
1638 		void *tmp = NULL;
1639 
1640 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1641 			prot = pr_getprot(seg, reserved, &tmp,
1642 			    &saddr, &naddr, eaddr);
1643 			if (saddr == naddr)
1644 				continue;
1645 
1646 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1647 
1648 			mp->pr_vaddr = (uintptr_t)saddr;
1649 			mp->pr_size = naddr - saddr;
1650 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1651 			mp->pr_mflags = 0;
1652 			if (prot & PROT_READ)
1653 				mp->pr_mflags |= MA_READ;
1654 			if (prot & PROT_WRITE)
1655 				mp->pr_mflags |= MA_WRITE;
1656 			if (prot & PROT_EXEC)
1657 				mp->pr_mflags |= MA_EXEC;
1658 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1659 				mp->pr_mflags |= MA_SHARED;
1660 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1661 				mp->pr_mflags |= MA_NORESERVE;
1662 			if (seg->s_ops == &segspt_shmops ||
1663 			    (seg->s_ops == &segvn_ops &&
1664 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1665 				mp->pr_mflags |= MA_ANON;
1666 			if (seg == brkseg)
1667 				mp->pr_mflags |= MA_BREAK;
1668 			else if (seg == stkseg) {
1669 				mp->pr_mflags |= MA_STACK;
1670 				if (reserved) {
1671 					size_t maxstack =
1672 					    ((size_t)p->p_stk_ctl +
1673 					    PAGEOFFSET) & PAGEMASK;
1674 					mp->pr_vaddr =
1675 					    (uintptr_t)prgetstackbase(p) +
1676 					    p->p_stksize - maxstack;
1677 					mp->pr_size = (uintptr_t)naddr -
1678 					    mp->pr_vaddr;
1679 				}
1680 			}
1681 			if (seg->s_ops == &segspt_shmops)
1682 				mp->pr_mflags |= MA_ISM | MA_SHM;
1683 			mp->pr_pagesize = PAGESIZE;
1684 
1685 			/*
1686 			 * Manufacture a filename for the "object" directory.
1687 			 */
1688 			vattr.va_mask = AT_FSID|AT_NODEID;
1689 			if (seg->s_ops == &segvn_ops &&
1690 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1691 			    vp != NULL && vp->v_type == VREG &&
1692 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1693 				if (vp == p->p_exec)
1694 					(void) strcpy(mp->pr_mapname, "a.out");
1695 				else
1696 					pr_object_name(mp->pr_mapname,
1697 					    vp, &vattr);
1698 			}
1699 
1700 			/*
1701 			 * Get the SysV shared memory id, if any.
1702 			 */
1703 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1704 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1705 			    SHMID_NONE) {
1706 				if (mp->pr_shmid == SHMID_FREE)
1707 					mp->pr_shmid = -1;
1708 
1709 				mp->pr_mflags |= MA_SHM;
1710 			} else {
1711 				mp->pr_shmid = -1;
1712 			}
1713 		}
1714 		ASSERT(tmp == NULL);
1715 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1716 
1717 	return (0);
1718 }
1719 
1720 #ifdef _SYSCALL32_IMPL
1721 int
1722 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1723 {
1724 	struct as *as = p->p_as;
1725 	prmap32_t *mp;
1726 	struct seg *seg;
1727 	struct seg *brkseg, *stkseg;
1728 	struct vnode *vp;
1729 	struct vattr vattr;
1730 	uint_t prot;
1731 
1732 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1733 
1734 	/*
1735 	 * Request an initial buffer size that doesn't waste memory
1736 	 * if the address space has only a small number of segments.
1737 	 */
1738 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1739 
1740 	if ((seg = AS_SEGFIRST(as)) == NULL)
1741 		return (0);
1742 
1743 	brkseg = break_seg(p);
1744 	stkseg = as_segat(as, prgetstackbase(p));
1745 
1746 	do {
1747 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1748 		caddr_t saddr, naddr;
1749 		void *tmp = NULL;
1750 
1751 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1752 			prot = pr_getprot(seg, reserved, &tmp,
1753 			    &saddr, &naddr, eaddr);
1754 			if (saddr == naddr)
1755 				continue;
1756 
1757 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1758 
1759 			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1760 			mp->pr_size = (size32_t)(naddr - saddr);
1761 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1762 			mp->pr_mflags = 0;
1763 			if (prot & PROT_READ)
1764 				mp->pr_mflags |= MA_READ;
1765 			if (prot & PROT_WRITE)
1766 				mp->pr_mflags |= MA_WRITE;
1767 			if (prot & PROT_EXEC)
1768 				mp->pr_mflags |= MA_EXEC;
1769 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1770 				mp->pr_mflags |= MA_SHARED;
1771 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1772 				mp->pr_mflags |= MA_NORESERVE;
1773 			if (seg->s_ops == &segspt_shmops ||
1774 			    (seg->s_ops == &segvn_ops &&
1775 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1776 				mp->pr_mflags |= MA_ANON;
1777 			if (seg == brkseg)
1778 				mp->pr_mflags |= MA_BREAK;
1779 			else if (seg == stkseg) {
1780 				mp->pr_mflags |= MA_STACK;
1781 				if (reserved) {
1782 					size_t maxstack =
1783 					    ((size_t)p->p_stk_ctl +
1784 					    PAGEOFFSET) & PAGEMASK;
1785 					uintptr_t vaddr =
1786 					    (uintptr_t)prgetstackbase(p) +
1787 					    p->p_stksize - maxstack;
1788 					mp->pr_vaddr = (caddr32_t)vaddr;
1789 					mp->pr_size = (size32_t)
1790 					    ((uintptr_t)naddr - vaddr);
1791 				}
1792 			}
1793 			if (seg->s_ops == &segspt_shmops)
1794 				mp->pr_mflags |= MA_ISM | MA_SHM;
1795 			mp->pr_pagesize = PAGESIZE;
1796 
1797 			/*
1798 			 * Manufacture a filename for the "object" directory.
1799 			 */
1800 			vattr.va_mask = AT_FSID|AT_NODEID;
1801 			if (seg->s_ops == &segvn_ops &&
1802 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1803 			    vp != NULL && vp->v_type == VREG &&
1804 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1805 				if (vp == p->p_exec)
1806 					(void) strcpy(mp->pr_mapname, "a.out");
1807 				else
1808 					pr_object_name(mp->pr_mapname,
1809 					    vp, &vattr);
1810 			}
1811 
1812 			/*
1813 			 * Get the SysV shared memory id, if any.
1814 			 */
1815 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1816 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1817 			    SHMID_NONE) {
1818 				if (mp->pr_shmid == SHMID_FREE)
1819 					mp->pr_shmid = -1;
1820 
1821 				mp->pr_mflags |= MA_SHM;
1822 			} else {
1823 				mp->pr_shmid = -1;
1824 			}
1825 		}
1826 		ASSERT(tmp == NULL);
1827 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1828 
1829 	return (0);
1830 }
1831 #endif	/* _SYSCALL32_IMPL */
1832 
1833 /*
1834  * Return the size of the /proc page data file.
1835  */
1836 size_t
1837 prpdsize(struct as *as)
1838 {
1839 	struct seg *seg;
1840 	size_t size;
1841 
1842 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1843 
1844 	if ((seg = AS_SEGFIRST(as)) == NULL)
1845 		return (0);
1846 
1847 	size = sizeof (prpageheader_t);
1848 	do {
1849 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1850 		caddr_t saddr, naddr;
1851 		void *tmp = NULL;
1852 		size_t npage;
1853 
1854 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1855 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1856 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1857 				size += sizeof (prasmap_t) + round8(npage);
1858 		}
1859 		ASSERT(tmp == NULL);
1860 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1861 
1862 	return (size);
1863 }
1864 
1865 #ifdef _SYSCALL32_IMPL
1866 size_t
1867 prpdsize32(struct as *as)
1868 {
1869 	struct seg *seg;
1870 	size_t size;
1871 
1872 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1873 
1874 	if ((seg = AS_SEGFIRST(as)) == NULL)
1875 		return (0);
1876 
1877 	size = sizeof (prpageheader32_t);
1878 	do {
1879 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1880 		caddr_t saddr, naddr;
1881 		void *tmp = NULL;
1882 		size_t npage;
1883 
1884 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1885 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1886 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1887 				size += sizeof (prasmap32_t) + round8(npage);
1888 		}
1889 		ASSERT(tmp == NULL);
1890 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1891 
1892 	return (size);
1893 }
1894 #endif	/* _SYSCALL32_IMPL */
1895 
1896 /*
1897  * Read page data information.
1898  */
1899 int
1900 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1901 {
1902 	struct as *as = p->p_as;
1903 	caddr_t buf;
1904 	size_t size;
1905 	prpageheader_t *php;
1906 	prasmap_t *pmp;
1907 	struct seg *seg;
1908 	int error;
1909 
1910 again:
1911 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1912 
1913 	if ((seg = AS_SEGFIRST(as)) == NULL) {
1914 		AS_LOCK_EXIT(as, &as->a_lock);
1915 		return (0);
1916 	}
1917 	size = prpdsize(as);
1918 	if (uiop->uio_resid < size) {
1919 		AS_LOCK_EXIT(as, &as->a_lock);
1920 		return (E2BIG);
1921 	}
1922 
1923 	buf = kmem_zalloc(size, KM_SLEEP);
1924 	php = (prpageheader_t *)buf;
1925 	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1926 
1927 	hrt2ts(gethrtime(), &php->pr_tstamp);
1928 	php->pr_nmap = 0;
1929 	php->pr_npage = 0;
1930 	do {
1931 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1932 		caddr_t saddr, naddr;
1933 		void *tmp = NULL;
1934 
1935 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1936 			struct vnode *vp;
1937 			struct vattr vattr;
1938 			size_t len;
1939 			size_t npage;
1940 			uint_t prot;
1941 			uintptr_t next;
1942 
1943 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1944 			if ((len = (size_t)(naddr - saddr)) == 0)
1945 				continue;
1946 			npage = len / PAGESIZE;
1947 			next = (uintptr_t)(pmp + 1) + round8(npage);
1948 			/*
1949 			 * It's possible that the address space can change
1950 			 * subtlely even though we're holding as->a_lock
1951 			 * due to the nondeterminism of page_exists() in
1952 			 * the presence of asychronously flushed pages or
1953 			 * mapped files whose sizes are changing.
1954 			 * page_exists() may be called indirectly from
1955 			 * pr_getprot() by a SEGOP_INCORE() routine.
1956 			 * If this happens we need to make sure we don't
1957 			 * overrun the buffer whose size we computed based
1958 			 * on the initial iteration through the segments.
1959 			 * Once we've detected an overflow, we need to clean
1960 			 * up the temporary memory allocated in pr_getprot()
1961 			 * and retry. If there's a pending signal, we return
1962 			 * EINTR so that this thread can be dislodged if
1963 			 * a latent bug causes us to spin indefinitely.
1964 			 */
1965 			if (next > (uintptr_t)buf + size) {
1966 				pr_getprot_done(&tmp);
1967 				AS_LOCK_EXIT(as, &as->a_lock);
1968 
1969 				kmem_free(buf, size);
1970 
1971 				if (ISSIG(curthread, JUSTLOOKING))
1972 					return (EINTR);
1973 
1974 				goto again;
1975 			}
1976 
1977 			php->pr_nmap++;
1978 			php->pr_npage += npage;
1979 			pmp->pr_vaddr = (uintptr_t)saddr;
1980 			pmp->pr_npage = npage;
1981 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1982 			pmp->pr_mflags = 0;
1983 			if (prot & PROT_READ)
1984 				pmp->pr_mflags |= MA_READ;
1985 			if (prot & PROT_WRITE)
1986 				pmp->pr_mflags |= MA_WRITE;
1987 			if (prot & PROT_EXEC)
1988 				pmp->pr_mflags |= MA_EXEC;
1989 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1990 				pmp->pr_mflags |= MA_SHARED;
1991 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1992 				pmp->pr_mflags |= MA_NORESERVE;
1993 			if (seg->s_ops == &segspt_shmops ||
1994 			    (seg->s_ops == &segvn_ops &&
1995 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1996 				pmp->pr_mflags |= MA_ANON;
1997 			if (seg->s_ops == &segspt_shmops)
1998 				pmp->pr_mflags |= MA_ISM | MA_SHM;
1999 			pmp->pr_pagesize = PAGESIZE;
2000 			/*
2001 			 * Manufacture a filename for the "object" directory.
2002 			 */
2003 			vattr.va_mask = AT_FSID|AT_NODEID;
2004 			if (seg->s_ops == &segvn_ops &&
2005 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2006 			    vp != NULL && vp->v_type == VREG &&
2007 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2008 				if (vp == p->p_exec)
2009 					(void) strcpy(pmp->pr_mapname, "a.out");
2010 				else
2011 					pr_object_name(pmp->pr_mapname,
2012 					    vp, &vattr);
2013 			}
2014 
2015 			/*
2016 			 * Get the SysV shared memory id, if any.
2017 			 */
2018 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2019 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2020 			    SHMID_NONE) {
2021 				if (pmp->pr_shmid == SHMID_FREE)
2022 					pmp->pr_shmid = -1;
2023 
2024 				pmp->pr_mflags |= MA_SHM;
2025 			} else {
2026 				pmp->pr_shmid = -1;
2027 			}
2028 
2029 			hat_getstat(as, saddr, len, hatid,
2030 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2031 			pmp = (prasmap_t *)next;
2032 		}
2033 		ASSERT(tmp == NULL);
2034 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2035 
2036 	AS_LOCK_EXIT(as, &as->a_lock);
2037 
2038 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2039 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2040 	kmem_free(buf, size);
2041 
2042 	return (error);
2043 }
2044 
2045 #ifdef _SYSCALL32_IMPL
2046 int
2047 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2048 {
2049 	struct as *as = p->p_as;
2050 	caddr_t buf;
2051 	size_t size;
2052 	prpageheader32_t *php;
2053 	prasmap32_t *pmp;
2054 	struct seg *seg;
2055 	int error;
2056 
2057 again:
2058 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2059 
2060 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2061 		AS_LOCK_EXIT(as, &as->a_lock);
2062 		return (0);
2063 	}
2064 	size = prpdsize32(as);
2065 	if (uiop->uio_resid < size) {
2066 		AS_LOCK_EXIT(as, &as->a_lock);
2067 		return (E2BIG);
2068 	}
2069 
2070 	buf = kmem_zalloc(size, KM_SLEEP);
2071 	php = (prpageheader32_t *)buf;
2072 	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2073 
2074 	hrt2ts32(gethrtime(), &php->pr_tstamp);
2075 	php->pr_nmap = 0;
2076 	php->pr_npage = 0;
2077 	do {
2078 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2079 		caddr_t saddr, naddr;
2080 		void *tmp = NULL;
2081 
2082 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2083 			struct vnode *vp;
2084 			struct vattr vattr;
2085 			size_t len;
2086 			size_t npage;
2087 			uint_t prot;
2088 			uintptr_t next;
2089 
2090 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2091 			if ((len = (size_t)(naddr - saddr)) == 0)
2092 				continue;
2093 			npage = len / PAGESIZE;
2094 			next = (uintptr_t)(pmp + 1) + round8(npage);
2095 			/*
2096 			 * It's possible that the address space can change
2097 			 * subtly even though we're holding as->a_lock
2098 			 * due to the nondeterminism of page_exists() in
2099 			 * the presence of asynchronously flushed pages or
2100 			 * mapped files whose sizes are changing.
2101 			 * page_exists() may be called indirectly from
2102 			 * pr_getprot() by a SEGOP_INCORE() routine.
2103 			 * If this happens we need to make sure we don't
2104 			 * overrun the buffer whose size we computed based
2105 			 * on the initial iteration through the segments.
2106 			 * Once we've detected an overflow, we need to clean
2107 			 * up the temporary memory allocated in pr_getprot()
2108 			 * and retry. If there's a pending signal, we return
2109 			 * EINTR so that this thread can be dislodged if
2110 			 * a latent bug causes us to spin indefinitely.
2111 			 */
2112 			if (next > (uintptr_t)buf + size) {
2113 				pr_getprot_done(&tmp);
2114 				AS_LOCK_EXIT(as, &as->a_lock);
2115 
2116 				kmem_free(buf, size);
2117 
2118 				if (ISSIG(curthread, JUSTLOOKING))
2119 					return (EINTR);
2120 
2121 				goto again;
2122 			}
2123 
2124 			php->pr_nmap++;
2125 			php->pr_npage += npage;
2126 			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2127 			pmp->pr_npage = (size32_t)npage;
2128 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2129 			pmp->pr_mflags = 0;
2130 			if (prot & PROT_READ)
2131 				pmp->pr_mflags |= MA_READ;
2132 			if (prot & PROT_WRITE)
2133 				pmp->pr_mflags |= MA_WRITE;
2134 			if (prot & PROT_EXEC)
2135 				pmp->pr_mflags |= MA_EXEC;
2136 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2137 				pmp->pr_mflags |= MA_SHARED;
2138 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2139 				pmp->pr_mflags |= MA_NORESERVE;
2140 			if (seg->s_ops == &segspt_shmops ||
2141 			    (seg->s_ops == &segvn_ops &&
2142 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2143 				pmp->pr_mflags |= MA_ANON;
2144 			if (seg->s_ops == &segspt_shmops)
2145 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2146 			pmp->pr_pagesize = PAGESIZE;
2147 			/*
2148 			 * Manufacture a filename for the "object" directory.
2149 			 */
2150 			vattr.va_mask = AT_FSID|AT_NODEID;
2151 			if (seg->s_ops == &segvn_ops &&
2152 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2153 			    vp != NULL && vp->v_type == VREG &&
2154 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2155 				if (vp == p->p_exec)
2156 					(void) strcpy(pmp->pr_mapname, "a.out");
2157 				else
2158 					pr_object_name(pmp->pr_mapname,
2159 					    vp, &vattr);
2160 			}
2161 
2162 			/*
2163 			 * Get the SysV shared memory id, if any.
2164 			 */
2165 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2166 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2167 			    SHMID_NONE) {
2168 				if (pmp->pr_shmid == SHMID_FREE)
2169 					pmp->pr_shmid = -1;
2170 
2171 				pmp->pr_mflags |= MA_SHM;
2172 			} else {
2173 				pmp->pr_shmid = -1;
2174 			}
2175 
2176 			hat_getstat(as, saddr, len, hatid,
2177 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2178 			pmp = (prasmap32_t *)next;
2179 		}
2180 		ASSERT(tmp == NULL);
2181 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2182 
2183 	AS_LOCK_EXIT(as, &as->a_lock);
2184 
2185 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2186 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2187 	kmem_free(buf, size);
2188 
2189 	return (error);
2190 }
2191 #endif	/* _SYSCALL32_IMPL */
2192 
2193 ushort_t
2194 prgetpctcpu(uint64_t pct)
2195 {
2196 	/*
2197 	 * The value returned will be relevant in the zone of the examiner,
2198 	 * which may not be the same as the zone which performed the procfs
2199 	 * mount.
2200 	 */
2201 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2202 
2203 	/*
2204 	 * Prorate over online cpus so we don't exceed 100%
2205 	 */
2206 	if (nonline > 1)
2207 		pct /= nonline;
2208 	pct >>= 16;		/* convert to 16-bit scaled integer */
2209 	if (pct > 0x8000)	/* might happen, due to rounding */
2210 		pct = 0x8000;
2211 	return ((ushort_t)pct);
2212 }
2213 
2214 /*
2215  * Return information used by ps(1).
2216  */
2217 void
2218 prgetpsinfo(proc_t *p, psinfo_t *psp)
2219 {
2220 	kthread_t *t;
2221 	struct cred *cred;
2222 	hrtime_t hrutime, hrstime;
2223 
2224 	ASSERT(MUTEX_HELD(&p->p_lock));
2225 
2226 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2227 		bzero(psp, sizeof (*psp));
2228 	else {
2229 		thread_unlock(t);
2230 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2231 	}
2232 
2233 	/*
2234 	 * only export SSYS and SMSACCT; everything else is off-limits to
2235 	 * userland apps.
2236 	 */
2237 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2238 	psp->pr_nlwp = p->p_lwpcnt;
2239 	psp->pr_nzomb = p->p_zombcnt;
2240 	mutex_enter(&p->p_crlock);
2241 	cred = p->p_cred;
2242 	psp->pr_uid = crgetruid(cred);
2243 	psp->pr_euid = crgetuid(cred);
2244 	psp->pr_gid = crgetrgid(cred);
2245 	psp->pr_egid = crgetgid(cred);
2246 	mutex_exit(&p->p_crlock);
2247 	psp->pr_pid = p->p_pid;
2248 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2249 	    (p->p_flag & SZONETOP)) {
2250 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2251 		/*
2252 		 * Inside local zones, fake zsched's pid as parent pids for
2253 		 * processes which reference processes outside of the zone.
2254 		 */
2255 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2256 	} else {
2257 		psp->pr_ppid = p->p_ppid;
2258 	}
2259 	psp->pr_pgid = p->p_pgrp;
2260 	psp->pr_sid = p->p_sessp->s_sid;
2261 	psp->pr_taskid = p->p_task->tk_tkid;
2262 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2263 	psp->pr_poolid = p->p_pool->pool_id;
2264 	psp->pr_zoneid = p->p_zone->zone_id;
2265 	if ((psp->pr_contract = PRCTID(p)) == 0)
2266 		psp->pr_contract = -1;
2267 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2268 	switch (p->p_model) {
2269 	case DATAMODEL_ILP32:
2270 		psp->pr_dmodel = PR_MODEL_ILP32;
2271 		break;
2272 	case DATAMODEL_LP64:
2273 		psp->pr_dmodel = PR_MODEL_LP64;
2274 		break;
2275 	}
2276 	hrutime = mstate_aggr_state(p, LMS_USER);
2277 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2278 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2279 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2280 
2281 	if (t == NULL) {
2282 		int wcode = p->p_wcode;		/* must be atomic read */
2283 
2284 		if (wcode)
2285 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2286 		psp->pr_ttydev = PRNODEV;
2287 		psp->pr_lwp.pr_state = SZOMB;
2288 		psp->pr_lwp.pr_sname = 'Z';
2289 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2290 		psp->pr_lwp.pr_bindpset = PS_NONE;
2291 	} else {
2292 		user_t *up = PTOU(p);
2293 		struct as *as;
2294 		dev_t d;
2295 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2296 
2297 		d = cttydev(p);
2298 		/*
2299 		 * If the controlling terminal is the real
2300 		 * or workstation console device, map to what the
2301 		 * user thinks is the console device. Handle case when
2302 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2303 		 */
2304 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2305 			d = uconsdev;
2306 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2307 		psp->pr_start = up->u_start;
2308 		bcopy(up->u_comm, psp->pr_fname,
2309 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2310 		bcopy(up->u_psargs, psp->pr_psargs,
2311 		    MIN(PRARGSZ-1, PSARGSZ));
2312 		psp->pr_argc = up->u_argc;
2313 		psp->pr_argv = up->u_argv;
2314 		psp->pr_envp = up->u_envp;
2315 
2316 		/* get the chosen lwp's lwpsinfo */
2317 		prgetlwpsinfo(t, &psp->pr_lwp);
2318 
2319 		/* compute %cpu for the process */
2320 		if (p->p_lwpcnt == 1)
2321 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2322 		else {
2323 			uint64_t pct = 0;
2324 			hrtime_t cur_time = gethrtime_unscaled();
2325 
2326 			t = p->p_tlist;
2327 			do {
2328 				pct += cpu_update_pct(t, cur_time);
2329 			} while ((t = t->t_forw) != p->p_tlist);
2330 
2331 			psp->pr_pctcpu = prgetpctcpu(pct);
2332 		}
2333 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2334 			psp->pr_size = 0;
2335 			psp->pr_rssize = 0;
2336 		} else {
2337 			mutex_exit(&p->p_lock);
2338 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2339 			psp->pr_size = btopr(as->a_resvsize) *
2340 			    (PAGESIZE / 1024);
2341 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2342 			psp->pr_pctmem = rm_pctmemory(as);
2343 			AS_LOCK_EXIT(as, &as->a_lock);
2344 			mutex_enter(&p->p_lock);
2345 		}
2346 	}
2347 }
2348 
2349 #ifdef _SYSCALL32_IMPL
2350 void
2351 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2352 {
2353 	kthread_t *t;
2354 	struct cred *cred;
2355 	hrtime_t hrutime, hrstime;
2356 
2357 	ASSERT(MUTEX_HELD(&p->p_lock));
2358 
2359 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2360 		bzero(psp, sizeof (*psp));
2361 	else {
2362 		thread_unlock(t);
2363 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2364 	}
2365 
2366 	/*
2367 	 * only export SSYS and SMSACCT; everything else is off-limits to
2368 	 * userland apps.
2369 	 */
2370 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2371 	psp->pr_nlwp = p->p_lwpcnt;
2372 	psp->pr_nzomb = p->p_zombcnt;
2373 	mutex_enter(&p->p_crlock);
2374 	cred = p->p_cred;
2375 	psp->pr_uid = crgetruid(cred);
2376 	psp->pr_euid = crgetuid(cred);
2377 	psp->pr_gid = crgetrgid(cred);
2378 	psp->pr_egid = crgetgid(cred);
2379 	mutex_exit(&p->p_crlock);
2380 	psp->pr_pid = p->p_pid;
2381 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2382 	    (p->p_flag & SZONETOP)) {
2383 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2384 		/*
2385 		 * Inside local zones, fake zsched's pid as parent pids for
2386 		 * processes which reference processes outside of the zone.
2387 		 */
2388 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2389 	} else {
2390 		psp->pr_ppid = p->p_ppid;
2391 	}
2392 	psp->pr_pgid = p->p_pgrp;
2393 	psp->pr_sid = p->p_sessp->s_sid;
2394 	psp->pr_taskid = p->p_task->tk_tkid;
2395 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2396 	psp->pr_poolid = p->p_pool->pool_id;
2397 	psp->pr_zoneid = p->p_zone->zone_id;
2398 	if ((psp->pr_contract = PRCTID(p)) == 0)
2399 		psp->pr_contract = -1;
2400 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2401 	switch (p->p_model) {
2402 	case DATAMODEL_ILP32:
2403 		psp->pr_dmodel = PR_MODEL_ILP32;
2404 		break;
2405 	case DATAMODEL_LP64:
2406 		psp->pr_dmodel = PR_MODEL_LP64;
2407 		break;
2408 	}
2409 	hrutime = mstate_aggr_state(p, LMS_USER);
2410 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2411 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2412 	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2413 
2414 	if (t == NULL) {
2415 		extern int wstat(int, int);	/* needs a header file */
2416 		int wcode = p->p_wcode;		/* must be atomic read */
2417 
2418 		if (wcode)
2419 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2420 		psp->pr_ttydev = PRNODEV32;
2421 		psp->pr_lwp.pr_state = SZOMB;
2422 		psp->pr_lwp.pr_sname = 'Z';
2423 	} else {
2424 		user_t *up = PTOU(p);
2425 		struct as *as;
2426 		dev_t d;
2427 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2428 
2429 		d = cttydev(p);
2430 		/*
2431 		 * If the controlling terminal is the real
2432 		 * or workstation console device, map to what the
2433 		 * user thinks is the console device. Handle case when
2434 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2435 		 */
2436 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2437 			d = uconsdev;
2438 		(void) cmpldev(&psp->pr_ttydev, d);
2439 		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2440 		bcopy(up->u_comm, psp->pr_fname,
2441 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2442 		bcopy(up->u_psargs, psp->pr_psargs,
2443 		    MIN(PRARGSZ-1, PSARGSZ));
2444 		psp->pr_argc = up->u_argc;
2445 		psp->pr_argv = (caddr32_t)up->u_argv;
2446 		psp->pr_envp = (caddr32_t)up->u_envp;
2447 
2448 		/* get the chosen lwp's lwpsinfo */
2449 		prgetlwpsinfo32(t, &psp->pr_lwp);
2450 
2451 		/* compute %cpu for the process */
2452 		if (p->p_lwpcnt == 1)
2453 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2454 		else {
2455 			uint64_t pct = 0;
2456 			hrtime_t cur_time;
2457 
2458 			t = p->p_tlist;
2459 			cur_time = gethrtime_unscaled();
2460 			do {
2461 				pct += cpu_update_pct(t, cur_time);
2462 			} while ((t = t->t_forw) != p->p_tlist);
2463 
2464 			psp->pr_pctcpu = prgetpctcpu(pct);
2465 		}
2466 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2467 			psp->pr_size = 0;
2468 			psp->pr_rssize = 0;
2469 		} else {
2470 			mutex_exit(&p->p_lock);
2471 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2472 			psp->pr_size = (size32_t)
2473 			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2474 			psp->pr_rssize = (size32_t)
2475 			    (rm_asrss(as) * (PAGESIZE / 1024));
2476 			psp->pr_pctmem = rm_pctmemory(as);
2477 			AS_LOCK_EXIT(as, &as->a_lock);
2478 			mutex_enter(&p->p_lock);
2479 		}
2480 	}
2481 
2482 	/*
2483 	 * If we are looking at an LP64 process, zero out
2484 	 * the fields that cannot be represented in ILP32.
2485 	 */
2486 	if (p->p_model != DATAMODEL_ILP32) {
2487 		psp->pr_size = 0;
2488 		psp->pr_rssize = 0;
2489 		psp->pr_argv = 0;
2490 		psp->pr_envp = 0;
2491 	}
2492 }
2493 #endif	/* _SYSCALL32_IMPL */
2494 
2495 void
2496 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2497 {
2498 	klwp_t *lwp = ttolwp(t);
2499 	sobj_ops_t *sobj;
2500 	char c, state;
2501 	uint64_t pct;
2502 	int retval, niceval;
2503 	hrtime_t hrutime, hrstime;
2504 
2505 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2506 
2507 	bzero(psp, sizeof (*psp));
2508 
2509 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2510 	psp->pr_lwpid = t->t_tid;
2511 	psp->pr_addr = (uintptr_t)t;
2512 	psp->pr_wchan = (uintptr_t)t->t_wchan;
2513 
2514 	/* map the thread state enum into a process state enum */
2515 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2516 	switch (state) {
2517 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2518 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2519 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2520 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2521 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2522 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2523 	default:		state = 0;		c = '?';	break;
2524 	}
2525 	psp->pr_state = state;
2526 	psp->pr_sname = c;
2527 	if ((sobj = t->t_sobj_ops) != NULL)
2528 		psp->pr_stype = SOBJ_TYPE(sobj);
2529 	retval = CL_DONICE(t, NULL, 0, &niceval);
2530 	if (retval == 0) {
2531 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2532 		psp->pr_nice = niceval + NZERO;
2533 	}
2534 	psp->pr_syscall = t->t_sysnum;
2535 	psp->pr_pri = t->t_pri;
2536 	psp->pr_start.tv_sec = t->t_start;
2537 	psp->pr_start.tv_nsec = 0L;
2538 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2539 	scalehrtime(&hrutime);
2540 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2541 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2542 	scalehrtime(&hrstime);
2543 	hrt2ts(hrutime + hrstime, &psp->pr_time);
2544 	/* compute %cpu for the lwp */
2545 	pct = cpu_update_pct(t, gethrtime_unscaled());
2546 	psp->pr_pctcpu = prgetpctcpu(pct);
2547 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2548 	if (psp->pr_cpu > 99)
2549 		psp->pr_cpu = 99;
2550 
2551 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2552 	    sizeof (psp->pr_clname) - 1);
2553 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2554 	psp->pr_onpro = t->t_cpu->cpu_id;
2555 	psp->pr_bindpro = t->t_bind_cpu;
2556 	psp->pr_bindpset = t->t_bind_pset;
2557 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2558 }
2559 
2560 #ifdef _SYSCALL32_IMPL
2561 void
2562 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2563 {
2564 	proc_t *p = ttoproc(t);
2565 	klwp_t *lwp = ttolwp(t);
2566 	sobj_ops_t *sobj;
2567 	char c, state;
2568 	uint64_t pct;
2569 	int retval, niceval;
2570 	hrtime_t hrutime, hrstime;
2571 
2572 	ASSERT(MUTEX_HELD(&p->p_lock));
2573 
2574 	bzero(psp, sizeof (*psp));
2575 
2576 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2577 	psp->pr_lwpid = t->t_tid;
2578 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2579 	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
2580 
2581 	/* map the thread state enum into a process state enum */
2582 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2583 	switch (state) {
2584 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2585 	case TS_RUN:		state = SRUN;		c = 'R';	break;
2586 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2587 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2588 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2589 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2590 	default:		state = 0;		c = '?';	break;
2591 	}
2592 	psp->pr_state = state;
2593 	psp->pr_sname = c;
2594 	if ((sobj = t->t_sobj_ops) != NULL)
2595 		psp->pr_stype = SOBJ_TYPE(sobj);
2596 	retval = CL_DONICE(t, NULL, 0, &niceval);
2597 	if (retval == 0) {
2598 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2599 		psp->pr_nice = niceval + NZERO;
2600 	} else {
2601 		psp->pr_oldpri = 0;
2602 		psp->pr_nice = 0;
2603 	}
2604 	psp->pr_syscall = t->t_sysnum;
2605 	psp->pr_pri = t->t_pri;
2606 	psp->pr_start.tv_sec = (time32_t)t->t_start;
2607 	psp->pr_start.tv_nsec = 0L;
2608 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2609 	scalehrtime(&hrutime);
2610 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2611 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2612 	scalehrtime(&hrstime);
2613 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2614 	/* compute %cpu for the lwp */
2615 	pct = cpu_update_pct(t, gethrtime_unscaled());
2616 	psp->pr_pctcpu = prgetpctcpu(pct);
2617 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2618 	if (psp->pr_cpu > 99)
2619 		psp->pr_cpu = 99;
2620 
2621 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2622 	    sizeof (psp->pr_clname) - 1);
2623 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2624 	psp->pr_onpro = t->t_cpu->cpu_id;
2625 	psp->pr_bindpro = t->t_bind_cpu;
2626 	psp->pr_bindpset = t->t_bind_pset;
2627 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2628 }
2629 #endif	/* _SYSCALL32_IMPL */
2630 
2631 /*
2632  * This used to get called when microstate accounting was disabled but
2633  * microstate information was requested.  Since Microstate accounting is on
2634  * regardless of the proc flags, this simply makes it appear to procfs that
2635  * microstate accounting is on.  This is relatively meaningless since you
2636  * can't turn it off, but this is here for the sake of appearances.
2637  */
2638 
2639 /*ARGSUSED*/
2640 void
2641 estimate_msacct(kthread_t *t, hrtime_t curtime)
2642 {
2643 	proc_t *p;
2644 
2645 	if (t == NULL)
2646 		return;
2647 
2648 	p = ttoproc(t);
2649 	ASSERT(MUTEX_HELD(&p->p_lock));
2650 
2651 	/*
2652 	 * A system process (p0) could be referenced if the thread is
2653 	 * in the process of exiting.  Don't turn on microstate accounting
2654 	 * in that case.
2655 	 */
2656 	if (p->p_flag & SSYS)
2657 		return;
2658 
2659 	/*
2660 	 * Loop through all the LWPs (kernel threads) in the process.
2661 	 */
2662 	t = p->p_tlist;
2663 	do {
2664 		t->t_proc_flag |= TP_MSACCT;
2665 	} while ((t = t->t_forw) != p->p_tlist);
2666 
2667 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
2668 }
2669 
2670 /*
2671  * It's not really possible to disable microstate accounting anymore.
2672  * However, this routine simply turns off the ms accounting flags in a process
2673  * This way procfs can still pretend to turn microstate accounting on and
2674  * off for a process, but it actually doesn't do anything.  This is
2675  * a neutered form of preemptive idiot-proofing.
2676  */
2677 void
2678 disable_msacct(proc_t *p)
2679 {
2680 	kthread_t *t;
2681 
2682 	ASSERT(MUTEX_HELD(&p->p_lock));
2683 
2684 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
2685 	/*
2686 	 * Loop through all the LWPs (kernel threads) in the process.
2687 	 */
2688 	if ((t = p->p_tlist) != NULL) {
2689 		do {
2690 			/* clear per-thread flag */
2691 			t->t_proc_flag &= ~TP_MSACCT;
2692 		} while ((t = t->t_forw) != p->p_tlist);
2693 	}
2694 }
2695 
2696 /*
2697  * Return resource usage information.
2698  */
2699 void
2700 prgetusage(kthread_t *t, prhusage_t *pup)
2701 {
2702 	klwp_t *lwp = ttolwp(t);
2703 	hrtime_t *mstimep;
2704 	struct mstate *ms = &lwp->lwp_mstate;
2705 	int state;
2706 	int i;
2707 	hrtime_t curtime;
2708 	hrtime_t waitrq;
2709 	hrtime_t tmp1;
2710 
2711 	curtime = gethrtime_unscaled();
2712 
2713 	pup->pr_lwpid	= t->t_tid;
2714 	pup->pr_count	= 1;
2715 	pup->pr_create	= ms->ms_start;
2716 	pup->pr_term    = ms->ms_term;
2717 	scalehrtime(&pup->pr_create);
2718 	scalehrtime(&pup->pr_term);
2719 	if (ms->ms_term == 0) {
2720 		pup->pr_rtime = curtime - ms->ms_start;
2721 		scalehrtime(&pup->pr_rtime);
2722 	} else {
2723 		pup->pr_rtime = ms->ms_term - ms->ms_start;
2724 		scalehrtime(&pup->pr_rtime);
2725 	}
2726 
2727 
2728 	pup->pr_utime    = ms->ms_acct[LMS_USER];
2729 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
2730 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
2731 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
2732 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
2733 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
2734 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
2735 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
2736 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
2737 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2738 
2739 	prscaleusage(pup);
2740 
2741 	/*
2742 	 * Adjust for time waiting in the dispatcher queue.
2743 	 */
2744 	waitrq = t->t_waitrq;	/* hopefully atomic */
2745 	if (waitrq != 0) {
2746 		if (waitrq > curtime) {
2747 			curtime = gethrtime_unscaled();
2748 		}
2749 		tmp1 = curtime - waitrq;
2750 		scalehrtime(&tmp1);
2751 		pup->pr_wtime += tmp1;
2752 		curtime = waitrq;
2753 	}
2754 
2755 	/*
2756 	 * Adjust for time spent in current microstate.
2757 	 */
2758 	if (ms->ms_state_start > curtime) {
2759 		curtime = gethrtime_unscaled();
2760 	}
2761 
2762 	i = 0;
2763 	do {
2764 		switch (state = t->t_mstate) {
2765 		case LMS_SLEEP:
2766 			/*
2767 			 * Update the timer for the current sleep state.
2768 			 */
2769 			switch (state = ms->ms_prev) {
2770 			case LMS_TFAULT:
2771 			case LMS_DFAULT:
2772 			case LMS_KFAULT:
2773 			case LMS_USER_LOCK:
2774 				break;
2775 			default:
2776 				state = LMS_SLEEP;
2777 				break;
2778 			}
2779 			break;
2780 		case LMS_TFAULT:
2781 		case LMS_DFAULT:
2782 		case LMS_KFAULT:
2783 		case LMS_USER_LOCK:
2784 			state = LMS_SYSTEM;
2785 			break;
2786 		}
2787 		switch (state) {
2788 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2789 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2790 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2791 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2792 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2793 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2794 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2795 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2796 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2797 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2798 		default:		panic("prgetusage: unknown microstate");
2799 		}
2800 		tmp1 = curtime - ms->ms_state_start;
2801 		if (tmp1 < 0) {
2802 			curtime = gethrtime_unscaled();
2803 			i++;
2804 			continue;
2805 		}
2806 		scalehrtime(&tmp1);
2807 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2808 
2809 	*mstimep += tmp1;
2810 
2811 	/* update pup timestamp */
2812 	pup->pr_tstamp = curtime;
2813 	scalehrtime(&pup->pr_tstamp);
2814 
2815 	/*
2816 	 * Resource usage counters.
2817 	 */
2818 	pup->pr_minf  = lwp->lwp_ru.minflt;
2819 	pup->pr_majf  = lwp->lwp_ru.majflt;
2820 	pup->pr_nswap = lwp->lwp_ru.nswap;
2821 	pup->pr_inblk = lwp->lwp_ru.inblock;
2822 	pup->pr_oublk = lwp->lwp_ru.oublock;
2823 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
2824 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
2825 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
2826 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
2827 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
2828 	pup->pr_sysc  = lwp->lwp_ru.sysc;
2829 	pup->pr_ioch  = lwp->lwp_ru.ioch;
2830 }
2831 
2832 /*
2833  * Convert ms_acct stats from unscaled high-res time to nanoseconds
2834  */
2835 void
2836 prscaleusage(prhusage_t *usg)
2837 {
2838 	scalehrtime(&usg->pr_utime);
2839 	scalehrtime(&usg->pr_stime);
2840 	scalehrtime(&usg->pr_ttime);
2841 	scalehrtime(&usg->pr_tftime);
2842 	scalehrtime(&usg->pr_dftime);
2843 	scalehrtime(&usg->pr_kftime);
2844 	scalehrtime(&usg->pr_ltime);
2845 	scalehrtime(&usg->pr_slptime);
2846 	scalehrtime(&usg->pr_wtime);
2847 	scalehrtime(&usg->pr_stoptime);
2848 }
2849 
2850 
2851 /*
2852  * Sum resource usage information.
2853  */
2854 void
2855 praddusage(kthread_t *t, prhusage_t *pup)
2856 {
2857 	klwp_t *lwp = ttolwp(t);
2858 	hrtime_t *mstimep;
2859 	struct mstate *ms = &lwp->lwp_mstate;
2860 	int state;
2861 	int i;
2862 	hrtime_t curtime;
2863 	hrtime_t waitrq;
2864 	hrtime_t tmp;
2865 	prhusage_t conv;
2866 
2867 	curtime = gethrtime_unscaled();
2868 
2869 	if (ms->ms_term == 0) {
2870 		tmp = curtime - ms->ms_start;
2871 		scalehrtime(&tmp);
2872 		pup->pr_rtime += tmp;
2873 	} else {
2874 		tmp = ms->ms_term - ms->ms_start;
2875 		scalehrtime(&tmp);
2876 		pup->pr_rtime += tmp;
2877 	}
2878 
2879 	conv.pr_utime = ms->ms_acct[LMS_USER];
2880 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2881 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2882 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2883 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2884 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2885 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2886 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2887 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2888 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2889 
2890 	prscaleusage(&conv);
2891 
2892 	pup->pr_utime	+= conv.pr_utime;
2893 	pup->pr_stime	+= conv.pr_stime;
2894 	pup->pr_ttime	+= conv.pr_ttime;
2895 	pup->pr_tftime	+= conv.pr_tftime;
2896 	pup->pr_dftime	+= conv.pr_dftime;
2897 	pup->pr_kftime	+= conv.pr_kftime;
2898 	pup->pr_ltime	+= conv.pr_ltime;
2899 	pup->pr_slptime	+= conv.pr_slptime;
2900 	pup->pr_wtime	+= conv.pr_wtime;
2901 	pup->pr_stoptime += conv.pr_stoptime;
2902 
2903 	/*
2904 	 * Adjust for time waiting in the dispatcher queue.
2905 	 */
2906 	waitrq = t->t_waitrq;	/* hopefully atomic */
2907 	if (waitrq != 0) {
2908 		if (waitrq > curtime) {
2909 			curtime = gethrtime_unscaled();
2910 		}
2911 		tmp = curtime - waitrq;
2912 		scalehrtime(&tmp);
2913 		pup->pr_wtime += tmp;
2914 		curtime = waitrq;
2915 	}
2916 
2917 	/*
2918 	 * Adjust for time spent in current microstate.
2919 	 */
2920 	if (ms->ms_state_start > curtime) {
2921 		curtime = gethrtime_unscaled();
2922 	}
2923 
2924 	i = 0;
2925 	do {
2926 		switch (state = t->t_mstate) {
2927 		case LMS_SLEEP:
2928 			/*
2929 			 * Update the timer for the current sleep state.
2930 			 */
2931 			switch (state = ms->ms_prev) {
2932 			case LMS_TFAULT:
2933 			case LMS_DFAULT:
2934 			case LMS_KFAULT:
2935 			case LMS_USER_LOCK:
2936 				break;
2937 			default:
2938 				state = LMS_SLEEP;
2939 				break;
2940 			}
2941 			break;
2942 		case LMS_TFAULT:
2943 		case LMS_DFAULT:
2944 		case LMS_KFAULT:
2945 		case LMS_USER_LOCK:
2946 			state = LMS_SYSTEM;
2947 			break;
2948 		}
2949 		switch (state) {
2950 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2951 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2952 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2953 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2954 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2955 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2956 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2957 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2958 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2959 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2960 		default:		panic("praddusage: unknown microstate");
2961 		}
2962 		tmp = curtime - ms->ms_state_start;
2963 		if (tmp < 0) {
2964 			curtime = gethrtime_unscaled();
2965 			i++;
2966 			continue;
2967 		}
2968 		scalehrtime(&tmp);
2969 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
2970 
2971 	*mstimep += tmp;
2972 
2973 	/* update pup timestamp */
2974 	pup->pr_tstamp = curtime;
2975 	scalehrtime(&pup->pr_tstamp);
2976 
2977 	/*
2978 	 * Resource usage counters.
2979 	 */
2980 	pup->pr_minf  += lwp->lwp_ru.minflt;
2981 	pup->pr_majf  += lwp->lwp_ru.majflt;
2982 	pup->pr_nswap += lwp->lwp_ru.nswap;
2983 	pup->pr_inblk += lwp->lwp_ru.inblock;
2984 	pup->pr_oublk += lwp->lwp_ru.oublock;
2985 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
2986 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
2987 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
2988 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
2989 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
2990 	pup->pr_sysc  += lwp->lwp_ru.sysc;
2991 	pup->pr_ioch  += lwp->lwp_ru.ioch;
2992 }
2993 
2994 /*
2995  * Convert a prhusage_t to a prusage_t.
2996  * This means convert each hrtime_t to a timestruc_t
2997  * and copy the count fields uint64_t => ulong_t.
2998  */
2999 void
3000 prcvtusage(prhusage_t *pup, prusage_t *upup)
3001 {
3002 	uint64_t *ullp;
3003 	ulong_t *ulp;
3004 	int i;
3005 
3006 	upup->pr_lwpid = pup->pr_lwpid;
3007 	upup->pr_count = pup->pr_count;
3008 
3009 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3010 	hrt2ts(pup->pr_create,	&upup->pr_create);
3011 	hrt2ts(pup->pr_term,	&upup->pr_term);
3012 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3013 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3014 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3015 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3016 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3017 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3018 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3019 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3020 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3021 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3022 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3023 	bzero(upup->filltime, sizeof (upup->filltime));
3024 
3025 	ullp = &pup->pr_minf;
3026 	ulp = &upup->pr_minf;
3027 	for (i = 0; i < 22; i++)
3028 		*ulp++ = (ulong_t)*ullp++;
3029 }
3030 
3031 #ifdef _SYSCALL32_IMPL
3032 void
3033 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3034 {
3035 	uint64_t *ullp;
3036 	uint32_t *ulp;
3037 	int i;
3038 
3039 	upup->pr_lwpid = pup->pr_lwpid;
3040 	upup->pr_count = pup->pr_count;
3041 
3042 	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3043 	hrt2ts32(pup->pr_create,	&upup->pr_create);
3044 	hrt2ts32(pup->pr_term,		&upup->pr_term);
3045 	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3046 	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3047 	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3048 	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3049 	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3050 	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3051 	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3052 	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3053 	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3054 	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3055 	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3056 	bzero(upup->filltime, sizeof (upup->filltime));
3057 
3058 	ullp = &pup->pr_minf;
3059 	ulp = &upup->pr_minf;
3060 	for (i = 0; i < 22; i++)
3061 		*ulp++ = (uint32_t)*ullp++;
3062 }
3063 #endif	/* _SYSCALL32_IMPL */
3064 
/*
 * Report whether a set, stored as n 32-bit words starting at sp, is empty.
 * Returns 1 if every word is zero (or n is zero), 0 otherwise.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t i;

	for (i = 0; i < n; i++) {
		if (sp[i] != 0)
			return (0);
	}
	return (1);
}
3076 
3077 /*
3078  * Utility routine for establishing a watched area in the process.
3079  * Keep the list of watched areas sorted by virtual address.
3080  */
3081 int
3082 set_watched_area(proc_t *p, struct watched_area *pwa)
3083 {
3084 	caddr_t vaddr = pwa->wa_vaddr;
3085 	caddr_t eaddr = pwa->wa_eaddr;
3086 	ulong_t flags = pwa->wa_flags;
3087 	struct watched_area *target;
3088 	avl_index_t where;
3089 	int error = 0;
3090 
3091 	/* we must not be holding p->p_lock, but the process must be locked */
3092 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3093 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3094 
3095 	/*
3096 	 * If this is our first watchpoint, enable watchpoints for the process.
3097 	 */
3098 	if (!pr_watch_active(p)) {
3099 		kthread_t *t;
3100 
3101 		mutex_enter(&p->p_lock);
3102 		if ((t = p->p_tlist) != NULL) {
3103 			do {
3104 				watch_enable(t);
3105 			} while ((t = t->t_forw) != p->p_tlist);
3106 		}
3107 		mutex_exit(&p->p_lock);
3108 	}
3109 
3110 	target = pr_find_watched_area(p, pwa, &where);
3111 	if (target != NULL) {
3112 		/*
3113 		 * We discovered an existing, overlapping watched area.
3114 		 * Allow it only if it is an exact match.
3115 		 */
3116 		if (target->wa_vaddr != vaddr ||
3117 		    target->wa_eaddr != eaddr)
3118 			error = EINVAL;
3119 		else if (target->wa_flags != flags) {
3120 			error = set_watched_page(p, vaddr, eaddr,
3121 			    flags, target->wa_flags);
3122 			target->wa_flags = flags;
3123 		}
3124 		kmem_free(pwa, sizeof (struct watched_area));
3125 	} else {
3126 		avl_insert(&p->p_warea, pwa, where);
3127 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3128 	}
3129 
3130 	return (error);
3131 }
3132 
3133 /*
3134  * Utility routine for clearing a watched area in the process.
3135  * Must be an exact match of the virtual address.
3136  * size and flags don't matter.
3137  */
3138 int
3139 clear_watched_area(proc_t *p, struct watched_area *pwa)
3140 {
3141 	struct watched_area *found;
3142 
3143 	/* we must not be holding p->p_lock, but the process must be locked */
3144 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3145 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3146 
3147 
3148 	if (!pr_watch_active(p)) {
3149 		kmem_free(pwa, sizeof (struct watched_area));
3150 		return (0);
3151 	}
3152 
3153 	/*
3154 	 * Look for a matching address in the watched areas.  If a match is
3155 	 * found, clear the old watched area and adjust the watched page(s).  It
3156 	 * is not an error if there is no match.
3157 	 */
3158 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3159 	    found->wa_vaddr == pwa->wa_vaddr) {
3160 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3161 		    found->wa_flags);
3162 		avl_remove(&p->p_warea, found);
3163 		kmem_free(found, sizeof (struct watched_area));
3164 	}
3165 
3166 	kmem_free(pwa, sizeof (struct watched_area));
3167 
3168 	/*
3169 	 * If we removed the last watched area from the process, disable
3170 	 * watchpoints.
3171 	 */
3172 	if (!pr_watch_active(p)) {
3173 		kthread_t *t;
3174 
3175 		mutex_enter(&p->p_lock);
3176 		if ((t = p->p_tlist) != NULL) {
3177 			do {
3178 				watch_disable(t);
3179 			} while ((t = t->t_forw) != p->p_tlist);
3180 		}
3181 		mutex_exit(&p->p_lock);
3182 	}
3183 
3184 	return (0);
3185 }
3186 
3187 /*
3188  * Frees all the watched_area structures
3189  */
3190 void
3191 pr_free_watchpoints(proc_t *p)
3192 {
3193 	struct watched_area *delp;
3194 	void *cookie;
3195 
3196 	cookie = NULL;
3197 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3198 		kmem_free(delp, sizeof (struct watched_area));
3199 
3200 	avl_destroy(&p->p_warea);
3201 }
3202 
3203 /*
3204  * This one is called by the traced process to unwatch all the
3205  * pages while deallocating the list of watched_page structs.
3206  */
3207 void
3208 pr_free_watched_pages(proc_t *p)
3209 {
3210 	struct as *as = p->p_as;
3211 	struct watched_page *pwp;
3212 	uint_t prot;
3213 	int    retrycnt, err;
3214 	void *cookie;
3215 
3216 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3217 		return;
3218 
3219 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3220 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3221 
3222 	pwp = avl_first(&as->a_wpage);
3223 
3224 	cookie = NULL;
3225 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3226 		retrycnt = 0;
3227 		if ((prot = pwp->wp_oprot) != 0) {
3228 			caddr_t addr = pwp->wp_vaddr;
3229 			struct seg *seg;
3230 		retry:
3231 
3232 			if ((pwp->wp_prot != prot ||
3233 			    (pwp->wp_flags & WP_NOWATCH)) &&
3234 			    (seg = as_segat(as, addr)) != NULL) {
3235 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3236 				if (err == IE_RETRY) {
3237 					ASSERT(retrycnt == 0);
3238 					retrycnt++;
3239 					goto retry;
3240 				}
3241 			}
3242 		}
3243 		kmem_free(pwp, sizeof (struct watched_page));
3244 	}
3245 
3246 	avl_destroy(&as->a_wpage);
3247 	p->p_wprot = NULL;
3248 
3249 	AS_LOCK_EXIT(as, &as->a_lock);
3250 }
3251 
3252 /*
3253  * Insert a watched area into the list of watched pages.
3254  * If oflags is zero then we are adding a new watched area.
3255  * Otherwise we are changing the flags of an existing watched area.
3256  */
3257 static int
3258 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3259 	ulong_t flags, ulong_t oflags)
3260 {
3261 	struct as *as = p->p_as;
3262 	avl_tree_t *pwp_tree;
3263 	struct watched_page *pwp, *newpwp;
3264 	struct watched_page tpw;
3265 	avl_index_t where;
3266 	struct seg *seg;
3267 	uint_t prot;
3268 	caddr_t addr;
3269 
3270 	/*
3271 	 * We need to pre-allocate a list of structures before we grab the
3272 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3273 	 * held.
3274 	 */
3275 	newpwp = NULL;
3276 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3277 	    addr < eaddr; addr += PAGESIZE) {
3278 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3279 		pwp->wp_list = newpwp;
3280 		newpwp = pwp;
3281 	}
3282 
3283 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3284 
3285 	/*
3286 	 * Search for an existing watched page to contain the watched area.
3287 	 * If none is found, grab a new one from the available list
3288 	 * and insert it in the active list, keeping the list sorted
3289 	 * by user-level virtual address.
3290 	 */
3291 	if (p->p_flag & SVFWAIT)
3292 		pwp_tree = &p->p_wpage;
3293 	else
3294 		pwp_tree = &as->a_wpage;
3295 
3296 again:
3297 	if (avl_numnodes(pwp_tree) > prnwatch) {
3298 		AS_LOCK_EXIT(as, &as->a_lock);
3299 		while (newpwp != NULL) {
3300 			pwp = newpwp->wp_list;
3301 			kmem_free(newpwp, sizeof (struct watched_page));
3302 			newpwp = pwp;
3303 		}
3304 		return (E2BIG);
3305 	}
3306 
3307 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3308 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3309 		pwp = newpwp;
3310 		newpwp = newpwp->wp_list;
3311 		pwp->wp_list = NULL;
3312 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3313 		    (uintptr_t)PAGEMASK);
3314 		avl_insert(pwp_tree, pwp, where);
3315 	}
3316 
3317 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3318 
3319 	if (oflags & WA_READ)
3320 		pwp->wp_read--;
3321 	if (oflags & WA_WRITE)
3322 		pwp->wp_write--;
3323 	if (oflags & WA_EXEC)
3324 		pwp->wp_exec--;
3325 
3326 	ASSERT(pwp->wp_read >= 0);
3327 	ASSERT(pwp->wp_write >= 0);
3328 	ASSERT(pwp->wp_exec >= 0);
3329 
3330 	if (flags & WA_READ)
3331 		pwp->wp_read++;
3332 	if (flags & WA_WRITE)
3333 		pwp->wp_write++;
3334 	if (flags & WA_EXEC)
3335 		pwp->wp_exec++;
3336 
3337 	if (!(p->p_flag & SVFWAIT)) {
3338 		vaddr = pwp->wp_vaddr;
3339 		if (pwp->wp_oprot == 0 &&
3340 		    (seg = as_segat(as, vaddr)) != NULL) {
3341 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
3342 			pwp->wp_oprot = (uchar_t)prot;
3343 			pwp->wp_prot = (uchar_t)prot;
3344 		}
3345 		if (pwp->wp_oprot != 0) {
3346 			prot = pwp->wp_oprot;
3347 			if (pwp->wp_read)
3348 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3349 			if (pwp->wp_write)
3350 				prot &= ~PROT_WRITE;
3351 			if (pwp->wp_exec)
3352 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3353 			if (!(pwp->wp_flags & WP_NOWATCH) &&
3354 			    pwp->wp_prot != prot &&
3355 			    (pwp->wp_flags & WP_SETPROT) == 0) {
3356 				pwp->wp_flags |= WP_SETPROT;
3357 				pwp->wp_list = p->p_wprot;
3358 				p->p_wprot = pwp;
3359 			}
3360 			pwp->wp_prot = (uchar_t)prot;
3361 		}
3362 	}
3363 
3364 	/*
3365 	 * If the watched area extends into the next page then do
3366 	 * it over again with the virtual address of the next page.
3367 	 */
3368 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3369 		goto again;
3370 
3371 	AS_LOCK_EXIT(as, &as->a_lock);
3372 
3373 	/*
3374 	 * Free any pages we may have over-allocated
3375 	 */
3376 	while (newpwp != NULL) {
3377 		pwp = newpwp->wp_list;
3378 		kmem_free(newpwp, sizeof (struct watched_page));
3379 		newpwp = pwp;
3380 	}
3381 
3382 	return (0);
3383 }
3384 
3385 /*
3386  * Remove a watched area from the list of watched pages.
3387  * A watched area may extend over more than one page.
3388  */
3389 static void
3390 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3391 {
3392 	struct as *as = p->p_as;
3393 	struct watched_page *pwp;
3394 	struct watched_page tpw;
3395 	avl_tree_t *tree;
3396 	avl_index_t where;
3397 
3398 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3399 
3400 	if (p->p_flag & SVFWAIT)
3401 		tree = &p->p_wpage;
3402 	else
3403 		tree = &as->a_wpage;
3404 
3405 	tpw.wp_vaddr = vaddr =
3406 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3407 	pwp = avl_find(tree, &tpw, &where);
3408 	if (pwp == NULL)
3409 		pwp = avl_nearest(tree, where, AVL_AFTER);
3410 
3411 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3412 		ASSERT(vaddr <=  pwp->wp_vaddr);
3413 
3414 		if (flags & WA_READ)
3415 			pwp->wp_read--;
3416 		if (flags & WA_WRITE)
3417 			pwp->wp_write--;
3418 		if (flags & WA_EXEC)
3419 			pwp->wp_exec--;
3420 
3421 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3422 			/*
3423 			 * Reset the hat layer's protections on this page.
3424 			 */
3425 			if (pwp->wp_oprot != 0) {
3426 				uint_t prot = pwp->wp_oprot;
3427 
3428 				if (pwp->wp_read)
3429 					prot &=
3430 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3431 				if (pwp->wp_write)
3432 					prot &= ~PROT_WRITE;
3433 				if (pwp->wp_exec)
3434 					prot &=
3435 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3436 				if (!(pwp->wp_flags & WP_NOWATCH) &&
3437 				    pwp->wp_prot != prot &&
3438 				    (pwp->wp_flags & WP_SETPROT) == 0) {
3439 					pwp->wp_flags |= WP_SETPROT;
3440 					pwp->wp_list = p->p_wprot;
3441 					p->p_wprot = pwp;
3442 				}
3443 				pwp->wp_prot = (uchar_t)prot;
3444 			}
3445 		} else {
3446 			/*
3447 			 * No watched areas remain in this page.
3448 			 * Reset everything to normal.
3449 			 */
3450 			if (pwp->wp_oprot != 0) {
3451 				pwp->wp_prot = pwp->wp_oprot;
3452 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
3453 					pwp->wp_flags |= WP_SETPROT;
3454 					pwp->wp_list = p->p_wprot;
3455 					p->p_wprot = pwp;
3456 				}
3457 			}
3458 		}
3459 
3460 		pwp = AVL_NEXT(tree, pwp);
3461 	}
3462 
3463 	AS_LOCK_EXIT(as, &as->a_lock);
3464 }
3465 
3466 /*
3467  * Return the original protections for the specified page.
3468  */
3469 static void
3470 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3471 {
3472 	struct watched_page *pwp;
3473 	struct watched_page tpw;
3474 
3475 	ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3476 
3477 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3478 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3479 		*prot = pwp->wp_oprot;
3480 }
3481 
3482 static prpagev_t *
3483 pr_pagev_create(struct seg *seg, int check_noreserve)
3484 {
3485 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3486 	size_t total_pages = seg_pages(seg);
3487 
3488 	/*
3489 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
3490 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
3491 	 * to about a megabyte of kernel heap by default.
3492 	 */
3493 	pagev->pg_npages = MIN(total_pages, pagev_lim);
3494 	pagev->pg_pnbase = 0;
3495 
3496 	pagev->pg_protv =
3497 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3498 
3499 	if (check_noreserve)
3500 		pagev->pg_incore =
3501 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3502 	else
3503 		pagev->pg_incore = NULL;
3504 
3505 	return (pagev);
3506 }
3507 
3508 static void
3509 pr_pagev_destroy(prpagev_t *pagev)
3510 {
3511 	if (pagev->pg_incore != NULL)
3512 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3513 
3514 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3515 	kmem_free(pagev, sizeof (prpagev_t));
3516 }
3517 
3518 static caddr_t
3519 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3520 {
3521 	ulong_t lastpg = seg_page(seg, eaddr - 1);
3522 	ulong_t pn, pnlim;
3523 	caddr_t saddr;
3524 	size_t len;
3525 
3526 	ASSERT(addr >= seg->s_base && addr <= eaddr);
3527 
3528 	if (addr == eaddr)
3529 		return (eaddr);
3530 
3531 refill:
3532 	ASSERT(addr < eaddr);
3533 	pagev->pg_pnbase = seg_page(seg, addr);
3534 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
3535 	saddr = addr;
3536 
3537 	if (lastpg < pnlim)
3538 		len = (size_t)(eaddr - addr);
3539 	else
3540 		len = pagev->pg_npages * PAGESIZE;
3541 
3542 	if (pagev->pg_incore != NULL) {
3543 		/*
3544 		 * INCORE cleverly has different semantics than GETPROT:
3545 		 * it returns info on pages up to but NOT including addr + len.
3546 		 */
3547 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3548 		pn = pagev->pg_pnbase;
3549 
3550 		do {
3551 			/*
3552 			 * Guilty knowledge here:  We know that segvn_incore
3553 			 * returns more than just the low-order bit that
3554 			 * indicates the page is actually in memory.  If any
3555 			 * bits are set, then the page has backing store.
3556 			 */
3557 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3558 				goto out;
3559 
3560 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3561 
3562 		/*
3563 		 * If we examined all the pages in the vector but we're not
3564 		 * at the end of the segment, take another lap.
3565 		 */
3566 		if (addr < eaddr)
3567 			goto refill;
3568 	}
3569 
3570 	/*
3571 	 * Need to take len - 1 because addr + len is the address of the
3572 	 * first byte of the page just past the end of what we want.
3573 	 */
3574 out:
3575 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3576 	return (addr);
3577 }
3578 
3579 static caddr_t
3580 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3581     caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3582 {
3583 	/*
3584 	 * Our starting address is either the specified address, or the base
3585 	 * address from the start of the pagev.  If the latter is greater,
3586 	 * this means a previous call to pr_pagev_fill has already scanned
3587 	 * further than the end of the previous mapping.
3588 	 */
3589 	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3590 	caddr_t addr = MAX(*saddrp, base);
3591 	ulong_t pn = seg_page(seg, addr);
3592 	uint_t prot, nprot;
3593 
3594 	/*
3595 	 * If we're dealing with noreserve pages, then advance addr to
3596 	 * the address of the next page which has backing store.
3597 	 */
3598 	if (pagev->pg_incore != NULL) {
3599 		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3600 			if ((addr += PAGESIZE) == eaddr) {
3601 				*saddrp = addr;
3602 				prot = 0;
3603 				goto out;
3604 			}
3605 			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3606 				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3607 				if (addr == eaddr) {
3608 					*saddrp = addr;
3609 					prot = 0;
3610 					goto out;
3611 				}
3612 				pn = seg_page(seg, addr);
3613 			}
3614 		}
3615 	}
3616 
3617 	/*
3618 	 * Get the protections on the page corresponding to addr.
3619 	 */
3620 	pn = seg_page(seg, addr);
3621 	ASSERT(pn >= pagev->pg_pnbase);
3622 	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3623 
3624 	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3625 	getwatchprot(seg->s_as, addr, &prot);
3626 	*saddrp = addr;
3627 
3628 	/*
3629 	 * Now loop until we find a backed page with different protections
3630 	 * or we reach the end of this segment.
3631 	 */
3632 	while ((addr += PAGESIZE) < eaddr) {
3633 		/*
3634 		 * If pn has advanced to the page number following what we
3635 		 * have information on, refill the page vector and reset
3636 		 * addr and pn.  If pr_pagev_fill does not return the
3637 		 * address of the next page, we have a discontiguity and
3638 		 * thus have reached the end of the current mapping.
3639 		 */
3640 		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3641 			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3642 			if (naddr != addr)
3643 				goto out;
3644 			pn = seg_page(seg, addr);
3645 		}
3646 
3647 		/*
3648 		 * The previous page's protections are in prot, and it has
3649 		 * backing.  If this page is MAP_NORESERVE and has no backing,
3650 		 * then end this mapping and return the previous protections.
3651 		 */
3652 		if (pagev->pg_incore != NULL &&
3653 		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3654 			break;
3655 
3656 		/*
3657 		 * Otherwise end the mapping if this page's protections (nprot)
3658 		 * are different than those in the previous page (prot).
3659 		 */
3660 		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3661 		getwatchprot(seg->s_as, addr, &nprot);
3662 
3663 		if (nprot != prot)
3664 			break;
3665 	}
3666 
3667 out:
3668 	*protp = prot;
3669 	return (addr);
3670 }
3671 
3672 size_t
3673 pr_getsegsize(struct seg *seg, int reserved)
3674 {
3675 	size_t size = seg->s_size;
3676 
3677 	/*
3678 	 * If we're interested in the reserved space, return the size of the
3679 	 * segment itself.  Everything else in this function is a special case
3680 	 * to determine the actual underlying size of various segment types.
3681 	 */
3682 	if (reserved)
3683 		return (size);
3684 
3685 	/*
3686 	 * If this is a segvn mapping of a regular file, return the smaller
3687 	 * of the segment size and the remaining size of the file beyond
3688 	 * the file offset corresponding to seg->s_base.
3689 	 */
3690 	if (seg->s_ops == &segvn_ops) {
3691 		vattr_t vattr;
3692 		vnode_t *vp;
3693 
3694 		vattr.va_mask = AT_SIZE;
3695 
3696 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3697 		    vp != NULL && vp->v_type == VREG &&
3698 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3699 
3700 			u_offset_t fsize = vattr.va_size;
3701 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3702 
3703 			if (fsize < offset)
3704 				fsize = 0;
3705 			else
3706 				fsize -= offset;
3707 
3708 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3709 
3710 			if (fsize < (u_offset_t)size)
3711 				size = (size_t)fsize;
3712 		}
3713 
3714 		return (size);
3715 	}
3716 
3717 	/*
3718 	 * If this is an ISM shared segment, don't include pages that are
3719 	 * beyond the real size of the spt segment that backs it.
3720 	 */
3721 	if (seg->s_ops == &segspt_shmops)
3722 		return (MIN(spt_realsize(seg), size));
3723 
3724 	/*
3725 	 * If this is segment is a mapping from /dev/null, then this is a
3726 	 * reservation of virtual address space and has no actual size.
3727 	 * Such segments are backed by segdev and have type set to neither
3728 	 * MAP_SHARED nor MAP_PRIVATE.
3729 	 */
3730 	if (seg->s_ops == &segdev_ops &&
3731 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
3732 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
3733 		return (0);
3734 
3735 	/*
3736 	 * If this segment doesn't match one of the special types we handle,
3737 	 * just return the size of the segment itself.
3738 	 */
3739 	return (size);
3740 }
3741 
3742 uint_t
3743 pr_getprot(struct seg *seg, int reserved, void **tmp,
3744 	caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3745 {
3746 	struct as *as = seg->s_as;
3747 
3748 	caddr_t saddr = *saddrp;
3749 	caddr_t naddr;
3750 
3751 	int check_noreserve;
3752 	uint_t prot;
3753 
3754 	union {
3755 		struct segvn_data *svd;
3756 		struct segdev_data *sdp;
3757 		void *data;
3758 	} s;
3759 
3760 	s.data = seg->s_data;
3761 
3762 	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3763 	ASSERT(saddr >= seg->s_base && saddr < eaddr);
3764 	ASSERT(eaddr <= seg->s_base + seg->s_size);
3765 
3766 	/*
3767 	 * Don't include MAP_NORESERVE pages in the address range
3768 	 * unless their mappings have actually materialized.
3769 	 * We cheat by knowing that segvn is the only segment
3770 	 * driver that supports MAP_NORESERVE.
3771 	 */
3772 	check_noreserve =
3773 	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3774 	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3775 	    (s.svd->flags & MAP_NORESERVE));
3776 
3777 	/*
3778 	 * Examine every page only as a last resort.  We use guilty knowledge
3779 	 * of segvn and segdev to avoid this: if there are no per-page
3780 	 * protections present in the segment and we don't care about
3781 	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3782 	 */
3783 	if (!check_noreserve && saddr == seg->s_base &&
3784 	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3785 		prot = s.svd->prot;
3786 		getwatchprot(as, saddr, &prot);
3787 		naddr = eaddr;
3788 
3789 	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3790 	    s.sdp != NULL && s.sdp->pageprot == 0) {
3791 		prot = s.sdp->prot;
3792 		getwatchprot(as, saddr, &prot);
3793 		naddr = eaddr;
3794 
3795 	} else {
3796 		prpagev_t *pagev;
3797 
3798 		/*
3799 		 * If addr is sitting at the start of the segment, then
3800 		 * create a page vector to store protection and incore
3801 		 * information for pages in the segment, and fill it.
3802 		 * Otherwise, we expect *tmp to address the prpagev_t
3803 		 * allocated by a previous call to this function.
3804 		 */
3805 		if (saddr == seg->s_base) {
3806 			pagev = pr_pagev_create(seg, check_noreserve);
3807 			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3808 
3809 			ASSERT(*tmp == NULL);
3810 			*tmp = pagev;
3811 
3812 			ASSERT(saddr <= eaddr);
3813 			*saddrp = saddr;
3814 
3815 			if (saddr == eaddr) {
3816 				naddr = saddr;
3817 				prot = 0;
3818 				goto out;
3819 			}
3820 
3821 		} else {
3822 			ASSERT(*tmp != NULL);
3823 			pagev = (prpagev_t *)*tmp;
3824 		}
3825 
3826 		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3827 		ASSERT(naddr <= eaddr);
3828 	}
3829 
3830 out:
3831 	if (naddr == eaddr)
3832 		pr_getprot_done(tmp);
3833 	*naddrp = naddr;
3834 	return (prot);
3835 }
3836 
3837 void
3838 pr_getprot_done(void **tmp)
3839 {
3840 	if (*tmp != NULL) {
3841 		pr_pagev_destroy((prpagev_t *)*tmp);
3842 		*tmp = NULL;
3843 	}
3844 }
3845 
3846 /*
3847  * Return true iff the vnode is a /proc file from the object directory.
3848  */
3849 int
3850 pr_isobject(vnode_t *vp)
3851 {
3852 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3853 }
3854 
3855 /*
3856  * Return true iff the vnode is a /proc file opened by the process itself.
3857  */
3858 int
3859 pr_isself(vnode_t *vp)
3860 {
3861 	/*
3862 	 * XXX: To retain binary compatibility with the old
3863 	 * ioctl()-based version of /proc, we exempt self-opens
3864 	 * of /proc/<pid> from being marked close-on-exec.
3865 	 */
3866 	return (vn_matchops(vp, prvnodeops) &&
3867 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
3868 	    VTOP(vp)->pr_type != PR_PIDDIR);
3869 }
3870 
3871 static ssize_t
3872 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3873 {
3874 	ssize_t pagesize, hatsize;
3875 
3876 	ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
3877 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3878 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3879 	ASSERT(saddr < eaddr);
3880 
3881 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3882 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3883 	ASSERT(pagesize != 0);
3884 
3885 	if (pagesize == -1)
3886 		pagesize = PAGESIZE;
3887 
3888 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3889 
3890 	while (saddr < eaddr) {
3891 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3892 			break;
3893 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
3894 		saddr += pagesize;
3895 	}
3896 
3897 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
3898 	return (hatsize);
3899 }
3900 
3901 /*
3902  * Return an array of structures with extended memory map information.
3903  * We allocate here; the caller must deallocate.
3904  */
3905 int
3906 prgetxmap(proc_t *p, list_t *iolhead)
3907 {
3908 	struct as *as = p->p_as;
3909 	prxmap_t *mp;
3910 	struct seg *seg;
3911 	struct seg *brkseg, *stkseg;
3912 	struct vnode *vp;
3913 	struct vattr vattr;
3914 	uint_t prot;
3915 
3916 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
3917 
3918 	/*
3919 	 * Request an initial buffer size that doesn't waste memory
3920 	 * if the address space has only a small number of segments.
3921 	 */
3922 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
3923 
3924 	if ((seg = AS_SEGFIRST(as)) == NULL)
3925 		return (0);
3926 
3927 	brkseg = break_seg(p);
3928 	stkseg = as_segat(as, prgetstackbase(p));
3929 
3930 	do {
3931 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
3932 		caddr_t saddr, naddr, baddr;
3933 		void *tmp = NULL;
3934 		ssize_t psz;
3935 		char *parr;
3936 		uint64_t npages;
3937 		uint64_t pagenum;
3938 
3939 		/*
3940 		 * Segment loop part one: iterate from the base of the segment
3941 		 * to its end, pausing at each address boundary (baddr) between
3942 		 * ranges that have different virtual memory protections.
3943 		 */
3944 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
3945 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
3946 			ASSERT(baddr >= saddr && baddr <= eaddr);
3947 
3948 			/*
3949 			 * Segment loop part two: iterate from the current
3950 			 * position to the end of the protection boundary,
3951 			 * pausing at each address boundary (naddr) between
3952 			 * ranges that have different underlying page sizes.
3953 			 */
3954 			for (; saddr < baddr; saddr = naddr) {
3955 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
3956 				ASSERT(naddr >= saddr && naddr <= baddr);
3957 
3958 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
3959 
3960 				mp->pr_vaddr = (uintptr_t)saddr;
3961 				mp->pr_size = naddr - saddr;
3962 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
3963 				mp->pr_mflags = 0;
3964 				if (prot & PROT_READ)
3965 					mp->pr_mflags |= MA_READ;
3966 				if (prot & PROT_WRITE)
3967 					mp->pr_mflags |= MA_WRITE;
3968 				if (prot & PROT_EXEC)
3969 					mp->pr_mflags |= MA_EXEC;
3970 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
3971 					mp->pr_mflags |= MA_SHARED;
3972 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
3973 					mp->pr_mflags |= MA_NORESERVE;
3974 				if (seg->s_ops == &segspt_shmops ||
3975 				    (seg->s_ops == &segvn_ops &&
3976 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
3977 				    vp == NULL)))
3978 					mp->pr_mflags |= MA_ANON;
3979 				if (seg == brkseg)
3980 					mp->pr_mflags |= MA_BREAK;
3981 				else if (seg == stkseg)
3982 					mp->pr_mflags |= MA_STACK;
3983 				if (seg->s_ops == &segspt_shmops)
3984 					mp->pr_mflags |= MA_ISM | MA_SHM;
3985 
3986 				mp->pr_pagesize = PAGESIZE;
3987 				if (psz == -1) {
3988 					mp->pr_hatpagesize = 0;
3989 				} else {
3990 					mp->pr_hatpagesize = psz;
3991 				}
3992 
3993 				/*
3994 				 * Manufacture a filename for the "object" dir.
3995 				 */
3996 				mp->pr_dev = PRNODEV;
3997 				vattr.va_mask = AT_FSID|AT_NODEID;
3998 				if (seg->s_ops == &segvn_ops &&
3999 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4000 				    vp != NULL && vp->v_type == VREG &&
4001 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4002 				    NULL) == 0) {
4003 					mp->pr_dev = vattr.va_fsid;
4004 					mp->pr_ino = vattr.va_nodeid;
4005 					if (vp == p->p_exec)
4006 						(void) strcpy(mp->pr_mapname,
4007 						    "a.out");
4008 					else
4009 						pr_object_name(mp->pr_mapname,
4010 						    vp, &vattr);
4011 				}
4012 
4013 				/*
4014 				 * Get the SysV shared memory id, if any.
4015 				 */
4016 				if ((mp->pr_mflags & MA_SHARED) &&
4017 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4018 				    seg->s_base)) != SHMID_NONE) {
4019 					if (mp->pr_shmid == SHMID_FREE)
4020 						mp->pr_shmid = -1;
4021 
4022 					mp->pr_mflags |= MA_SHM;
4023 				} else {
4024 					mp->pr_shmid = -1;
4025 				}
4026 
4027 				npages = ((uintptr_t)(naddr - saddr)) >>
4028 				    PAGESHIFT;
4029 				parr = kmem_zalloc(npages, KM_SLEEP);
4030 
4031 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4032 
4033 				for (pagenum = 0; pagenum < npages; pagenum++) {
4034 					if (parr[pagenum] & SEG_PAGE_INCORE)
4035 						mp->pr_rss++;
4036 					if (parr[pagenum] & SEG_PAGE_ANON)
4037 						mp->pr_anon++;
4038 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4039 						mp->pr_locked++;
4040 				}
4041 				kmem_free(parr, npages);
4042 			}
4043 		}
4044 		ASSERT(tmp == NULL);
4045 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4046 
4047 	return (0);
4048 }
4049 
4050 /*
4051  * Return the process's credentials.  We don't need a 32-bit equivalent of
4052  * this function because prcred_t and prcred32_t are actually the same.
4053  */
4054 void
4055 prgetcred(proc_t *p, prcred_t *pcrp)
4056 {
4057 	mutex_enter(&p->p_crlock);
4058 	cred2prcred(p->p_cred, pcrp);
4059 	mutex_exit(&p->p_crlock);
4060 }
4061 
/*
 * Compute actual size of the prpriv_t structure, as reported by
 * priv_prgetprivsize() for a NULL argument.
 */
size_t
prgetprivsize(void)
{
	size_t size = priv_prgetprivsize(NULL);

	return (size);
}
4071 
4072 /*
4073  * Return the process's privileges.  We don't need a 32-bit equivalent of
4074  * this function because prpriv_t and prpriv32_t are actually the same.
4075  */
4076 void
4077 prgetpriv(proc_t *p, prpriv_t *pprp)
4078 {
4079 	mutex_enter(&p->p_crlock);
4080 	cred2prpriv(p->p_cred, pprp);
4081 	mutex_exit(&p->p_crlock);
4082 }
4083 
4084 #ifdef _SYSCALL32_IMPL
4085 /*
4086  * Return an array of structures with HAT memory map information.
4087  * We allocate here; the caller must deallocate.
4088  */
4089 int
4090 prgetxmap32(proc_t *p, list_t *iolhead)
4091 {
4092 	struct as *as = p->p_as;
4093 	prxmap32_t *mp;
4094 	struct seg *seg;
4095 	struct seg *brkseg, *stkseg;
4096 	struct vnode *vp;
4097 	struct vattr vattr;
4098 	uint_t prot;
4099 
4100 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4101 
4102 	/*
4103 	 * Request an initial buffer size that doesn't waste memory
4104 	 * if the address space has only a small number of segments.
4105 	 */
4106 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4107 
4108 	if ((seg = AS_SEGFIRST(as)) == NULL)
4109 		return (0);
4110 
4111 	brkseg = break_seg(p);
4112 	stkseg = as_segat(as, prgetstackbase(p));
4113 
4114 	do {
4115 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4116 		caddr_t saddr, naddr, baddr;
4117 		void *tmp = NULL;
4118 		ssize_t psz;
4119 		char *parr;
4120 		uint64_t npages;
4121 		uint64_t pagenum;
4122 
4123 		/*
4124 		 * Segment loop part one: iterate from the base of the segment
4125 		 * to its end, pausing at each address boundary (baddr) between
4126 		 * ranges that have different virtual memory protections.
4127 		 */
4128 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4129 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4130 			ASSERT(baddr >= saddr && baddr <= eaddr);
4131 
4132 			/*
4133 			 * Segment loop part two: iterate from the current
4134 			 * position to the end of the protection boundary,
4135 			 * pausing at each address boundary (naddr) between
4136 			 * ranges that have different underlying page sizes.
4137 			 */
4138 			for (; saddr < baddr; saddr = naddr) {
4139 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4140 				ASSERT(naddr >= saddr && naddr <= baddr);
4141 
4142 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4143 
4144 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4145 				mp->pr_size = (size32_t)(naddr - saddr);
4146 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4147 				mp->pr_mflags = 0;
4148 				if (prot & PROT_READ)
4149 					mp->pr_mflags |= MA_READ;
4150 				if (prot & PROT_WRITE)
4151 					mp->pr_mflags |= MA_WRITE;
4152 				if (prot & PROT_EXEC)
4153 					mp->pr_mflags |= MA_EXEC;
4154 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4155 					mp->pr_mflags |= MA_SHARED;
4156 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4157 					mp->pr_mflags |= MA_NORESERVE;
4158 				if (seg->s_ops == &segspt_shmops ||
4159 				    (seg->s_ops == &segvn_ops &&
4160 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4161 				    vp == NULL)))
4162 					mp->pr_mflags |= MA_ANON;
4163 				if (seg == brkseg)
4164 					mp->pr_mflags |= MA_BREAK;
4165 				else if (seg == stkseg)
4166 					mp->pr_mflags |= MA_STACK;
4167 				if (seg->s_ops == &segspt_shmops)
4168 					mp->pr_mflags |= MA_ISM | MA_SHM;
4169 
4170 				mp->pr_pagesize = PAGESIZE;
4171 				if (psz == -1) {
4172 					mp->pr_hatpagesize = 0;
4173 				} else {
4174 					mp->pr_hatpagesize = psz;
4175 				}
4176 
4177 				/*
4178 				 * Manufacture a filename for the "object" dir.
4179 				 */
4180 				mp->pr_dev = PRNODEV32;
4181 				vattr.va_mask = AT_FSID|AT_NODEID;
4182 				if (seg->s_ops == &segvn_ops &&
4183 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4184 				    vp != NULL && vp->v_type == VREG &&
4185 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4186 				    NULL) == 0) {
4187 					(void) cmpldev(&mp->pr_dev,
4188 					    vattr.va_fsid);
4189 					mp->pr_ino = vattr.va_nodeid;
4190 					if (vp == p->p_exec)
4191 						(void) strcpy(mp->pr_mapname,
4192 						    "a.out");
4193 					else
4194 						pr_object_name(mp->pr_mapname,
4195 						    vp, &vattr);
4196 				}
4197 
4198 				/*
4199 				 * Get the SysV shared memory id, if any.
4200 				 */
4201 				if ((mp->pr_mflags & MA_SHARED) &&
4202 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4203 				    seg->s_base)) != SHMID_NONE) {
4204 					if (mp->pr_shmid == SHMID_FREE)
4205 						mp->pr_shmid = -1;
4206 
4207 					mp->pr_mflags |= MA_SHM;
4208 				} else {
4209 					mp->pr_shmid = -1;
4210 				}
4211 
4212 				npages = ((uintptr_t)(naddr - saddr)) >>
4213 				    PAGESHIFT;
4214 				parr = kmem_zalloc(npages, KM_SLEEP);
4215 
4216 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4217 
4218 				for (pagenum = 0; pagenum < npages; pagenum++) {
4219 					if (parr[pagenum] & SEG_PAGE_INCORE)
4220 						mp->pr_rss++;
4221 					if (parr[pagenum] & SEG_PAGE_ANON)
4222 						mp->pr_anon++;
4223 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4224 						mp->pr_locked++;
4225 				}
4226 				kmem_free(parr, npages);
4227 			}
4228 		}
4229 		ASSERT(tmp == NULL);
4230 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4231 
4232 	return (0);
4233 }
4234 #endif	/* _SYSCALL32_IMPL */
4235