xref: /illumos-gate/usr/src/uts/common/fs/proc/prsubr.c (revision 82049ff560eed6fbdf4cf222d894467f5809f9b3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2017, Joyent, Inc.
25  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved	*/
30 
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/cmn_err.h>
35 #include <sys/cred.h>
36 #include <sys/priv.h>
37 #include <sys/debug.h>
38 #include <sys/errno.h>
39 #include <sys/inline.h>
40 #include <sys/kmem.h>
41 #include <sys/mman.h>
42 #include <sys/proc.h>
43 #include <sys/brand.h>
44 #include <sys/sobject.h>
45 #include <sys/sysmacros.h>
46 #include <sys/systm.h>
47 #include <sys/uio.h>
48 #include <sys/var.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/session.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/disp.h>
56 #include <sys/class.h>
57 #include <sys/ts.h>
58 #include <sys/bitmap.h>
59 #include <sys/poll.h>
60 #include <sys/shm_impl.h>
61 #include <sys/fault.h>
62 #include <sys/syscall.h>
63 #include <sys/procfs.h>
64 #include <sys/processor.h>
65 #include <sys/cpuvar.h>
66 #include <sys/copyops.h>
67 #include <sys/time.h>
68 #include <sys/msacct.h>
69 #include <sys/flock_impl.h>
70 #include <sys/stropts.h>
71 #include <sys/strsubr.h>
72 #include <sys/pathname.h>
73 #include <sys/mode.h>
74 #include <sys/socketvar.h>
75 #include <sys/autoconf.h>
76 #include <sys/dtrace.h>
77 #include <sys/timod.h>
78 #include <sys/fs/namenode.h>
79 #include <netinet/udp.h>
80 #include <netinet/tcp.h>
81 #include <inet/cc.h>
82 #include <vm/as.h>
83 #include <vm/rm.h>
84 #include <vm/seg.h>
85 #include <vm/seg_vn.h>
86 #include <vm/seg_dev.h>
87 #include <vm/seg_spt.h>
88 #include <vm/page.h>
89 #include <sys/vmparam.h>
90 #include <sys/swap.h>
91 #include <fs/proc/prdata.h>
92 #include <sys/task.h>
93 #include <sys/project.h>
94 #include <sys/contract_impl.h>
95 #include <sys/contract/process.h>
96 #include <sys/contract/process_impl.h>
97 #include <sys/schedctl.h>
98 #include <sys/pool.h>
99 #include <sys/zone.h>
100 #include <sys/atomic.h>
101 #include <sys/sdt.h>
102 
/*
 * NOTE(review): MAX_ITERS_SPIN is not referenced in the portion of the
 * file reviewed here; presumably it bounds a retry/spin loop elsewhere
 * in this file -- confirm against its users.
 */
#define	MAX_ITERS_SPIN	5

/*
 * Page vector describing a run of pages within a segment: two parallel
 * arrays (protections and in-core flags) of pg_npages elements each,
 * whose first element corresponds to page number pg_pnbase within the
 * segment.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

/* Not static, so it is visible outside this file. */
size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

/* Segment driver ops vectors defined elsewhere; no header declares them. */
extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/* Forward declarations for file-local watched-page helpers. */
static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
119 
/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * The caller must hold p->p_lock.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread (thread_lock()).
 * Returns NULL, with no thread locked, only when the process has no
 * agent lwp and its thread list is empty.
 *
 * Selection order:
 *	1. the agent lwp, if one exists;
 *	2. otherwise the first lwp encountered (walking p_tlist) in the
 *	   highest-ranked non-empty category, ranked as: on processor,
 *	   runnable, sleeping, job-control stop without directed stop,
 *	   job-control stop with directed stop, event-of-interest stop,
 *	   DTrace stop, requested stop, sleeping on p_holdlwps, suspended;
 *	3. otherwise the head of the thread list.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		/*
		 * A virtually stopped lwp is recorded as a requested-stop
		 * candidate without taking its thread lock.  Note that
		 * 'continue' in a do/while proceeds to the loop condition,
		 * so the walk still advances to t->t_forw.
		 */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		/* If this is a process kernel thread, ignore it. */
		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/*
			 * this is filthy
			 *
			 * A sleeper whose wait channel is p_holdlwps (and
			 * which has no t_wchan0) is classified separately
			 * as "performing hold"; it ranks well below an
			 * ordinary sleeper in the final selection.
			 */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				/*
				 * The first DTrace-stopped lwp becomes the
				 * DTrace candidate; any other requested stop
				 * (including a second DTrace stop) competes
				 * for the requested-stop slot.
				 */
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 * That is, an existing candidate stopped on
				 * entry to SYS_exit is replaced by any later
				 * event-of-interest stop.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	/* Pick the first non-empty category, in decreasing priority. */
	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	/* The selected thread is returned locked; the caller must unlock. */
	if (t != NULL)
		thread_lock(t);
	return (t);
}
271 
272 /*
273  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
274  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
275  * on the /proc file descriptor.  Called from stop() when a traced
276  * process stops on an event of interest.  Also called from exit()
277  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
278  */
279 void
280 prnotify(struct vnode *vp)
281 {
282 	prcommon_t *pcp = VTOP(vp)->pr_common;
283 
284 	mutex_enter(&pcp->prc_mutex);
285 	cv_broadcast(&pcp->prc_wait);
286 	mutex_exit(&pcp->prc_mutex);
287 	if (pcp->prc_flags & PRC_POLL) {
288 		/*
289 		 * We call pollwakeup() with POLLHUP to ensure that
290 		 * the pollers are awakened even if they are polling
291 		 * for nothing (i.e., waiting for the process to exit).
292 		 * This enables the use of the PRC_POLL flag for optimization
293 		 * (we can turn off PRC_POLL only if we know no pollers remain).
294 		 */
295 		pcp->prc_flags &= ~PRC_POLL;
296 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
297 	}
298 }
299 
300 /* called immediately below, in prfree() */
301 static void
302 prfreenotify(vnode_t *vp)
303 {
304 	prnode_t *pnp;
305 	prcommon_t *pcp;
306 
307 	while (vp != NULL) {
308 		pnp = VTOP(vp);
309 		pcp = pnp->pr_common;
310 		ASSERT(pcp->prc_thread == NULL);
311 		pcp->prc_proc = NULL;
312 		/*
313 		 * We can't call prnotify() here because we are holding
314 		 * pidlock.  We assert that there is no need to.
315 		 */
316 		mutex_enter(&pcp->prc_mutex);
317 		cv_broadcast(&pcp->prc_wait);
318 		mutex_exit(&pcp->prc_mutex);
319 		ASSERT(!(pcp->prc_flags & PRC_POLL));
320 
321 		vp = pnp->pr_next;
322 		pnp->pr_next = NULL;
323 	}
324 }
325 
326 /*
327  * Called from a hook in freeproc() when a traced process is removed
328  * from the process table.  The proc-table pointers of all associated
329  * /proc vnodes are cleared to indicate that the process has gone away.
330  */
331 void
332 prfree(proc_t *p)
333 {
334 	uint_t slot = p->p_slot;
335 
336 	ASSERT(MUTEX_HELD(&pidlock));
337 
338 	/*
339 	 * Block the process against /proc so it can be freed.
340 	 * It cannot be freed while locked by some controlling process.
341 	 * Lock ordering:
342 	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
343 	 */
344 	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
345 	mutex_enter(&p->p_lock);
346 	while (p->p_proc_flag & P_PR_LOCK) {
347 		mutex_exit(&pr_pidlock);
348 		cv_wait(&pr_pid_cv[slot], &p->p_lock);
349 		mutex_exit(&p->p_lock);
350 		mutex_enter(&pr_pidlock);
351 		mutex_enter(&p->p_lock);
352 	}
353 
354 	ASSERT(p->p_tlist == NULL);
355 
356 	prfreenotify(p->p_plist);
357 	p->p_plist = NULL;
358 
359 	prfreenotify(p->p_trace);
360 	p->p_trace = NULL;
361 
362 	/*
363 	 * We broadcast to wake up everyone waiting for this process.
364 	 * No one can reach this process from this point on.
365 	 */
366 	cv_broadcast(&pr_pid_cv[slot]);
367 
368 	mutex_exit(&p->p_lock);
369 	mutex_exit(&pr_pidlock);
370 }
371 
372 /*
373  * Called from a hook in exit() when a traced process is becoming a zombie.
374  */
375 void
376 prexit(proc_t *p)
377 {
378 	ASSERT(MUTEX_HELD(&p->p_lock));
379 
380 	if (pr_watch_active(p)) {
381 		pr_free_watchpoints(p);
382 		watch_disable(curthread);
383 	}
384 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
385 	if (p->p_trace) {
386 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
387 		prnotify(p->p_trace);
388 	}
389 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
390 }
391 
392 /*
393  * Called when a thread calls lwp_exit().
394  */
395 void
396 prlwpexit(kthread_t *t)
397 {
398 	vnode_t *vp;
399 	prnode_t *pnp;
400 	prcommon_t *pcp;
401 	proc_t *p = ttoproc(t);
402 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
403 
404 	ASSERT(t == curthread);
405 	ASSERT(MUTEX_HELD(&p->p_lock));
406 
407 	/*
408 	 * The process must be blocked against /proc to do this safely.
409 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
410 	 * It is the caller's responsibility to have called prbarrier(p).
411 	 */
412 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
413 
414 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
415 		pnp = VTOP(vp);
416 		pcp = pnp->pr_common;
417 		if (pcp->prc_thread == t) {
418 			pcp->prc_thread = NULL;
419 			pcp->prc_flags |= PRC_DESTROY;
420 		}
421 	}
422 
423 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
424 		pnp = VTOP(vp);
425 		pcp = pnp->pr_common;
426 		pcp->prc_thread = NULL;
427 		pcp->prc_flags |= PRC_DESTROY;
428 		prnotify(vp);
429 	}
430 
431 	if (p->p_trace)
432 		prnotify(p->p_trace);
433 }
434 
435 /*
436  * Called when a zombie thread is joined or when a
437  * detached lwp exits.  Called from lwp_hash_out().
438  */
439 void
440 prlwpfree(proc_t *p, lwpent_t *lep)
441 {
442 	vnode_t *vp;
443 	prnode_t *pnp;
444 	prcommon_t *pcp;
445 
446 	ASSERT(MUTEX_HELD(&p->p_lock));
447 
448 	/*
449 	 * The process must be blocked against /proc to do this safely.
450 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
451 	 * It is the caller's responsibility to have called prbarrier(p).
452 	 */
453 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
454 
455 	vp = lep->le_trace;
456 	lep->le_trace = NULL;
457 	while (vp) {
458 		prnotify(vp);
459 		pnp = VTOP(vp);
460 		pcp = pnp->pr_common;
461 		ASSERT(pcp->prc_thread == NULL &&
462 		    (pcp->prc_flags & PRC_DESTROY));
463 		pcp->prc_tslot = -1;
464 		vp = pnp->pr_next;
465 		pnp->pr_next = NULL;
466 	}
467 
468 	if (p->p_trace)
469 		prnotify(p->p_trace);
470 }
471 
472 /*
473  * Called from a hook in exec() when a thread starts exec().
474  */
475 void
476 prexecstart(void)
477 {
478 	proc_t *p = ttoproc(curthread);
479 	klwp_t *lwp = ttolwp(curthread);
480 
481 	/*
482 	 * The P_PR_EXEC flag blocks /proc operations for
483 	 * the duration of the exec().
484 	 * We can't start exec() while the process is
485 	 * locked by /proc, so we call prbarrier().
486 	 * lwp_nostop keeps the process from being stopped
487 	 * via job control for the duration of the exec().
488 	 */
489 
490 	ASSERT(MUTEX_HELD(&p->p_lock));
491 	prbarrier(p);
492 	lwp->lwp_nostop++;
493 	p->p_proc_flag |= P_PR_EXEC;
494 }
495 
496 /*
497  * Called from a hook in exec() when a thread finishes exec().
498  * The thread may or may not have succeeded.  Some other thread
499  * may have beat it to the punch.
500  */
501 void
502 prexecend(void)
503 {
504 	proc_t *p = ttoproc(curthread);
505 	klwp_t *lwp = ttolwp(curthread);
506 	vnode_t *vp;
507 	prnode_t *pnp;
508 	prcommon_t *pcp;
509 	model_t model = p->p_model;
510 	id_t tid = curthread->t_tid;
511 	int tslot = curthread->t_dslot;
512 
513 	ASSERT(MUTEX_HELD(&p->p_lock));
514 
515 	lwp->lwp_nostop--;
516 	if (p->p_flag & SEXITLWPS) {
517 		/*
518 		 * We are on our way to exiting because some
519 		 * other thread beat us in the race to exec().
520 		 * Don't clear the P_PR_EXEC flag in this case.
521 		 */
522 		return;
523 	}
524 
525 	/*
526 	 * Wake up anyone waiting in /proc for the process to complete exec().
527 	 */
528 	p->p_proc_flag &= ~P_PR_EXEC;
529 	if ((vp = p->p_trace) != NULL) {
530 		pcp = VTOP(vp)->pr_common;
531 		mutex_enter(&pcp->prc_mutex);
532 		cv_broadcast(&pcp->prc_wait);
533 		mutex_exit(&pcp->prc_mutex);
534 		for (; vp != NULL; vp = pnp->pr_next) {
535 			pnp = VTOP(vp);
536 			pnp->pr_common->prc_datamodel = model;
537 		}
538 	}
539 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
540 		/*
541 		 * We dealt with the process common above.
542 		 */
543 		ASSERT(p->p_trace != NULL);
544 		pcp = VTOP(vp)->pr_common;
545 		mutex_enter(&pcp->prc_mutex);
546 		cv_broadcast(&pcp->prc_wait);
547 		mutex_exit(&pcp->prc_mutex);
548 		for (; vp != NULL; vp = pnp->pr_next) {
549 			pnp = VTOP(vp);
550 			pcp = pnp->pr_common;
551 			pcp->prc_datamodel = model;
552 			pcp->prc_tid = tid;
553 			pcp->prc_tslot = tslot;
554 		}
555 	}
556 }
557 
558 /*
559  * Called from a hook in relvm() just before freeing the address space.
560  * We free all the watched areas now.
561  */
562 void
563 prrelvm(void)
564 {
565 	proc_t *p = ttoproc(curthread);
566 
567 	mutex_enter(&p->p_lock);
568 	prbarrier(p);	/* block all other /proc operations */
569 	if (pr_watch_active(p)) {
570 		pr_free_watchpoints(p);
571 		watch_disable(curthread);
572 	}
573 	mutex_exit(&p->p_lock);
574 	pr_free_watched_pages(p);
575 }
576 
577 /*
578  * Called from hooks in exec-related code when a traced process
579  * attempts to exec(2) a setuid/setgid program or an unreadable
580  * file.  Rather than fail the exec we invalidate the associated
581  * /proc vnodes so that subsequent attempts to use them will fail.
582  *
583  * All /proc vnodes, except directory vnodes, are retained on a linked
584  * list (rooted at p_plist in the process structure) until last close.
585  *
586  * A controlling process must re-open the /proc files in order to
587  * regain control.
588  */
589 void
590 prinvalidate(struct user *up)
591 {
592 	kthread_t *t = curthread;
593 	proc_t *p = ttoproc(t);
594 	vnode_t *vp;
595 	prnode_t *pnp;
596 	int writers = 0;
597 
598 	mutex_enter(&p->p_lock);
599 	prbarrier(p);	/* block all other /proc operations */
600 
601 	/*
602 	 * At this moment, there can be only one lwp in the process.
603 	 */
604 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
605 
606 	/*
607 	 * Invalidate any currently active /proc vnodes.
608 	 */
609 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
610 		pnp = VTOP(vp);
611 		switch (pnp->pr_type) {
612 		case PR_PSINFO:		/* these files can read by anyone */
613 		case PR_LPSINFO:
614 		case PR_LWPSINFO:
615 		case PR_LWPDIR:
616 		case PR_LWPIDDIR:
617 		case PR_USAGE:
618 		case PR_LUSAGE:
619 		case PR_LWPUSAGE:
620 			break;
621 		default:
622 			pnp->pr_flags |= PR_INVAL;
623 			break;
624 		}
625 	}
626 	/*
627 	 * Wake up anyone waiting for the process or lwp.
628 	 * p->p_trace is guaranteed to be non-NULL if there
629 	 * are any open /proc files for this process.
630 	 */
631 	if ((vp = p->p_trace) != NULL) {
632 		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
633 
634 		prnotify(vp);
635 		/*
636 		 * Are there any writers?
637 		 */
638 		if ((writers = pcp->prc_writers) != 0) {
639 			/*
640 			 * Clear the exclusive open flag (old /proc interface).
641 			 * Set prc_selfopens equal to prc_writers so that
642 			 * the next O_EXCL|O_WRITE open will succeed
643 			 * even with existing (though invalid) writers.
644 			 * prclose() must decrement prc_selfopens when
645 			 * the invalid files are closed.
646 			 */
647 			pcp->prc_flags &= ~PRC_EXCL;
648 			ASSERT(pcp->prc_selfopens <= writers);
649 			pcp->prc_selfopens = writers;
650 		}
651 	}
652 	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
653 	while (vp != NULL) {
654 		/*
655 		 * We should not invalidate the lwpiddir vnodes,
656 		 * but the necessities of maintaining the old
657 		 * ioctl()-based version of /proc require it.
658 		 */
659 		pnp = VTOP(vp);
660 		pnp->pr_flags |= PR_INVAL;
661 		prnotify(vp);
662 		vp = pnp->pr_next;
663 	}
664 
665 	/*
666 	 * If any tracing flags are in effect and any vnodes are open for
667 	 * writing then set the requested-stop and run-on-last-close flags.
668 	 * Otherwise, clear all tracing flags.
669 	 */
670 	t->t_proc_flag &= ~TP_PAUSE;
671 	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
672 		t->t_proc_flag |= TP_PRSTOP;
673 		aston(t);		/* so ISSIG will see the flag */
674 		p->p_proc_flag |= P_PR_RUNLCL;
675 	} else {
676 		premptyset(&up->u_entrymask);		/* syscalls */
677 		premptyset(&up->u_exitmask);
678 		up->u_systrap = 0;
679 		premptyset(&p->p_sigmask);		/* signals */
680 		premptyset(&p->p_fltmask);		/* faults */
681 		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
682 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
683 		prnostep(ttolwp(t));
684 	}
685 
686 	mutex_exit(&p->p_lock);
687 }
688 
/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourself kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 *
 * NOTE(review): no priority adjustment is visible in this function (nor
 * in prunlock()/prunmark()); the sentence above may be stale -- confirm.
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	/* No process common, or the process is already gone (see prfree()). */
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		/*
		 * pr_pidlock is dropped before sleeping.  After waking,
		 * p_lock is released and pr_pidlock retaken first, so
		 * both locks are re-acquired in the order used at the
		 * top of this function (pr_pidlock, then p_lock).
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		/* prc_proc is cleared when the process is freed. */
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	return (p);
}
727 
728 /*
729  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
730  * This prevents any lwp of the process from disappearing and
731  * blocks most operations that a process can perform on itself.
732  * Returns 0 on success, a non-zero error number on failure.
733  *
734  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
735  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
736  *
737  * error returns:
738  *	ENOENT: process or lwp has disappeared or process is exiting
739  *		(or has become a zombie and zdisp == ZNO).
740  *	EAGAIN: procfs vnode has become invalid.
741  *	EINTR:  signal arrived while waiting for exec to complete.
742  */
743 int
744 prlock(prnode_t *pnp, int zdisp)
745 {
746 	prcommon_t *pcp;
747 	proc_t *p;
748 
749 again:
750 	pcp = pnp->pr_common;
751 	p = pr_p_lock(pnp);
752 	mutex_exit(&pr_pidlock);
753 
754 	/*
755 	 * Return ENOENT immediately if there is no process.
756 	 */
757 	if (p == NULL)
758 		return (ENOENT);
759 
760 	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
761 
762 	/*
763 	 * Return ENOENT if process entered zombie state or is exiting
764 	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
765 	 */
766 	if (zdisp == ZNO &&
767 	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
768 		prunlock(pnp);
769 		return (ENOENT);
770 	}
771 
772 	/*
773 	 * If lwp-specific, check to see if lwp has disappeared.
774 	 */
775 	if (pcp->prc_flags & PRC_LWP) {
776 		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
777 		    pcp->prc_tslot == -1) {
778 			prunlock(pnp);
779 			return (ENOENT);
780 		}
781 	}
782 
783 	/*
784 	 * Return EAGAIN if we have encountered a security violation.
785 	 * (The process exec'd a set-id or unreadable executable file.)
786 	 */
787 	if (pnp->pr_flags & PR_INVAL) {
788 		prunlock(pnp);
789 		return (EAGAIN);
790 	}
791 
792 	/*
793 	 * If process is undergoing an exec(), wait for
794 	 * completion and then start all over again.
795 	 */
796 	if (p->p_proc_flag & P_PR_EXEC) {
797 		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
798 		mutex_enter(&pcp->prc_mutex);
799 		prunlock(pnp);
800 		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
801 			mutex_exit(&pcp->prc_mutex);
802 			return (EINTR);
803 		}
804 		mutex_exit(&pcp->prc_mutex);
805 		goto again;
806 	}
807 
808 	/*
809 	 * We return holding p->p_lock.
810 	 */
811 	return (0);
812 }
813 
814 /*
815  * Undo prlock() and pr_p_lock().
816  * p->p_lock is still held; pr_pidlock is no longer held.
817  *
818  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
819  * if any, waiting for the flag to be dropped; it retains p->p_lock.
820  *
821  * prunlock() calls prunmark() and then drops p->p_lock.
822  */
823 void
824 prunmark(proc_t *p)
825 {
826 	ASSERT(p->p_proc_flag & P_PR_LOCK);
827 	ASSERT(MUTEX_HELD(&p->p_lock));
828 
829 	cv_signal(&pr_pid_cv[p->p_slot]);
830 	p->p_proc_flag &= ~P_PR_LOCK;
831 }
832 
833 void
834 prunlock(prnode_t *pnp)
835 {
836 	prcommon_t *pcp = pnp->pr_common;
837 	proc_t *p = pcp->prc_proc;
838 
839 	/*
840 	 * If we (or someone) gave it a SIGKILL, and it is not
841 	 * already a zombie, set it running unconditionally.
842 	 */
843 	if ((p->p_flag & SKILLED) &&
844 	    !(p->p_flag & SEXITING) &&
845 	    !(pcp->prc_flags & PRC_DESTROY) &&
846 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
847 		(void) pr_setrun(pnp, 0);
848 	prunmark(p);
849 	mutex_exit(&p->p_lock);
850 }
851 
852 /*
853  * Called while holding p->p_lock to delay until the process is unlocked.
854  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
855  * The process cannot become locked again until p->p_lock is dropped.
856  */
857 void
858 prbarrier(proc_t *p)
859 {
860 	ASSERT(MUTEX_HELD(&p->p_lock));
861 
862 	if (p->p_proc_flag & P_PR_LOCK) {
863 		/* The process is locked; delay until not locked */
864 		uint_t slot = p->p_slot;
865 
866 		while (p->p_proc_flag & P_PR_LOCK)
867 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
868 		cv_signal(&pr_pid_cv[slot]);
869 	}
870 }
871 
872 /*
873  * Return process/lwp status.
874  * The u-block is mapped in by this routine and unmapped at the end.
875  */
876 void
877 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
878 {
879 	kthread_t *t;
880 
881 	ASSERT(MUTEX_HELD(&p->p_lock));
882 
883 	t = prchoose(p);	/* returns locked thread */
884 	ASSERT(t != NULL);
885 	thread_unlock(t);
886 
887 	/* just bzero the process part, prgetlwpstatus() does the rest */
888 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
889 	sp->pr_nlwp = p->p_lwpcnt;
890 	sp->pr_nzomb = p->p_zombcnt;
891 	prassignset(&sp->pr_sigpend, &p->p_sig);
892 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
893 	sp->pr_brksize = p->p_brksize;
894 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
895 	sp->pr_stksize = p->p_stksize;
896 	sp->pr_pid = p->p_pid;
897 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
898 	    (p->p_flag & SZONETOP)) {
899 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
900 		/*
901 		 * Inside local zones, fake zsched's pid as parent pids for
902 		 * processes which reference processes outside of the zone.
903 		 */
904 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
905 	} else {
906 		sp->pr_ppid = p->p_ppid;
907 	}
908 	sp->pr_pgid  = p->p_pgrp;
909 	sp->pr_sid   = p->p_sessp->s_sid;
910 	sp->pr_taskid = p->p_task->tk_tkid;
911 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
912 	sp->pr_zoneid = p->p_zone->zone_id;
913 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
914 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
915 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
916 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
917 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
918 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
919 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
920 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
921 	switch (p->p_model) {
922 	case DATAMODEL_ILP32:
923 		sp->pr_dmodel = PR_MODEL_ILP32;
924 		break;
925 	case DATAMODEL_LP64:
926 		sp->pr_dmodel = PR_MODEL_LP64;
927 		break;
928 	}
929 	if (p->p_agenttp)
930 		sp->pr_agentid = p->p_agenttp->t_tid;
931 
932 	/* get the chosen lwp's status */
933 	prgetlwpstatus(t, &sp->pr_lwp, zp);
934 
935 	/* replicate the flags */
936 	sp->pr_flags = sp->pr_lwp.pr_flags;
937 }
938 
939 /*
940  * Query mask of held signals for a given thread.
941  *
942  * This makes use of schedctl_sigblock() to query if userspace has requested
943  * that all maskable signals be held.  While it would be tempting to call
944  * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
945  * done safely without the risk of racing with the thread under consideration.
946  */
947 void
948 prgethold(kthread_t *t, sigset_t *sp)
949 {
950 	k_sigset_t set;
951 
952 	if (schedctl_sigblock(t)) {
953 		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
954 		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
955 		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
956 	} else {
957 		set = t->t_hold;
958 	}
959 	sigktou(&set, sp);
960 }
961 
962 #ifdef _SYSCALL32_IMPL
/*
 * Fill in the 32-bit lwp status structure 'sp' for thread 't'.
 *
 * 'zp' is the zone used to sanitize the siginfo: when it is not the
 * global zone, user-generated signal information originating from a
 * different zone is rewritten to appear to come from zp's zsched.
 *
 * The caller must hold p->p_lock.  The lock is dropped and reacquired
 * inside this function (around the accesses to the lwp's stack), so
 * it is held again on return.
 */
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	/* Build the pr_flags word from thread and process state. */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	/* A virtually stopped lwp is reported as a requested stop. */
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	/*
	 * pr_info comes from the fault siginfo when stopped on a fault,
	 * otherwise from the current signal's queued info, if any.
	 */
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	/*
	 * For a non-global zone 'zp', hide the identity of a sender
	 * from another zone by substituting zp's zsched.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	/*
	 * The copy is bounded to size - 1 and *sp was zeroed above,
	 * so pr_clname is always NUL-terminated.
	 */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	/* System time reported here includes trap time. */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	/* System call number and arguments, if in a system call. */
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	/* Registers are fetched only for a stopped lwp or ourself. */
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	/* Return values: on a syscall-exit stop or for a vfork parent. */
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For execve, the reported arguments are rebuilt from
		 * the u-area: argv, envp, and the executable name taken
		 * from the AT_SUN_EXECNAME aux vector entry (0 if that
		 * entry is not found).
		 */
		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	/* Reacquire p_lock so the caller's locking state is restored. */
	mutex_enter(&p->p_lock);
}
1120 
1121 void
1122 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1123 {
1124 	kthread_t *t;
1125 
1126 	ASSERT(MUTEX_HELD(&p->p_lock));
1127 
1128 	t = prchoose(p);	/* returns locked thread */
1129 	ASSERT(t != NULL);
1130 	thread_unlock(t);
1131 
1132 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1133 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1134 	sp->pr_nlwp = p->p_lwpcnt;
1135 	sp->pr_nzomb = p->p_zombcnt;
1136 	prassignset(&sp->pr_sigpend, &p->p_sig);
1137 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1138 	sp->pr_brksize = (uint32_t)p->p_brksize;
1139 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1140 	sp->pr_stksize = (uint32_t)p->p_stksize;
1141 	sp->pr_pid   = p->p_pid;
1142 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1143 	    (p->p_flag & SZONETOP)) {
1144 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1145 		/*
1146 		 * Inside local zones, fake zsched's pid as parent pids for
1147 		 * processes which reference processes outside of the zone.
1148 		 */
1149 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1150 	} else {
1151 		sp->pr_ppid = p->p_ppid;
1152 	}
1153 	sp->pr_pgid  = p->p_pgrp;
1154 	sp->pr_sid   = p->p_sessp->s_sid;
1155 	sp->pr_taskid = p->p_task->tk_tkid;
1156 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1157 	sp->pr_zoneid = p->p_zone->zone_id;
1158 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1159 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1160 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1161 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1162 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1163 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1164 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1165 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1166 	switch (p->p_model) {
1167 	case DATAMODEL_ILP32:
1168 		sp->pr_dmodel = PR_MODEL_ILP32;
1169 		break;
1170 	case DATAMODEL_LP64:
1171 		sp->pr_dmodel = PR_MODEL_LP64;
1172 		break;
1173 	}
1174 	if (p->p_agenttp)
1175 		sp->pr_agentid = p->p_agenttp->t_tid;
1176 
1177 	/* get the chosen lwp's status */
1178 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1179 
1180 	/* replicate the flags */
1181 	sp->pr_flags = sp->pr_lwp.pr_flags;
1182 }
1183 #endif	/* _SYSCALL32_IMPL */
1184 
/*
 * Return lwp status.
 *
 * Zeroes *sp and then fills it in with the status of the lwp whose
 * thread is 't'.  'zp' is the zone against which the sender details in
 * the reported siginfo are checked (see below).
 *
 * The caller must hold p->p_lock.  The lock is dropped part-way through
 * (while registers, system call arguments and return values are being
 * fetched) and is re-acquired before returning.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));

	/*
	 * Build up the PR_* flag word from thread and process state.
	 */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;

	/*
	 * A VSTOPPED() lwp is always reported as a PR_REQUESTED stop;
	 * otherwise report the reason recorded in the thread.
	 */
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);

	/*
	 * For a fault stop the siginfo comes from lwp_siginfo; otherwise
	 * it comes from the current signal's queued info, if there is any.
	 */
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));

	/*
	 * When 'zp' is a non-global zone and the reported siginfo names a
	 * different zone, overwrite the sender details with zsched's pid,
	 * uid 0, ctid -1 and zp's own zone id.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	/* *sp was zeroed above, so copying size - 1 bytes stays terminated */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* system time is reported as the sum of LMS_SYSTEM and LMS_TRAP */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	/* system call number and arguments, when stopped or asleep in one */
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	/*
	 * Report system call return values on a PR_SYSEXIT stop or while
	 * the process has SVFWAIT set (PR_VFORKP).
	 */
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For execve, replace the argument slots with values taken
		 * from the u-area: the AT_SUN_EXECNAME aux vector entry
		 * (0 if there is none), u_argv and u_envp.
		 */
		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	/* re-take the lock the caller expects to be holding on return */
	mutex_enter(&p->p_lock);
}
1343 
1344 /*
1345  * Get the sigaction structure for the specified signal.  The u-block
1346  * must already have been mapped in by the caller.
1347  */
1348 void
1349 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1350 {
1351 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1352 
1353 	bzero(sp, sizeof (*sp));
1354 
1355 	if (sig != 0 && (unsigned)sig < nsig) {
1356 		sp->sa_handler = up->u_signal[sig-1];
1357 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1358 		if (sigismember(&up->u_sigonstack, sig))
1359 			sp->sa_flags |= SA_ONSTACK;
1360 		if (sigismember(&up->u_sigresethand, sig))
1361 			sp->sa_flags |= SA_RESETHAND;
1362 		if (sigismember(&up->u_sigrestart, sig))
1363 			sp->sa_flags |= SA_RESTART;
1364 		if (sigismember(&p->p_siginfo, sig))
1365 			sp->sa_flags |= SA_SIGINFO;
1366 		if (sigismember(&up->u_signodefer, sig))
1367 			sp->sa_flags |= SA_NODEFER;
1368 		if (sig == SIGCLD) {
1369 			if (p->p_flag & SNOWAIT)
1370 				sp->sa_flags |= SA_NOCLDWAIT;
1371 			if ((p->p_flag & SJCTL) == 0)
1372 				sp->sa_flags |= SA_NOCLDSTOP;
1373 		}
1374 	}
1375 }
1376 
1377 #ifdef _SYSCALL32_IMPL
1378 void
1379 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1380 {
1381 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1382 
1383 	bzero(sp, sizeof (*sp));
1384 
1385 	if (sig != 0 && (unsigned)sig < nsig) {
1386 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1387 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1388 		if (sigismember(&up->u_sigonstack, sig))
1389 			sp->sa_flags |= SA_ONSTACK;
1390 		if (sigismember(&up->u_sigresethand, sig))
1391 			sp->sa_flags |= SA_RESETHAND;
1392 		if (sigismember(&up->u_sigrestart, sig))
1393 			sp->sa_flags |= SA_RESTART;
1394 		if (sigismember(&p->p_siginfo, sig))
1395 			sp->sa_flags |= SA_SIGINFO;
1396 		if (sigismember(&up->u_signodefer, sig))
1397 			sp->sa_flags |= SA_NODEFER;
1398 		if (sig == SIGCLD) {
1399 			if (p->p_flag & SNOWAIT)
1400 				sp->sa_flags |= SA_NOCLDWAIT;
1401 			if ((p->p_flag & SJCTL) == 0)
1402 				sp->sa_flags |= SA_NOCLDSTOP;
1403 		}
1404 	}
1405 }
1406 #endif	/* _SYSCALL32_IMPL */
1407 
1408 /*
1409  * Count the number of segments in this process's address space.
1410  */
1411 int
1412 prnsegs(struct as *as, int reserved)
1413 {
1414 	int n = 0;
1415 	struct seg *seg;
1416 
1417 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1418 
1419 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1420 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1421 		caddr_t saddr, naddr;
1422 		void *tmp = NULL;
1423 
1424 		if ((seg->s_flags & S_HOLE) != 0) {
1425 			continue;
1426 		}
1427 
1428 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1429 			(void) pr_getprot(seg, reserved, &tmp,
1430 			    &saddr, &naddr, eaddr);
1431 			if (saddr != naddr)
1432 				n++;
1433 		}
1434 
1435 		ASSERT(tmp == NULL);
1436 	}
1437 
1438 	return (n);
1439 }
1440 
/*
 * Write 'n' into 's' as decimal digits with no leading zeros and no
 * terminator of its own.  If 'len' exceeds the number of digits written,
 * the remainder of the first 'len' bytes of 's' is filled with null
 * characters.  Returns the number of digits written.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* a uint32_t needs at most 10 digits */
	char *limit = s + len;
	int ndigits = 0;
	int i;

	/* produce the digits least-significant first */
	do {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	/* optional pad */
	while (s < limit)
		*s++ = '\0';

	return (ndigits);
}
1469 
/*
 * Write 'n' into 's' as decimal digits with no leading zeros.  No
 * terminator or padding is added.  Returns the number of digits written.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* a uint64_t needs at most 20 digits */
	int ndigits = 0;
	int i;

	/* produce the digits least-significant first */
	do {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	return (ndigits);
}
1494 
1495 file_t *
1496 pr_getf(proc_t *p, uint_t fd, short *flag)
1497 {
1498 	uf_entry_t *ufp;
1499 	uf_info_t *fip;
1500 	file_t *fp;
1501 
1502 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
1503 
1504 	fip = P_FINFO(p);
1505 
1506 	if (fd >= fip->fi_nfiles)
1507 		return (NULL);
1508 
1509 	mutex_exit(&p->p_lock);
1510 	mutex_enter(&fip->fi_lock);
1511 	UF_ENTER(ufp, fip, fd);
1512 	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
1513 		if (flag != NULL)
1514 			*flag = ufp->uf_flag;
1515 		ufp->uf_refcnt++;
1516 	} else {
1517 		fp = NULL;
1518 	}
1519 	UF_EXIT(ufp);
1520 	mutex_exit(&fip->fi_lock);
1521 	mutex_enter(&p->p_lock);
1522 
1523 	return (fp);
1524 }
1525 
1526 void
1527 pr_releasef(proc_t *p, uint_t fd)
1528 {
1529 	uf_entry_t *ufp;
1530 	uf_info_t *fip;
1531 
1532 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
1533 
1534 	fip = P_FINFO(p);
1535 
1536 	mutex_exit(&p->p_lock);
1537 	mutex_enter(&fip->fi_lock);
1538 	UF_ENTER(ufp, fip, fd);
1539 	ASSERT3U(ufp->uf_refcnt, >, 0);
1540 	ufp->uf_refcnt--;
1541 	UF_EXIT(ufp);
1542 	mutex_exit(&fip->fi_lock);
1543 	mutex_enter(&p->p_lock);
1544 }
1545 
1546 void
1547 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1548 {
1549 	char *s = name;
1550 	struct vfs *vfsp;
1551 	struct vfssw *vfsswp;
1552 
1553 	if ((vfsp = vp->v_vfsp) != NULL &&
1554 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1555 	    *vfsswp->vsw_name) {
1556 		(void) strcpy(s, vfsswp->vsw_name);
1557 		s += strlen(s);
1558 		*s++ = '.';
1559 	}
1560 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1561 	*s++ = '.';
1562 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1563 	*s++ = '.';
1564 	s += pr_u64tos(vattr->va_nodeid, s);
1565 	*s++ = '\0';
1566 }
1567 
1568 struct seg *
1569 break_seg(proc_t *p)
1570 {
1571 	caddr_t addr = p->p_brkbase;
1572 	struct seg *seg;
1573 	struct vnode *vp;
1574 
1575 	if (p->p_brksize != 0)
1576 		addr += p->p_brksize - 1;
1577 	seg = as_segat(p->p_as, addr);
1578 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1579 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1580 		return (seg);
1581 	return (NULL);
1582 }
1583 
/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 *
 * Each buffer is a single allocation: a piol_t header followed directly
 * by its data area (see PIOL_DATABUF()).  Buffers are chained on a
 * list_t through piol_link.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

/* allocation size of each follow-on buffer; also caps the initial one */
#define	MAPSIZE	(64 * 1024)
/* start of the data area, i.e. the first byte past the piol_t header */
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1596 
1597 void
1598 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1599 {
1600 	piol_t	*iol;
1601 	size_t	initial_size = MIN(1, n) * itemsize;
1602 
1603 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1604 
1605 	ASSERT(list_head(iolhead) == NULL);
1606 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1607 	ASSERT(initial_size > 0);
1608 
1609 	/*
1610 	 * Someone creating chained copyout buffers may ask for less than
1611 	 * MAPSIZE if the amount of data to be buffered is known to be
1612 	 * smaller than that.
1613 	 * But in order to prevent involuntary self-denial of service,
1614 	 * the requested input size is clamped at MAPSIZE.
1615 	 */
1616 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1617 	iol = kmem_alloc(initial_size, KM_SLEEP);
1618 	list_insert_head(iolhead, iol);
1619 	iol->piol_usedsize = 0;
1620 	iol->piol_size = initial_size;
1621 }
1622 
1623 void *
1624 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1625 {
1626 	piol_t	*iol;
1627 	char	*new;
1628 
1629 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1630 	ASSERT(list_head(iolhead) != NULL);
1631 
1632 	iol = (piol_t *)list_tail(iolhead);
1633 
1634 	if (iol->piol_size <
1635 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1636 		/*
1637 		 * Out of space in the current buffer. Allocate more.
1638 		 */
1639 		piol_t *newiol;
1640 
1641 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1642 		newiol->piol_size = MAPSIZE;
1643 		newiol->piol_usedsize = 0;
1644 
1645 		list_insert_after(iolhead, iol, newiol);
1646 		iol = list_next(iolhead, iol);
1647 		ASSERT(iol == newiol);
1648 	}
1649 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1650 	iol->piol_usedsize += itemsize;
1651 	bzero(new, itemsize);
1652 	return (new);
1653 }
1654 
1655 void
1656 pr_iol_freelist(list_t *iolhead)
1657 {
1658 	piol_t	*iol;
1659 
1660 	while ((iol = list_head(iolhead)) != NULL) {
1661 		list_remove(iolhead, iol);
1662 		kmem_free(iol, iol->piol_size);
1663 	}
1664 	list_destroy(iolhead);
1665 }
1666 
1667 int
1668 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1669 {
1670 	int error = errin;
1671 	piol_t	*iol;
1672 
1673 	while ((iol = list_head(iolhead)) != NULL) {
1674 		list_remove(iolhead, iol);
1675 		if (!error) {
1676 			if (copyout(PIOL_DATABUF(iol), *tgt,
1677 			    iol->piol_usedsize))
1678 				error = EFAULT;
1679 			*tgt += iol->piol_usedsize;
1680 		}
1681 		kmem_free(iol, iol->piol_size);
1682 	}
1683 	list_destroy(iolhead);
1684 
1685 	return (error);
1686 }
1687 
1688 int
1689 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1690 {
1691 	offset_t	off = uiop->uio_offset;
1692 	char		*base;
1693 	size_t		size;
1694 	piol_t		*iol;
1695 	int		error = errin;
1696 
1697 	while ((iol = list_head(iolhead)) != NULL) {
1698 		list_remove(iolhead, iol);
1699 		base = PIOL_DATABUF(iol);
1700 		size = iol->piol_usedsize;
1701 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1702 			error = uiomove(base + off, size - off,
1703 			    UIO_READ, uiop);
1704 		off = MAX(0, off - (offset_t)size);
1705 		kmem_free(iol, iol->piol_size);
1706 	}
1707 	list_destroy(iolhead);
1708 
1709 	return (error);
1710 }
1711 
/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 *
 * One prmap_t is appended to the chained buffer list 'iolhead' for each
 * non-empty range that pr_getprot() reports within each non-hole
 * segment of p's address space.  'reserved' is passed through to
 * pr_getsegsize() and pr_getprot(), and additionally causes the stack
 * mapping to be reported at its maximum (p_stk_ctl based) extent.
 *
 * The caller must hold the address space write lock.  Always returns 0;
 * the list is initialized even when the address space has no segments.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* remember the break and stack segments so they can be flagged */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/* pr_getprot() sets naddr to the end of each range */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			/* pr_iol_newbuf() returns the entry already zeroed */
			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/*
			 * Anonymous: a segspt shm segment, or a segvn
			 * segment with no vnode behind this address.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the stack from where it
					 * would start at its page-rounded
					 * p_stk_ctl size, up to naddr.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1829 
1830 #ifdef _SYSCALL32_IMPL
/*
 * ILP32 counterpart of prgetmap(): append one prmap32_t to the chained
 * buffer list 'iolhead' for each non-empty range that pr_getprot()
 * reports within each non-hole segment of p's address space, narrowing
 * addresses and sizes to 32 bits.
 *
 * The caller must hold the address space write lock.  Always returns 0;
 * the list is initialized even when the address space has no segments.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* remember the break and stack segments so they can be flagged */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/* pr_getprot() sets naddr to the end of each range */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			/* pr_iol_newbuf() returns the entry already zeroed */
			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/*
			 * Anonymous: a segspt shm segment, or a segvn
			 * segment with no vnode behind this address.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the stack from where it
					 * would start at its page-rounded
					 * p_stk_ctl size, up to naddr.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1945 #endif	/* _SYSCALL32_IMPL */
1946 
1947 /*
1948  * Return the size of the /proc page data file.
1949  */
1950 size_t
1951 prpdsize(struct as *as)
1952 {
1953 	struct seg *seg;
1954 	size_t size;
1955 
1956 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1957 
1958 	if ((seg = AS_SEGFIRST(as)) == NULL)
1959 		return (0);
1960 
1961 	size = sizeof (prpageheader_t);
1962 	do {
1963 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1964 		caddr_t saddr, naddr;
1965 		void *tmp = NULL;
1966 		size_t npage;
1967 
1968 		if ((seg->s_flags & S_HOLE) != 0) {
1969 			continue;
1970 		}
1971 
1972 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1973 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1974 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1975 				size += sizeof (prasmap_t) + round8(npage);
1976 		}
1977 		ASSERT(tmp == NULL);
1978 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1979 
1980 	return (size);
1981 }
1982 
1983 #ifdef _SYSCALL32_IMPL
1984 size_t
1985 prpdsize32(struct as *as)
1986 {
1987 	struct seg *seg;
1988 	size_t size;
1989 
1990 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1991 
1992 	if ((seg = AS_SEGFIRST(as)) == NULL)
1993 		return (0);
1994 
1995 	size = sizeof (prpageheader32_t);
1996 	do {
1997 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1998 		caddr_t saddr, naddr;
1999 		void *tmp = NULL;
2000 		size_t npage;
2001 
2002 		if ((seg->s_flags & S_HOLE) != 0) {
2003 			continue;
2004 		}
2005 
2006 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2007 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2008 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2009 				size += sizeof (prasmap32_t) + round8(npage);
2010 		}
2011 		ASSERT(tmp == NULL);
2012 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2013 
2014 	return (size);
2015 }
2016 #endif	/* _SYSCALL32_IMPL */
2017 
/*
 * Read page data information.
 *
 * Builds, in a temporary kernel buffer, a prpageheader_t followed by
 * one prasmap_t plus per-page data (filled in by hat_getstat()) for
 * each non-empty range in p's address space, then moves the result to
 * 'uiop'.
 *
 * Returns 0 if the address space has no segments, E2BIG if uio_resid
 * is smaller than the size computed by prpdsize(), EINTR if a signal is
 * pending when a retry would be needed, and otherwise the result of
 * uiomove().  The address space write lock is taken and released here.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	/* header first, then the prasmap_t entries packed after it */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the entry after this one would begin */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/*
			 * Anonymous: a segspt shm segment, or a segvn
			 * segment with no vnode behind this address.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* per-page data goes directly after this prasmap_t */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	/* copy out only the part of the buffer that was actually filled */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2170 
2171 #ifdef _SYSCALL32_IMPL
/*
 * ILP32 flavor of the page-data read.  Builds, in a temporary kernel
 * buffer, a prpageheader32_t followed by one prasmap32_t per mapping (each
 * immediately followed by the per-page data that hat_getstat() stores), and
 * copies the result out through uiop.
 *
 * Returns 0 on success or when the address space has no segments, E2BIG
 * when uiop->uio_resid is smaller than the size reported by prpdsize32(),
 * EINTR when a retry is required while a signal is pending, or whatever
 * uiomove() returns.
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

	/*
	 * Each pass through "again" starts from scratch: the lock is taken
	 * anew, the size is recomputed and a fresh buffer is allocated.
	 */
again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	/* The header sits at the front; pmp is the cursor for map entries. */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/*
		 * Segments flagged S_HOLE are not reported.  Note that
		 * "continue" in a do/while jumps to the loop condition, so
		 * this still advances to the next segment.
		 */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the entry after this one would begin */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			/* translate protections and mapping type into MA_* */
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/*
			 * segspt_shmops segments, and segvn segments with no
			 * vnode behind them, are reported as anonymous.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 * Only segvn mappings of regular files get a name;
			 * otherwise pr_mapname keeps the zeroes supplied by
			 * kmem_zalloc().
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * SHMID_FREE is reported as -1 but still marks the
			 * mapping MA_SHM.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* per-page data lands directly after this entry */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	/* Only the portion of the buffer actually filled in is copied out. */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2321 #endif	/* _SYSCALL32_IMPL */
2322 
2323 ushort_t
2324 prgetpctcpu(uint64_t pct)
2325 {
2326 	/*
2327 	 * The value returned will be relevant in the zone of the examiner,
2328 	 * which may not be the same as the zone which performed the procfs
2329 	 * mount.
2330 	 */
2331 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2332 
2333 	/*
2334 	 * Prorate over online cpus so we don't exceed 100%
2335 	 */
2336 	if (nonline > 1)
2337 		pct /= nonline;
2338 	pct >>= 16;		/* convert to 16-bit scaled integer */
2339 	if (pct > 0x8000)	/* might happen, due to rounding */
2340 		pct = 0x8000;
2341 	return ((ushort_t)pct);
2342 }
2343 
2344 /*
2345  * Return information used by ps(1).
2346  */
2347 void
2348 prgetpsinfo(proc_t *p, psinfo_t *psp)
2349 {
2350 	kthread_t *t;
2351 	struct cred *cred;
2352 	hrtime_t hrutime, hrstime;
2353 
2354 	ASSERT(MUTEX_HELD(&p->p_lock));
2355 
2356 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2357 		bzero(psp, sizeof (*psp));
2358 	else {
2359 		thread_unlock(t);
2360 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2361 	}
2362 
2363 	/*
2364 	 * only export SSYS and SMSACCT; everything else is off-limits to
2365 	 * userland apps.
2366 	 */
2367 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2368 	psp->pr_nlwp = p->p_lwpcnt;
2369 	psp->pr_nzomb = p->p_zombcnt;
2370 	mutex_enter(&p->p_crlock);
2371 	cred = p->p_cred;
2372 	psp->pr_uid = crgetruid(cred);
2373 	psp->pr_euid = crgetuid(cred);
2374 	psp->pr_gid = crgetrgid(cred);
2375 	psp->pr_egid = crgetgid(cred);
2376 	mutex_exit(&p->p_crlock);
2377 	psp->pr_pid = p->p_pid;
2378 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2379 	    (p->p_flag & SZONETOP)) {
2380 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2381 		/*
2382 		 * Inside local zones, fake zsched's pid as parent pids for
2383 		 * processes which reference processes outside of the zone.
2384 		 */
2385 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2386 	} else {
2387 		psp->pr_ppid = p->p_ppid;
2388 	}
2389 	psp->pr_pgid = p->p_pgrp;
2390 	psp->pr_sid = p->p_sessp->s_sid;
2391 	psp->pr_taskid = p->p_task->tk_tkid;
2392 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2393 	psp->pr_poolid = p->p_pool->pool_id;
2394 	psp->pr_zoneid = p->p_zone->zone_id;
2395 	if ((psp->pr_contract = PRCTID(p)) == 0)
2396 		psp->pr_contract = -1;
2397 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2398 	switch (p->p_model) {
2399 	case DATAMODEL_ILP32:
2400 		psp->pr_dmodel = PR_MODEL_ILP32;
2401 		break;
2402 	case DATAMODEL_LP64:
2403 		psp->pr_dmodel = PR_MODEL_LP64;
2404 		break;
2405 	}
2406 	hrutime = mstate_aggr_state(p, LMS_USER);
2407 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2408 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2409 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2410 
2411 	if (t == NULL) {
2412 		int wcode = p->p_wcode;		/* must be atomic read */
2413 
2414 		if (wcode)
2415 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2416 		psp->pr_ttydev = PRNODEV;
2417 		psp->pr_lwp.pr_state = SZOMB;
2418 		psp->pr_lwp.pr_sname = 'Z';
2419 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2420 		psp->pr_lwp.pr_bindpset = PS_NONE;
2421 	} else {
2422 		user_t *up = PTOU(p);
2423 		struct as *as;
2424 		dev_t d;
2425 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2426 
2427 		d = cttydev(p);
2428 		/*
2429 		 * If the controlling terminal is the real
2430 		 * or workstation console device, map to what the
2431 		 * user thinks is the console device. Handle case when
2432 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2433 		 */
2434 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2435 			d = uconsdev;
2436 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2437 		psp->pr_start = up->u_start;
2438 		bcopy(up->u_comm, psp->pr_fname,
2439 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2440 		bcopy(up->u_psargs, psp->pr_psargs,
2441 		    MIN(PRARGSZ-1, PSARGSZ));
2442 		psp->pr_argc = up->u_argc;
2443 		psp->pr_argv = up->u_argv;
2444 		psp->pr_envp = up->u_envp;
2445 
2446 		/* get the chosen lwp's lwpsinfo */
2447 		prgetlwpsinfo(t, &psp->pr_lwp);
2448 
2449 		/* compute %cpu for the process */
2450 		if (p->p_lwpcnt == 1)
2451 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2452 		else {
2453 			uint64_t pct = 0;
2454 			hrtime_t cur_time = gethrtime_unscaled();
2455 
2456 			t = p->p_tlist;
2457 			do {
2458 				pct += cpu_update_pct(t, cur_time);
2459 			} while ((t = t->t_forw) != p->p_tlist);
2460 
2461 			psp->pr_pctcpu = prgetpctcpu(pct);
2462 		}
2463 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2464 			psp->pr_size = 0;
2465 			psp->pr_rssize = 0;
2466 		} else {
2467 			mutex_exit(&p->p_lock);
2468 			AS_LOCK_ENTER(as, RW_READER);
2469 			psp->pr_size = btopr(as->a_resvsize) *
2470 			    (PAGESIZE / 1024);
2471 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2472 			psp->pr_pctmem = rm_pctmemory(as);
2473 			AS_LOCK_EXIT(as);
2474 			mutex_enter(&p->p_lock);
2475 		}
2476 	}
2477 }
2478 
2479 static size_t
2480 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen)
2481 {
2482 	pr_misc_header_t *misc;
2483 	size_t len;
2484 
2485 	len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2486 
2487 	if (data != NULL) {
2488 		misc = pr_iol_newbuf(data, len);
2489 		misc->pr_misc_type = type;
2490 		misc->pr_misc_size = len;
2491 		misc++;
2492 		bcopy((char *)val, (char *)misc, vlen);
2493 	}
2494 
2495 	return (len);
2496 }
2497 
2498 /*
2499  * There's no elegant way to determine if a character device
2500  * supports TLI, so just check a hardcoded list of known TLI
2501  * devices.
2502  */
2503 
2504 static boolean_t
2505 pristli(vnode_t *vp)
2506 {
2507 	static const char *tlidevs[] = {
2508 	    "udp", "udp6", "tcp", "tcp6"
2509 	};
2510 	char *devname;
2511 	uint_t i;
2512 
2513 	ASSERT(vp != NULL);
2514 
2515 	if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0)
2516 		return (B_FALSE);
2517 
2518 	if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL)
2519 		return (B_FALSE);
2520 
2521 	for (i = 0; i < ARRAY_SIZE(tlidevs); i++) {
2522 		if (strcmp(devname, tlidevs[i]) == 0)
2523 			return (B_TRUE);
2524 	}
2525 
2526 	return (B_FALSE);
2527 }
2528 
2529 static size_t
2530 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
2531 {
2532 	char *pathname;
2533 	size_t pathlen;
2534 	size_t sz = 0;
2535 
2536 	/*
2537 	 * The global zone's path to a file in a non-global zone can exceed
2538 	 * MAXPATHLEN.
2539 	 */
2540 	pathlen = MAXPATHLEN * 2 + 1;
2541 	pathname = kmem_alloc(pathlen, KM_SLEEP);
2542 
2543 	if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
2544 		sz += prfdinfomisc(data, PR_PATHNAME,
2545 		    pathname, strlen(pathname) + 1);
2546 	}
2547 
2548 	kmem_free(pathname, pathlen);
2549 
2550 	return (sz);
2551 }
2552 
2553 static size_t
2554 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
2555 {
2556 	strcmd_t strcmd;
2557 	int32_t rval;
2558 	size_t sz = 0;
2559 
2560 	strcmd.sc_cmd = TI_GETMYNAME;
2561 	strcmd.sc_timeout = 1;
2562 	strcmd.sc_len = STRCMDBUFSIZE;
2563 
2564 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2565 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2566 		sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
2567 		    strcmd.sc_len);
2568 	}
2569 
2570 	strcmd.sc_cmd = TI_GETPEERNAME;
2571 	strcmd.sc_timeout = 1;
2572 	strcmd.sc_len = STRCMDBUFSIZE;
2573 
2574 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2575 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2576 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
2577 		    strcmd.sc_len);
2578 	}
2579 
2580 	return (sz);
2581 }
2582 
2583 static size_t
2584 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
2585 {
2586 	sonode_t *so;
2587 	socklen_t vlen;
2588 	size_t sz = 0;
2589 	uint_t i;
2590 
2591 	if (vp->v_stream != NULL) {
2592 		so = VTOSO(vp->v_stream->sd_vnode);
2593 
2594 		if (so->so_version == SOV_STREAM)
2595 			so = NULL;
2596 	} else {
2597 		so = VTOSO(vp);
2598 	}
2599 
2600 	if (so == NULL)
2601 		return (0);
2602 
2603 	DTRACE_PROBE1(sonode, sonode_t *, so);
2604 
2605 	/* prmisc - PR_SOCKETNAME */
2606 
2607 	struct sockaddr_storage buf;
2608 	struct sockaddr *name = (struct sockaddr *)&buf;
2609 
2610 	vlen = sizeof (buf);
2611 	if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
2612 		sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
2613 
2614 	/* prmisc - PR_PEERSOCKNAME */
2615 
2616 	vlen = sizeof (buf);
2617 	if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
2618 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
2619 
2620 	/* prmisc - PR_SOCKOPTS_BOOL_OPTS */
2621 
2622 	static struct boolopt {
2623 		int		level;
2624 		int		opt;
2625 		int		bopt;
2626 	} boolopts[] = {
2627 		{ SOL_SOCKET, SO_DEBUG,		PR_SO_DEBUG },
2628 		{ SOL_SOCKET, SO_REUSEADDR,	PR_SO_REUSEADDR },
2629 #ifdef SO_REUSEPORT
2630 		/* SmartOS and OmniOS have SO_REUSEPORT */
2631 		{ SOL_SOCKET, SO_REUSEPORT,	PR_SO_REUSEPORT },
2632 #endif
2633 		{ SOL_SOCKET, SO_KEEPALIVE,	PR_SO_KEEPALIVE },
2634 		{ SOL_SOCKET, SO_DONTROUTE,	PR_SO_DONTROUTE },
2635 		{ SOL_SOCKET, SO_BROADCAST,	PR_SO_BROADCAST },
2636 		{ SOL_SOCKET, SO_OOBINLINE,	PR_SO_OOBINLINE },
2637 		{ SOL_SOCKET, SO_DGRAM_ERRIND,	PR_SO_DGRAM_ERRIND },
2638 		{ SOL_SOCKET, SO_ALLZONES,	PR_SO_ALLZONES },
2639 		{ SOL_SOCKET, SO_MAC_EXEMPT,	PR_SO_MAC_EXEMPT },
2640 		{ SOL_SOCKET, SO_MAC_IMPLICIT,	PR_SO_MAC_IMPLICIT },
2641 		{ SOL_SOCKET, SO_EXCLBIND,	PR_SO_EXCLBIND },
2642 		{ SOL_SOCKET, SO_VRRP,		PR_SO_VRRP },
2643 		{ IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
2644 		    PR_UDP_NAT_T_ENDPOINT }
2645 	};
2646 	prsockopts_bool_opts_t opts;
2647 	int val;
2648 
2649 	if (data != NULL) {
2650 		opts.prsock_bool_opts = 0;
2651 
2652 		for (i = 0; i < ARRAY_SIZE(boolopts); i++) {
2653 			vlen = sizeof (val);
2654 			if (SOP_GETSOCKOPT(so, boolopts[i].level,
2655 			    boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
2656 			    val != 0) {
2657 				opts.prsock_bool_opts |= boolopts[i].bopt;
2658 			}
2659 		}
2660 	}
2661 
2662 	sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
2663 
2664 	/* prmisc - PR_SOCKOPT_LINGER */
2665 
2666 	struct linger l;
2667 
2668 	vlen = sizeof (l);
2669 	if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
2670 	    0, cred) == 0 && vlen > 0) {
2671 		sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
2672 	}
2673 
2674 	/* prmisc - PR_SOCKOPT_* int types */
2675 
2676 	static struct sopt {
2677 		int		level;
2678 		int		opt;
2679 		int		bopt;
2680 	} sopts[] = {
2681 		{ SOL_SOCKET, SO_TYPE,		PR_SOCKOPT_TYPE },
2682 		{ SOL_SOCKET, SO_SNDBUF,	PR_SOCKOPT_SNDBUF },
2683 		{ SOL_SOCKET, SO_RCVBUF,	PR_SOCKOPT_RCVBUF }
2684 	};
2685 
2686 	for (i = 0; i < ARRAY_SIZE(sopts); i++) {
2687 		vlen = sizeof (val);
2688 		if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
2689 		    &val, &vlen, 0, cred) == 0 && vlen > 0) {
2690 			sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
2691 		}
2692 	}
2693 
2694 	/* prmisc - PR_SOCKOPT_IP_NEXTHOP */
2695 
2696 	in_addr_t nexthop_val;
2697 
2698 	vlen = sizeof (nexthop_val);
2699 	if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
2700 	    &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
2701 		sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
2702 		    &nexthop_val, vlen);
2703 	}
2704 
2705 	/* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
2706 
2707 	struct sockaddr_in6 nexthop6_val;
2708 
2709 	vlen = sizeof (nexthop6_val);
2710 	if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
2711 	    &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
2712 		sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
2713 		    &nexthop6_val, vlen);
2714 	}
2715 
2716 	/* prmisc - PR_SOCKOPT_TCP_CONGESTION */
2717 
2718 	char cong[CC_ALGO_NAME_MAX];
2719 
2720 	vlen = sizeof (cong);
2721 	if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
2722 	    &cong, &vlen, 0, cred) == 0 && vlen > 0) {
2723 		sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
2724 	}
2725 
2726 	/* prmisc - PR_SOCKFILTERS_PRIV */
2727 
2728 	struct fil_info fi;
2729 
2730 	vlen = sizeof (fi);
2731 	if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2732 	    &fi, &vlen, 0, cred) == 0 && vlen != 0) {
2733 		pr_misc_header_t *misc;
2734 		size_t len;
2735 
2736 		/*
2737 		 * We limit the number of returned filters to 32.
2738 		 * This is the maximum number that pfiles will print
2739 		 * anyway.
2740 		 */
2741 		vlen = MIN(32, fi.fi_pos + 1);
2742 		vlen *= sizeof (fi);
2743 
2744 		len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2745 		sz += len;
2746 
2747 		if (data != NULL) {
2748 			/*
2749 			 * So that the filter list can be built incrementally,
2750 			 * prfdinfomisc() is not used here. Instead we
2751 			 * allocate a buffer directly on the copyout list using
2752 			 * pr_iol_newbuf()
2753 			 */
2754 			misc = pr_iol_newbuf(data, len);
2755 			misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
2756 			misc->pr_misc_size = len;
2757 			misc++;
2758 			len = vlen;
2759 			if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2760 			    misc, &vlen, 0, cred) == 0) {
2761 				/*
2762 				 * In case the number of filters has reduced
2763 				 * since the first call, explicitly zero out
2764 				 * any unpopulated space.
2765 				 */
2766 				if (vlen < len)
2767 					bzero(misc + vlen, len - vlen);
2768 			} else {
2769 				/* Something went wrong, zero out the result */
2770 				bzero(misc, vlen);
2771 			}
2772 		}
2773 	}
2774 
2775 	return (sz);
2776 }
2777 
/*
 * State shared between prfdinfo_nm_path() and the code that starts the
 * nm_walk_mounts() walk it is called from.
 */
typedef struct prfdinfo_nm_path_cbdata {
	proc_t		*nmp_p;		/* process handed to prfdinfopath() */
	u_offset_t	nmp_sz;		/* running total of item sizes */
	list_t		*nmp_data;	/* copyout list; NULL to size only */
} prfdinfo_nm_path_cbdata_t;
2783 
2784 static int
2785 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
2786 {
2787 	prfdinfo_nm_path_cbdata_t *cb = arg;
2788 
2789 	cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);
2790 
2791 	return (0);
2792 }
2793 
2794 u_offset_t
2795 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
2796 {
2797 	u_offset_t sz;
2798 
2799 	/*
2800 	 * All fdinfo files will be at least this big -
2801 	 * sizeof fdinfo struct + zero length trailer
2802 	 */
2803 	sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);
2804 
2805 	/* Pathname */
2806 	switch (vp->v_type) {
2807 	case VDOOR: {
2808 		prfdinfo_nm_path_cbdata_t cb = {
2809 			.nmp_p		= p,
2810 			.nmp_data	= NULL,
2811 			.nmp_sz		= 0
2812 		};
2813 
2814 		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
2815 		sz += cb.nmp_sz;
2816 		break;
2817 	}
2818 	case VSOCK:
2819 		break;
2820 	default:
2821 		sz += prfdinfopath(p, vp, NULL, cred);
2822 	}
2823 
2824 	/* Socket options */
2825 	if (vp->v_type == VSOCK)
2826 		sz += prfdinfosockopt(vp, NULL, cred);
2827 
2828 	/* TLI/XTI sockets */
2829 	if (pristli(vp))
2830 		sz += prfdinfotlisockopt(vp, NULL, cred);
2831 
2832 	return (sz);
2833 }
2834 
/*
 * Fill in the fixed part of *fdinfo for the file vp and append the
 * variable-length pr_misc items (paths, socket options, TLI names) to the
 * copyout list `data`, finishing with a zero-size terminating header.
 *
 * cred is the caller's credential.  file_cred is the credential of the
 * file being inspected and is used only for the attribute lookup.  p is
 * passed through to the path helpers.
 *
 * Always returns 0: when an individual lookup fails, the corresponding
 * fields simply keep the defaults assigned at the top of the function.
 */
int
prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
    cred_t *file_cred, list_t *data)
{
	vattr_t vattr;
	int error;

	/*
	 * The buffer has been initialised to zero by pr_iol_newbuf().
	 * Initialise defaults for any values that should not default to zero.
	 */
	fdinfo->pr_uid = (uid_t)-1;
	fdinfo->pr_gid = (gid_t)-1;
	fdinfo->pr_size = -1;
	fdinfo->pr_locktype = F_UNLCK;
	fdinfo->pr_lockpid = -1;
	fdinfo->pr_locksysid = -1;
	fdinfo->pr_peerpid = -1;

	/* Offset */

	/*
	 * pr_offset has already been set from the underlying file_t.
	 * Check if it is plausible and reset to -1 if not.
	 */
	if (fdinfo->pr_offset != -1 &&
	    VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
		fdinfo->pr_offset = -1;

	/*
	 * Attributes
	 *
	 * We have two cred_t structures available here.
	 * 'cred' is the caller's credential, and 'file_cred' is the credential
	 * for the file being inspected.
	 *
	 * When looking up the file attributes, file_cred is used in order
	 * that the correct ownership is set for doors and FIFOs. Since the
	 * caller has permission to read the fdinfo file in proc, this does
	 * not expose any additional information.
	 */
	vattr.va_mask = AT_STAT;
	if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) {
		fdinfo->pr_major = getmajor(vattr.va_fsid);
		fdinfo->pr_minor = getminor(vattr.va_fsid);
		fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
		fdinfo->pr_rminor = getminor(vattr.va_rdev);
		fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
		fdinfo->pr_size = (off64_t)vattr.va_size;
		fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
		fdinfo->pr_uid = vattr.va_uid;
		fdinfo->pr_gid = vattr.va_gid;
		/* sockets contribute extra file flags via sock_getfasync() */
		if (vp->v_type == VSOCK)
			fdinfo->pr_fileflags |= sock_getfasync(vp);
	}

	/* locks */

	flock64_t bf;

	/*
	 * Probe with an all-zero write-lock request.  If l_type comes back
	 * as anything other than F_UNLCK, record the lock that was reported.
	 */
	bzero(&bf, sizeof (bf));
	bf.l_type = F_WRLCK;

	if (VOP_FRLOCK(vp, F_GETLK, &bf,
	    (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL,
	    cred, NULL) == 0 && bf.l_type != F_UNLCK) {
		fdinfo->pr_locktype = bf.l_type;
		fdinfo->pr_lockpid = bf.l_pid;
		fdinfo->pr_locksysid = bf.l_sysid;
	}

	/* peer cred */

	k_peercred_t kpc;

	/*
	 * kpc is only examined below when error == 0, i.e. after one of
	 * the _I_GETPEERCRED requests has succeeded.
	 */
	switch (vp->v_type) {
	case VFIFO:
	case VSOCK: {
		int32_t rval;

		error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc,
		    FKIOCTL, cred, &rval, NULL);
		break;
	}
	case VCHR: {
		struct strioctl strioc;
		int32_t rval;

		/* only character devices backed by a stream are asked */
		if (vp->v_stream == NULL) {
			error = ENOTSUP;
			break;
		}
		strioc.ic_cmd = _I_GETPEERCRED;
		strioc.ic_timout = INFTIM;
		strioc.ic_len = (int)sizeof (k_peercred_t);
		strioc.ic_dp = (char *)&kpc;

		error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL,
		    STR_NOSIG | K_TO_K, cred, &rval);
		break;
	}
	default:
		error = ENOTSUP;
		break;
	}

	if (error == 0 && kpc.pc_cr != NULL) {
		proc_t *peerp;

		fdinfo->pr_peerpid = kpc.pc_cpid;

		/*
		 * Only the pid is used; the cred is released here.
		 * NOTE(review): presumably the ioctl returned it with a
		 * hold - confirm.
		 */
		crfree(kpc.pc_cr);

		/*
		 * Look the peer up under pidlock and take its p_lock before
		 * dropping pidlock, then copy out its command name.
		 */
		mutex_enter(&pidlock);
		if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) {
			user_t *up;

			mutex_enter(&peerp->p_lock);
			mutex_exit(&pidlock);

			/*
			 * At most sizeof (pr_peername) - 1 bytes are copied,
			 * so the last byte of the zeroed field is never
			 * overwritten.
			 */
			up = PTOU(peerp);
			bcopy(up->u_comm, fdinfo->pr_peername,
			    MIN(sizeof (up->u_comm),
			    sizeof (fdinfo->pr_peername) - 1));

			mutex_exit(&peerp->p_lock);
		} else {
			mutex_exit(&pidlock);
		}
	}

	/* pathname */

	switch (vp->v_type) {
	case VDOOR: {
		/* one path item per mount reported by the walk */
		prfdinfo_nm_path_cbdata_t cb = {
			.nmp_p		= p,
			.nmp_data	= data,
			.nmp_sz		= 0
		};

		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
		break;
	}
	case VSOCK:
		/*
		 * Don't attempt to determine the path for a socket as the
		 * vnode has no associated v_path. It will cause a linear scan
		 * of the dnlc table and result in no path being found.
		 */
		break;
	default:
		(void) prfdinfopath(p, vp, data, cred);
	}

	/* socket options */
	if (vp->v_type == VSOCK)
		(void) prfdinfosockopt(vp, data, cred);

	/* TLI/XTI stream sockets */
	if (pristli(vp))
		(void) prfdinfotlisockopt(vp, data, cred);

	/*
	 * Add a terminating header with a zero size.
	 */
	pr_misc_header_t *misc;

	misc = pr_iol_newbuf(data, sizeof (*misc));
	misc->pr_misc_size = 0;
	misc->pr_misc_type = (uint_t)-1;

	return (0);
}
3009 
3010 #ifdef _SYSCALL32_IMPL
3011 void
3012 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
3013 {
3014 	kthread_t *t;
3015 	struct cred *cred;
3016 	hrtime_t hrutime, hrstime;
3017 
3018 	ASSERT(MUTEX_HELD(&p->p_lock));
3019 
3020 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
3021 		bzero(psp, sizeof (*psp));
3022 	else {
3023 		thread_unlock(t);
3024 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
3025 	}
3026 
3027 	/*
3028 	 * only export SSYS and SMSACCT; everything else is off-limits to
3029 	 * userland apps.
3030 	 */
3031 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
3032 	psp->pr_nlwp = p->p_lwpcnt;
3033 	psp->pr_nzomb = p->p_zombcnt;
3034 	mutex_enter(&p->p_crlock);
3035 	cred = p->p_cred;
3036 	psp->pr_uid = crgetruid(cred);
3037 	psp->pr_euid = crgetuid(cred);
3038 	psp->pr_gid = crgetrgid(cred);
3039 	psp->pr_egid = crgetgid(cred);
3040 	mutex_exit(&p->p_crlock);
3041 	psp->pr_pid = p->p_pid;
3042 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
3043 	    (p->p_flag & SZONETOP)) {
3044 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
3045 		/*
3046 		 * Inside local zones, fake zsched's pid as parent pids for
3047 		 * processes which reference processes outside of the zone.
3048 		 */
3049 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
3050 	} else {
3051 		psp->pr_ppid = p->p_ppid;
3052 	}
3053 	psp->pr_pgid = p->p_pgrp;
3054 	psp->pr_sid = p->p_sessp->s_sid;
3055 	psp->pr_taskid = p->p_task->tk_tkid;
3056 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
3057 	psp->pr_poolid = p->p_pool->pool_id;
3058 	psp->pr_zoneid = p->p_zone->zone_id;
3059 	if ((psp->pr_contract = PRCTID(p)) == 0)
3060 		psp->pr_contract = -1;
3061 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
3062 	switch (p->p_model) {
3063 	case DATAMODEL_ILP32:
3064 		psp->pr_dmodel = PR_MODEL_ILP32;
3065 		break;
3066 	case DATAMODEL_LP64:
3067 		psp->pr_dmodel = PR_MODEL_LP64;
3068 		break;
3069 	}
3070 	hrutime = mstate_aggr_state(p, LMS_USER);
3071 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
3072 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
3073 	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
3074 
3075 	if (t == NULL) {
3076 		extern int wstat(int, int);	/* needs a header file */
3077 		int wcode = p->p_wcode;		/* must be atomic read */
3078 
3079 		if (wcode)
3080 			psp->pr_wstat = wstat(wcode, p->p_wdata);
3081 		psp->pr_ttydev = PRNODEV32;
3082 		psp->pr_lwp.pr_state = SZOMB;
3083 		psp->pr_lwp.pr_sname = 'Z';
3084 	} else {
3085 		user_t *up = PTOU(p);
3086 		struct as *as;
3087 		dev_t d;
3088 		extern dev_t rwsconsdev, rconsdev, uconsdev;
3089 
3090 		d = cttydev(p);
3091 		/*
3092 		 * If the controlling terminal is the real
3093 		 * or workstation console device, map to what the
3094 		 * user thinks is the console device. Handle case when
3095 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
3096 		 */
3097 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
3098 			d = uconsdev;
3099 		(void) cmpldev(&psp->pr_ttydev, d);
3100 		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
3101 		bcopy(up->u_comm, psp->pr_fname,
3102 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
3103 		bcopy(up->u_psargs, psp->pr_psargs,
3104 		    MIN(PRARGSZ-1, PSARGSZ));
3105 		psp->pr_argc = up->u_argc;
3106 		psp->pr_argv = (caddr32_t)up->u_argv;
3107 		psp->pr_envp = (caddr32_t)up->u_envp;
3108 
3109 		/* get the chosen lwp's lwpsinfo */
3110 		prgetlwpsinfo32(t, &psp->pr_lwp);
3111 
3112 		/* compute %cpu for the process */
3113 		if (p->p_lwpcnt == 1)
3114 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
3115 		else {
3116 			uint64_t pct = 0;
3117 			hrtime_t cur_time;
3118 
3119 			t = p->p_tlist;
3120 			cur_time = gethrtime_unscaled();
3121 			do {
3122 				pct += cpu_update_pct(t, cur_time);
3123 			} while ((t = t->t_forw) != p->p_tlist);
3124 
3125 			psp->pr_pctcpu = prgetpctcpu(pct);
3126 		}
3127 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
3128 			psp->pr_size = 0;
3129 			psp->pr_rssize = 0;
3130 		} else {
3131 			mutex_exit(&p->p_lock);
3132 			AS_LOCK_ENTER(as, RW_READER);
3133 			psp->pr_size = (size32_t)
3134 			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
3135 			psp->pr_rssize = (size32_t)
3136 			    (rm_asrss(as) * (PAGESIZE / 1024));
3137 			psp->pr_pctmem = rm_pctmemory(as);
3138 			AS_LOCK_EXIT(as);
3139 			mutex_enter(&p->p_lock);
3140 		}
3141 	}
3142 
3143 	/*
3144 	 * If we are looking at an LP64 process, zero out
3145 	 * the fields that cannot be represented in ILP32.
3146 	 */
3147 	if (p->p_model != DATAMODEL_ILP32) {
3148 		psp->pr_size = 0;
3149 		psp->pr_rssize = 0;
3150 		psp->pr_argv = 0;
3151 		psp->pr_envp = 0;
3152 	}
3153 }
3154 
3155 #endif	/* _SYSCALL32_IMPL */
3156 
3157 void
3158 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
3159 {
3160 	klwp_t *lwp = ttolwp(t);
3161 	sobj_ops_t *sobj;
3162 	char c, state;
3163 	uint64_t pct;
3164 	int retval, niceval;
3165 	hrtime_t hrutime, hrstime;
3166 
3167 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3168 
3169 	bzero(psp, sizeof (*psp));
3170 
3171 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
3172 	psp->pr_lwpid = t->t_tid;
3173 	psp->pr_addr = (uintptr_t)t;
3174 	psp->pr_wchan = (uintptr_t)t->t_wchan;
3175 
3176 	/* map the thread state enum into a process state enum */
3177 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3178 	switch (state) {
3179 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
3180 	case TS_RUN:		state = SRUN;		c = 'R';	break;
3181 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
3182 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
3183 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
3184 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
3185 	default:		state = 0;		c = '?';	break;
3186 	}
3187 	psp->pr_state = state;
3188 	psp->pr_sname = c;
3189 	if ((sobj = t->t_sobj_ops) != NULL)
3190 		psp->pr_stype = SOBJ_TYPE(sobj);
3191 	retval = CL_DONICE(t, NULL, 0, &niceval);
3192 	if (retval == 0) {
3193 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3194 		psp->pr_nice = niceval + NZERO;
3195 	}
3196 	psp->pr_syscall = t->t_sysnum;
3197 	psp->pr_pri = t->t_pri;
3198 	psp->pr_start.tv_sec = t->t_start;
3199 	psp->pr_start.tv_nsec = 0L;
3200 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3201 	scalehrtime(&hrutime);
3202 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3203 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
3204 	scalehrtime(&hrstime);
3205 	hrt2ts(hrutime + hrstime, &psp->pr_time);
3206 	/* compute %cpu for the lwp */
3207 	pct = cpu_update_pct(t, gethrtime_unscaled());
3208 	psp->pr_pctcpu = prgetpctcpu(pct);
3209 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
3210 	if (psp->pr_cpu > 99)
3211 		psp->pr_cpu = 99;
3212 
3213 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3214 	    sizeof (psp->pr_clname) - 1);
3215 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
3216 	psp->pr_onpro = t->t_cpu->cpu_id;
3217 	psp->pr_bindpro = t->t_bind_cpu;
3218 	psp->pr_bindpset = t->t_bind_pset;
3219 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3220 }
3221 
3222 #ifdef _SYSCALL32_IMPL
3223 void
3224 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
3225 {
3226 	klwp_t *lwp = ttolwp(t);
3227 	sobj_ops_t *sobj;
3228 	char c, state;
3229 	uint64_t pct;
3230 	int retval, niceval;
3231 	hrtime_t hrutime, hrstime;
3232 
3233 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3234 
3235 	bzero(psp, sizeof (*psp));
3236 
3237 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
3238 	psp->pr_lwpid = t->t_tid;
3239 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
3240 	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
3241 
3242 	/* map the thread state enum into a process state enum */
3243 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3244 	switch (state) {
3245 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
3246 	case TS_RUN:		state = SRUN;		c = 'R';	break;
3247 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
3248 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
3249 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
3250 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
3251 	default:		state = 0;		c = '?';	break;
3252 	}
3253 	psp->pr_state = state;
3254 	psp->pr_sname = c;
3255 	if ((sobj = t->t_sobj_ops) != NULL)
3256 		psp->pr_stype = SOBJ_TYPE(sobj);
3257 	retval = CL_DONICE(t, NULL, 0, &niceval);
3258 	if (retval == 0) {
3259 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3260 		psp->pr_nice = niceval + NZERO;
3261 	} else {
3262 		psp->pr_oldpri = 0;
3263 		psp->pr_nice = 0;
3264 	}
3265 	psp->pr_syscall = t->t_sysnum;
3266 	psp->pr_pri = t->t_pri;
3267 	psp->pr_start.tv_sec = (time32_t)t->t_start;
3268 	psp->pr_start.tv_nsec = 0L;
3269 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3270 	scalehrtime(&hrutime);
3271 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3272 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
3273 	scalehrtime(&hrstime);
3274 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
3275 	/* compute %cpu for the lwp */
3276 	pct = cpu_update_pct(t, gethrtime_unscaled());
3277 	psp->pr_pctcpu = prgetpctcpu(pct);
3278 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
3279 	if (psp->pr_cpu > 99)
3280 		psp->pr_cpu = 99;
3281 
3282 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3283 	    sizeof (psp->pr_clname) - 1);
3284 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
3285 	psp->pr_onpro = t->t_cpu->cpu_id;
3286 	psp->pr_bindpro = t->t_bind_cpu;
3287 	psp->pr_bindpset = t->t_bind_pset;
3288 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3289 }
3290 #endif	/* _SYSCALL32_IMPL */
3291 
3292 #ifdef _SYSCALL32_IMPL
3293 
/*
 * Field-by-field copy helpers for converting a native procfs structure
 * (pointed to by 's') into its 32-bit counterpart (pointed to by 'd').
 *
 * Every helper expands to a single statement-like construct that needs a
 * terminating semicolon at the call site, so each one is safe to use as
 * the body of an unbraced if/else.  Arguments are parenthesized so that
 * arbitrary pointer expressions may be passed.
 */

/* Plain assignment of one member. */
#define	PR_COPY_FIELD(s, d, field)	((d)->field = (s)->field)

/* Assign the member only when the source describes an ILP32 process. */
#define	PR_COPY_FIELD_ILP32(s, d, field)			\
	do {							\
		if ((s)->pr_dmodel == PR_MODEL_ILP32)		\
			(d)->field = (s)->field;		\
	} while (0)

/* Convert a native timespec member into its 32-bit form. */
#define	PR_COPY_TIMESPEC(s, d, field)				\
	do {							\
		TIMESPEC_TO_TIMESPEC32(&(d)->field, &(s)->field);	\
	} while (0)

/* Byte copy of an array member, sized by the destination array. */
#define	PR_COPY_BUF(s, d, field)				\
	bcopy((s)->field, (d)->field, sizeof ((d)->field))

/* Deliberately leave the destination member untouched. */
#define	PR_IGNORE_FIELD(s, d, field)	((void)0)
3308 
3309 void
3310 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
3311 {
3312 	bzero(dest, sizeof (*dest));
3313 
3314 	PR_COPY_FIELD(src, dest, pr_flag);
3315 	PR_COPY_FIELD(src, dest, pr_lwpid);
3316 	PR_IGNORE_FIELD(src, dest, pr_addr);
3317 	PR_IGNORE_FIELD(src, dest, pr_wchan);
3318 	PR_COPY_FIELD(src, dest, pr_stype);
3319 	PR_COPY_FIELD(src, dest, pr_state);
3320 	PR_COPY_FIELD(src, dest, pr_sname);
3321 	PR_COPY_FIELD(src, dest, pr_nice);
3322 	PR_COPY_FIELD(src, dest, pr_syscall);
3323 	PR_COPY_FIELD(src, dest, pr_oldpri);
3324 	PR_COPY_FIELD(src, dest, pr_cpu);
3325 	PR_COPY_FIELD(src, dest, pr_pri);
3326 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3327 	PR_COPY_TIMESPEC(src, dest, pr_start);
3328 	PR_COPY_BUF(src, dest, pr_clname);
3329 	PR_COPY_BUF(src, dest, pr_name);
3330 	PR_COPY_FIELD(src, dest, pr_onpro);
3331 	PR_COPY_FIELD(src, dest, pr_bindpro);
3332 	PR_COPY_FIELD(src, dest, pr_bindpset);
3333 	PR_COPY_FIELD(src, dest, pr_lgrp);
3334 }
3335 
3336 void
3337 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
3338 {
3339 	bzero(dest, sizeof (*dest));
3340 
3341 	PR_COPY_FIELD(src, dest, pr_flag);
3342 	PR_COPY_FIELD(src, dest, pr_nlwp);
3343 	PR_COPY_FIELD(src, dest, pr_pid);
3344 	PR_COPY_FIELD(src, dest, pr_ppid);
3345 	PR_COPY_FIELD(src, dest, pr_pgid);
3346 	PR_COPY_FIELD(src, dest, pr_sid);
3347 	PR_COPY_FIELD(src, dest, pr_uid);
3348 	PR_COPY_FIELD(src, dest, pr_euid);
3349 	PR_COPY_FIELD(src, dest, pr_gid);
3350 	PR_COPY_FIELD(src, dest, pr_egid);
3351 	PR_IGNORE_FIELD(src, dest, pr_addr);
3352 	PR_COPY_FIELD_ILP32(src, dest, pr_size);
3353 	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
3354 	PR_COPY_FIELD(src, dest, pr_ttydev);
3355 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3356 	PR_COPY_FIELD(src, dest, pr_pctmem);
3357 	PR_COPY_TIMESPEC(src, dest, pr_start);
3358 	PR_COPY_TIMESPEC(src, dest, pr_time);
3359 	PR_COPY_TIMESPEC(src, dest, pr_ctime);
3360 	PR_COPY_BUF(src, dest, pr_fname);
3361 	PR_COPY_BUF(src, dest, pr_psargs);
3362 	PR_COPY_FIELD(src, dest, pr_wstat);
3363 	PR_COPY_FIELD(src, dest, pr_argc);
3364 	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
3365 	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
3366 	PR_COPY_FIELD(src, dest, pr_dmodel);
3367 	PR_COPY_FIELD(src, dest, pr_taskid);
3368 	PR_COPY_FIELD(src, dest, pr_projid);
3369 	PR_COPY_FIELD(src, dest, pr_nzomb);
3370 	PR_COPY_FIELD(src, dest, pr_poolid);
3371 	PR_COPY_FIELD(src, dest, pr_contract);
3372 	PR_COPY_FIELD(src, dest, pr_poolid);
3373 	PR_COPY_FIELD(src, dest, pr_poolid);
3374 
3375 	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
3376 }
3377 
3378 #undef	PR_COPY_FIELD
3379 #undef	PR_COPY_FIELD_ILP32
3380 #undef	PR_COPY_TIMESPEC
3381 #undef	PR_COPY_BUF
3382 #undef	PR_IGNORE_FIELD
3383 
3384 #endif	/* _SYSCALL32_IMPL */
3385 
3386 /*
3387  * This used to get called when microstate accounting was disabled but
3388  * microstate information was requested.  Since Microstate accounting is on
3389  * regardless of the proc flags, this simply makes it appear to procfs that
3390  * microstate accounting is on.  This is relatively meaningless since you
3391  * can't turn it off, but this is here for the sake of appearances.
3392  */
3393 
3394 /*ARGSUSED*/
3395 void
3396 estimate_msacct(kthread_t *t, hrtime_t curtime)
3397 {
3398 	proc_t *p;
3399 
3400 	if (t == NULL)
3401 		return;
3402 
3403 	p = ttoproc(t);
3404 	ASSERT(MUTEX_HELD(&p->p_lock));
3405 
3406 	/*
3407 	 * A system process (p0) could be referenced if the thread is
3408 	 * in the process of exiting.  Don't turn on microstate accounting
3409 	 * in that case.
3410 	 */
3411 	if (p->p_flag & SSYS)
3412 		return;
3413 
3414 	/*
3415 	 * Loop through all the LWPs (kernel threads) in the process.
3416 	 */
3417 	t = p->p_tlist;
3418 	do {
3419 		t->t_proc_flag |= TP_MSACCT;
3420 	} while ((t = t->t_forw) != p->p_tlist);
3421 
3422 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
3423 }
3424 
3425 /*
3426  * It's not really possible to disable microstate accounting anymore.
3427  * However, this routine simply turns off the ms accounting flags in a process
3428  * This way procfs can still pretend to turn microstate accounting on and
3429  * off for a process, but it actually doesn't do anything.  This is
3430  * a neutered form of preemptive idiot-proofing.
3431  */
3432 void
3433 disable_msacct(proc_t *p)
3434 {
3435 	kthread_t *t;
3436 
3437 	ASSERT(MUTEX_HELD(&p->p_lock));
3438 
3439 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
3440 	/*
3441 	 * Loop through all the LWPs (kernel threads) in the process.
3442 	 */
3443 	if ((t = p->p_tlist) != NULL) {
3444 		do {
3445 			/* clear per-thread flag */
3446 			t->t_proc_flag &= ~TP_MSACCT;
3447 		} while ((t = t->t_forw) != p->p_tlist);
3448 	}
3449 }
3450 
3451 /*
3452  * Return resource usage information.
3453  */
3454 void
3455 prgetusage(kthread_t *t, prhusage_t *pup)
3456 {
3457 	klwp_t *lwp = ttolwp(t);
3458 	hrtime_t *mstimep;
3459 	struct mstate *ms = &lwp->lwp_mstate;
3460 	int state;
3461 	int i;
3462 	hrtime_t curtime;
3463 	hrtime_t waitrq;
3464 	hrtime_t tmp1;
3465 
3466 	curtime = gethrtime_unscaled();
3467 
3468 	pup->pr_lwpid	= t->t_tid;
3469 	pup->pr_count	= 1;
3470 	pup->pr_create	= ms->ms_start;
3471 	pup->pr_term    = ms->ms_term;
3472 	scalehrtime(&pup->pr_create);
3473 	scalehrtime(&pup->pr_term);
3474 	if (ms->ms_term == 0) {
3475 		pup->pr_rtime = curtime - ms->ms_start;
3476 		scalehrtime(&pup->pr_rtime);
3477 	} else {
3478 		pup->pr_rtime = ms->ms_term - ms->ms_start;
3479 		scalehrtime(&pup->pr_rtime);
3480 	}
3481 
3482 
3483 	pup->pr_utime    = ms->ms_acct[LMS_USER];
3484 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
3485 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
3486 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
3487 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
3488 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
3489 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
3490 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
3491 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
3492 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
3493 
3494 	prscaleusage(pup);
3495 
3496 	/*
3497 	 * Adjust for time waiting in the dispatcher queue.
3498 	 */
3499 	waitrq = t->t_waitrq;	/* hopefully atomic */
3500 	if (waitrq != 0) {
3501 		if (waitrq > curtime) {
3502 			curtime = gethrtime_unscaled();
3503 		}
3504 		tmp1 = curtime - waitrq;
3505 		scalehrtime(&tmp1);
3506 		pup->pr_wtime += tmp1;
3507 		curtime = waitrq;
3508 	}
3509 
3510 	/*
3511 	 * Adjust for time spent in current microstate.
3512 	 */
3513 	if (ms->ms_state_start > curtime) {
3514 		curtime = gethrtime_unscaled();
3515 	}
3516 
3517 	i = 0;
3518 	do {
3519 		switch (state = t->t_mstate) {
3520 		case LMS_SLEEP:
3521 			/*
3522 			 * Update the timer for the current sleep state.
3523 			 */
3524 			switch (state = ms->ms_prev) {
3525 			case LMS_TFAULT:
3526 			case LMS_DFAULT:
3527 			case LMS_KFAULT:
3528 			case LMS_USER_LOCK:
3529 				break;
3530 			default:
3531 				state = LMS_SLEEP;
3532 				break;
3533 			}
3534 			break;
3535 		case LMS_TFAULT:
3536 		case LMS_DFAULT:
3537 		case LMS_KFAULT:
3538 		case LMS_USER_LOCK:
3539 			state = LMS_SYSTEM;
3540 			break;
3541 		}
3542 		switch (state) {
3543 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3544 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3545 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3546 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3547 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3548 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3549 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3550 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3551 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3552 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3553 		default:		panic("prgetusage: unknown microstate");
3554 		}
3555 		tmp1 = curtime - ms->ms_state_start;
3556 		if (tmp1 < 0) {
3557 			curtime = gethrtime_unscaled();
3558 			i++;
3559 			continue;
3560 		}
3561 		scalehrtime(&tmp1);
3562 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
3563 
3564 	*mstimep += tmp1;
3565 
3566 	/* update pup timestamp */
3567 	pup->pr_tstamp = curtime;
3568 	scalehrtime(&pup->pr_tstamp);
3569 
3570 	/*
3571 	 * Resource usage counters.
3572 	 */
3573 	pup->pr_minf  = lwp->lwp_ru.minflt;
3574 	pup->pr_majf  = lwp->lwp_ru.majflt;
3575 	pup->pr_nswap = lwp->lwp_ru.nswap;
3576 	pup->pr_inblk = lwp->lwp_ru.inblock;
3577 	pup->pr_oublk = lwp->lwp_ru.oublock;
3578 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
3579 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
3580 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
3581 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
3582 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
3583 	pup->pr_sysc  = lwp->lwp_ru.sysc;
3584 	pup->pr_ioch  = lwp->lwp_ru.ioch;
3585 }
3586 
3587 /*
3588  * Convert ms_acct stats from unscaled high-res time to nanoseconds
3589  */
3590 void
3591 prscaleusage(prhusage_t *usg)
3592 {
3593 	scalehrtime(&usg->pr_utime);
3594 	scalehrtime(&usg->pr_stime);
3595 	scalehrtime(&usg->pr_ttime);
3596 	scalehrtime(&usg->pr_tftime);
3597 	scalehrtime(&usg->pr_dftime);
3598 	scalehrtime(&usg->pr_kftime);
3599 	scalehrtime(&usg->pr_ltime);
3600 	scalehrtime(&usg->pr_slptime);
3601 	scalehrtime(&usg->pr_wtime);
3602 	scalehrtime(&usg->pr_stoptime);
3603 }
3604 
3605 
3606 /*
3607  * Sum resource usage information.
3608  */
3609 void
3610 praddusage(kthread_t *t, prhusage_t *pup)
3611 {
3612 	klwp_t *lwp = ttolwp(t);
3613 	hrtime_t *mstimep;
3614 	struct mstate *ms = &lwp->lwp_mstate;
3615 	int state;
3616 	int i;
3617 	hrtime_t curtime;
3618 	hrtime_t waitrq;
3619 	hrtime_t tmp;
3620 	prhusage_t conv;
3621 
3622 	curtime = gethrtime_unscaled();
3623 
3624 	if (ms->ms_term == 0) {
3625 		tmp = curtime - ms->ms_start;
3626 		scalehrtime(&tmp);
3627 		pup->pr_rtime += tmp;
3628 	} else {
3629 		tmp = ms->ms_term - ms->ms_start;
3630 		scalehrtime(&tmp);
3631 		pup->pr_rtime += tmp;
3632 	}
3633 
3634 	conv.pr_utime = ms->ms_acct[LMS_USER];
3635 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
3636 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
3637 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
3638 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
3639 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
3640 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3641 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
3642 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3643 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
3644 
3645 	prscaleusage(&conv);
3646 
3647 	pup->pr_utime	+= conv.pr_utime;
3648 	pup->pr_stime	+= conv.pr_stime;
3649 	pup->pr_ttime	+= conv.pr_ttime;
3650 	pup->pr_tftime	+= conv.pr_tftime;
3651 	pup->pr_dftime	+= conv.pr_dftime;
3652 	pup->pr_kftime	+= conv.pr_kftime;
3653 	pup->pr_ltime	+= conv.pr_ltime;
3654 	pup->pr_slptime	+= conv.pr_slptime;
3655 	pup->pr_wtime	+= conv.pr_wtime;
3656 	pup->pr_stoptime += conv.pr_stoptime;
3657 
3658 	/*
3659 	 * Adjust for time waiting in the dispatcher queue.
3660 	 */
3661 	waitrq = t->t_waitrq;	/* hopefully atomic */
3662 	if (waitrq != 0) {
3663 		if (waitrq > curtime) {
3664 			curtime = gethrtime_unscaled();
3665 		}
3666 		tmp = curtime - waitrq;
3667 		scalehrtime(&tmp);
3668 		pup->pr_wtime += tmp;
3669 		curtime = waitrq;
3670 	}
3671 
3672 	/*
3673 	 * Adjust for time spent in current microstate.
3674 	 */
3675 	if (ms->ms_state_start > curtime) {
3676 		curtime = gethrtime_unscaled();
3677 	}
3678 
3679 	i = 0;
3680 	do {
3681 		switch (state = t->t_mstate) {
3682 		case LMS_SLEEP:
3683 			/*
3684 			 * Update the timer for the current sleep state.
3685 			 */
3686 			switch (state = ms->ms_prev) {
3687 			case LMS_TFAULT:
3688 			case LMS_DFAULT:
3689 			case LMS_KFAULT:
3690 			case LMS_USER_LOCK:
3691 				break;
3692 			default:
3693 				state = LMS_SLEEP;
3694 				break;
3695 			}
3696 			break;
3697 		case LMS_TFAULT:
3698 		case LMS_DFAULT:
3699 		case LMS_KFAULT:
3700 		case LMS_USER_LOCK:
3701 			state = LMS_SYSTEM;
3702 			break;
3703 		}
3704 		switch (state) {
3705 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3706 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3707 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3708 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3709 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3710 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3711 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3712 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3713 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3714 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3715 		default:		panic("praddusage: unknown microstate");
3716 		}
3717 		tmp = curtime - ms->ms_state_start;
3718 		if (tmp < 0) {
3719 			curtime = gethrtime_unscaled();
3720 			i++;
3721 			continue;
3722 		}
3723 		scalehrtime(&tmp);
3724 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
3725 
3726 	*mstimep += tmp;
3727 
3728 	/* update pup timestamp */
3729 	pup->pr_tstamp = curtime;
3730 	scalehrtime(&pup->pr_tstamp);
3731 
3732 	/*
3733 	 * Resource usage counters.
3734 	 */
3735 	pup->pr_minf  += lwp->lwp_ru.minflt;
3736 	pup->pr_majf  += lwp->lwp_ru.majflt;
3737 	pup->pr_nswap += lwp->lwp_ru.nswap;
3738 	pup->pr_inblk += lwp->lwp_ru.inblock;
3739 	pup->pr_oublk += lwp->lwp_ru.oublock;
3740 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
3741 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
3742 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
3743 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
3744 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
3745 	pup->pr_sysc  += lwp->lwp_ru.sysc;
3746 	pup->pr_ioch  += lwp->lwp_ru.ioch;
3747 }
3748 
3749 /*
3750  * Convert a prhusage_t to a prusage_t.
3751  * This means convert each hrtime_t to a timestruc_t
3752  * and copy the count fields uint64_t => ulong_t.
3753  */
3754 void
3755 prcvtusage(prhusage_t *pup, prusage_t *upup)
3756 {
3757 	uint64_t *ullp;
3758 	ulong_t *ulp;
3759 	int i;
3760 
3761 	upup->pr_lwpid = pup->pr_lwpid;
3762 	upup->pr_count = pup->pr_count;
3763 
3764 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3765 	hrt2ts(pup->pr_create,	&upup->pr_create);
3766 	hrt2ts(pup->pr_term,	&upup->pr_term);
3767 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3768 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3769 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3770 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3771 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3772 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3773 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3774 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3775 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3776 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3777 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3778 	bzero(upup->filltime, sizeof (upup->filltime));
3779 
3780 	ullp = &pup->pr_minf;
3781 	ulp = &upup->pr_minf;
3782 	for (i = 0; i < 22; i++)
3783 		*ulp++ = (ulong_t)*ullp++;
3784 }
3785 
3786 #ifdef _SYSCALL32_IMPL
3787 void
3788 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3789 {
3790 	uint64_t *ullp;
3791 	uint32_t *ulp;
3792 	int i;
3793 
3794 	upup->pr_lwpid = pup->pr_lwpid;
3795 	upup->pr_count = pup->pr_count;
3796 
3797 	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3798 	hrt2ts32(pup->pr_create,	&upup->pr_create);
3799 	hrt2ts32(pup->pr_term,		&upup->pr_term);
3800 	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3801 	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3802 	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3803 	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3804 	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3805 	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3806 	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3807 	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3808 	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3809 	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3810 	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3811 	bzero(upup->filltime, sizeof (upup->filltime));
3812 
3813 	ullp = &pup->pr_minf;
3814 	ulp = &upup->pr_minf;
3815 	for (i = 0; i < 22; i++)
3816 		*ulp++ = (uint32_t)*ullp++;
3817 }
3818 #endif	/* _SYSCALL32_IMPL */
3819 
/*
 * Determine whether a set is empty.
 *
 * 'sp' points to 'n' 32-bit words.  Returns 1 if every one of those words
 * is zero (which includes the case n == 0), and 0 as soon as a non-zero
 * word is found.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t idx;

	for (idx = 0; idx < n; idx++) {
		if (sp[idx] != 0)
			return (0);
	}

	return (1);
}
3831 
3832 /*
3833  * Utility routine for establishing a watched area in the process.
3834  * Keep the list of watched areas sorted by virtual address.
3835  */
3836 int
3837 set_watched_area(proc_t *p, struct watched_area *pwa)
3838 {
3839 	caddr_t vaddr = pwa->wa_vaddr;
3840 	caddr_t eaddr = pwa->wa_eaddr;
3841 	ulong_t flags = pwa->wa_flags;
3842 	struct watched_area *target;
3843 	avl_index_t where;
3844 	int error = 0;
3845 
3846 	/* we must not be holding p->p_lock, but the process must be locked */
3847 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3848 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3849 
3850 	/*
3851 	 * If this is our first watchpoint, enable watchpoints for the process.
3852 	 */
3853 	if (!pr_watch_active(p)) {
3854 		kthread_t *t;
3855 
3856 		mutex_enter(&p->p_lock);
3857 		if ((t = p->p_tlist) != NULL) {
3858 			do {
3859 				watch_enable(t);
3860 			} while ((t = t->t_forw) != p->p_tlist);
3861 		}
3862 		mutex_exit(&p->p_lock);
3863 	}
3864 
3865 	target = pr_find_watched_area(p, pwa, &where);
3866 	if (target != NULL) {
3867 		/*
3868 		 * We discovered an existing, overlapping watched area.
3869 		 * Allow it only if it is an exact match.
3870 		 */
3871 		if (target->wa_vaddr != vaddr ||
3872 		    target->wa_eaddr != eaddr)
3873 			error = EINVAL;
3874 		else if (target->wa_flags != flags) {
3875 			error = set_watched_page(p, vaddr, eaddr,
3876 			    flags, target->wa_flags);
3877 			target->wa_flags = flags;
3878 		}
3879 		kmem_free(pwa, sizeof (struct watched_area));
3880 	} else {
3881 		avl_insert(&p->p_warea, pwa, where);
3882 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3883 	}
3884 
3885 	return (error);
3886 }
3887 
3888 /*
3889  * Utility routine for clearing a watched area in the process.
3890  * Must be an exact match of the virtual address.
3891  * size and flags don't matter.
3892  */
3893 int
3894 clear_watched_area(proc_t *p, struct watched_area *pwa)
3895 {
3896 	struct watched_area *found;
3897 
3898 	/* we must not be holding p->p_lock, but the process must be locked */
3899 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3900 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3901 
3902 
3903 	if (!pr_watch_active(p)) {
3904 		kmem_free(pwa, sizeof (struct watched_area));
3905 		return (0);
3906 	}
3907 
3908 	/*
3909 	 * Look for a matching address in the watched areas.  If a match is
3910 	 * found, clear the old watched area and adjust the watched page(s).  It
3911 	 * is not an error if there is no match.
3912 	 */
3913 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3914 	    found->wa_vaddr == pwa->wa_vaddr) {
3915 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3916 		    found->wa_flags);
3917 		avl_remove(&p->p_warea, found);
3918 		kmem_free(found, sizeof (struct watched_area));
3919 	}
3920 
3921 	kmem_free(pwa, sizeof (struct watched_area));
3922 
3923 	/*
3924 	 * If we removed the last watched area from the process, disable
3925 	 * watchpoints.
3926 	 */
3927 	if (!pr_watch_active(p)) {
3928 		kthread_t *t;
3929 
3930 		mutex_enter(&p->p_lock);
3931 		if ((t = p->p_tlist) != NULL) {
3932 			do {
3933 				watch_disable(t);
3934 			} while ((t = t->t_forw) != p->p_tlist);
3935 		}
3936 		mutex_exit(&p->p_lock);
3937 	}
3938 
3939 	return (0);
3940 }
3941 
3942 /*
3943  * Frees all the watched_area structures
3944  */
3945 void
3946 pr_free_watchpoints(proc_t *p)
3947 {
3948 	struct watched_area *delp;
3949 	void *cookie;
3950 
3951 	cookie = NULL;
3952 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3953 		kmem_free(delp, sizeof (struct watched_area));
3954 
3955 	avl_destroy(&p->p_warea);
3956 }
3957 
3958 /*
3959  * This one is called by the traced process to unwatch all the
3960  * pages while deallocating the list of watched_page structs.
3961  */
3962 void
3963 pr_free_watched_pages(proc_t *p)
3964 {
3965 	struct as *as = p->p_as;
3966 	struct watched_page *pwp;
3967 	uint_t prot;
3968 	int    retrycnt, err;
3969 	void *cookie;
3970 
3971 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3972 		return;
3973 
3974 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3975 	AS_LOCK_ENTER(as, RW_WRITER);
3976 
3977 	pwp = avl_first(&as->a_wpage);
3978 
3979 	cookie = NULL;
3980 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3981 		retrycnt = 0;
3982 		if ((prot = pwp->wp_oprot) != 0) {
3983 			caddr_t addr = pwp->wp_vaddr;
3984 			struct seg *seg;
3985 		retry:
3986 
3987 			if ((pwp->wp_prot != prot ||
3988 			    (pwp->wp_flags & WP_NOWATCH)) &&
3989 			    (seg = as_segat(as, addr)) != NULL) {
3990 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3991 				if (err == IE_RETRY) {
3992 					ASSERT(retrycnt == 0);
3993 					retrycnt++;
3994 					goto retry;
3995 				}
3996 			}
3997 		}
3998 		kmem_free(pwp, sizeof (struct watched_page));
3999 	}
4000 
4001 	avl_destroy(&as->a_wpage);
4002 	p->p_wprot = NULL;
4003 
4004 	AS_LOCK_EXIT(as);
4005 }
4006 
4007 /*
4008  * Insert a watched area into the list of watched pages.
4009  * If oflags is zero then we are adding a new watched area.
4010  * Otherwise we are changing the flags of an existing watched area.
4011  */
4012 static int
4013 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
4014     ulong_t flags, ulong_t oflags)
4015 {
4016 	struct as *as = p->p_as;
4017 	avl_tree_t *pwp_tree;
4018 	struct watched_page *pwp, *newpwp;
4019 	struct watched_page tpw;
4020 	avl_index_t where;
4021 	struct seg *seg;
4022 	uint_t prot;
4023 	caddr_t addr;
4024 
4025 	/*
4026 	 * We need to pre-allocate a list of structures before we grab the
4027 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
4028 	 * held.
4029 	 */
4030 	newpwp = NULL;
4031 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4032 	    addr < eaddr; addr += PAGESIZE) {
4033 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
4034 		pwp->wp_list = newpwp;
4035 		newpwp = pwp;
4036 	}
4037 
4038 	AS_LOCK_ENTER(as, RW_WRITER);
4039 
4040 	/*
4041 	 * Search for an existing watched page to contain the watched area.
4042 	 * If none is found, grab a new one from the available list
4043 	 * and insert it in the active list, keeping the list sorted
4044 	 * by user-level virtual address.
4045 	 */
4046 	if (p->p_flag & SVFWAIT)
4047 		pwp_tree = &p->p_wpage;
4048 	else
4049 		pwp_tree = &as->a_wpage;
4050 
4051 again:
4052 	if (avl_numnodes(pwp_tree) > prnwatch) {
4053 		AS_LOCK_EXIT(as);
4054 		while (newpwp != NULL) {
4055 			pwp = newpwp->wp_list;
4056 			kmem_free(newpwp, sizeof (struct watched_page));
4057 			newpwp = pwp;
4058 		}
4059 		return (E2BIG);
4060 	}
4061 
4062 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4063 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
4064 		pwp = newpwp;
4065 		newpwp = newpwp->wp_list;
4066 		pwp->wp_list = NULL;
4067 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
4068 		    (uintptr_t)PAGEMASK);
4069 		avl_insert(pwp_tree, pwp, where);
4070 	}
4071 
4072 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
4073 
4074 	if (oflags & WA_READ)
4075 		pwp->wp_read--;
4076 	if (oflags & WA_WRITE)
4077 		pwp->wp_write--;
4078 	if (oflags & WA_EXEC)
4079 		pwp->wp_exec--;
4080 
4081 	ASSERT(pwp->wp_read >= 0);
4082 	ASSERT(pwp->wp_write >= 0);
4083 	ASSERT(pwp->wp_exec >= 0);
4084 
4085 	if (flags & WA_READ)
4086 		pwp->wp_read++;
4087 	if (flags & WA_WRITE)
4088 		pwp->wp_write++;
4089 	if (flags & WA_EXEC)
4090 		pwp->wp_exec++;
4091 
4092 	if (!(p->p_flag & SVFWAIT)) {
4093 		vaddr = pwp->wp_vaddr;
4094 		if (pwp->wp_oprot == 0 &&
4095 		    (seg = as_segat(as, vaddr)) != NULL) {
4096 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
4097 			pwp->wp_oprot = (uchar_t)prot;
4098 			pwp->wp_prot = (uchar_t)prot;
4099 		}
4100 		if (pwp->wp_oprot != 0) {
4101 			prot = pwp->wp_oprot;
4102 			if (pwp->wp_read)
4103 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4104 			if (pwp->wp_write)
4105 				prot &= ~PROT_WRITE;
4106 			if (pwp->wp_exec)
4107 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4108 			if (!(pwp->wp_flags & WP_NOWATCH) &&
4109 			    pwp->wp_prot != prot &&
4110 			    (pwp->wp_flags & WP_SETPROT) == 0) {
4111 				pwp->wp_flags |= WP_SETPROT;
4112 				pwp->wp_list = p->p_wprot;
4113 				p->p_wprot = pwp;
4114 			}
4115 			pwp->wp_prot = (uchar_t)prot;
4116 		}
4117 	}
4118 
4119 	/*
4120 	 * If the watched area extends into the next page then do
4121 	 * it over again with the virtual address of the next page.
4122 	 */
4123 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
4124 		goto again;
4125 
4126 	AS_LOCK_EXIT(as);
4127 
4128 	/*
4129 	 * Free any pages we may have over-allocated
4130 	 */
4131 	while (newpwp != NULL) {
4132 		pwp = newpwp->wp_list;
4133 		kmem_free(newpwp, sizeof (struct watched_page));
4134 		newpwp = pwp;
4135 	}
4136 
4137 	return (0);
4138 }
4139 
4140 /*
4141  * Remove a watched area from the list of watched pages.
4142  * A watched area may extend over more than one page.
4143  */
4144 static void
4145 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
4146 {
4147 	struct as *as = p->p_as;
4148 	struct watched_page *pwp;
4149 	struct watched_page tpw;
4150 	avl_tree_t *tree;
4151 	avl_index_t where;
4152 
4153 	AS_LOCK_ENTER(as, RW_WRITER);
4154 
4155 	if (p->p_flag & SVFWAIT)
4156 		tree = &p->p_wpage;
4157 	else
4158 		tree = &as->a_wpage;
4159 
4160 	tpw.wp_vaddr = vaddr =
4161 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4162 	pwp = avl_find(tree, &tpw, &where);
4163 	if (pwp == NULL)
4164 		pwp = avl_nearest(tree, where, AVL_AFTER);
4165 
4166 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
4167 		ASSERT(vaddr <=  pwp->wp_vaddr);
4168 
4169 		if (flags & WA_READ)
4170 			pwp->wp_read--;
4171 		if (flags & WA_WRITE)
4172 			pwp->wp_write--;
4173 		if (flags & WA_EXEC)
4174 			pwp->wp_exec--;
4175 
4176 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
4177 			/*
4178 			 * Reset the hat layer's protections on this page.
4179 			 */
4180 			if (pwp->wp_oprot != 0) {
4181 				uint_t prot = pwp->wp_oprot;
4182 
4183 				if (pwp->wp_read)
4184 					prot &=
4185 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4186 				if (pwp->wp_write)
4187 					prot &= ~PROT_WRITE;
4188 				if (pwp->wp_exec)
4189 					prot &=
4190 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4191 				if (!(pwp->wp_flags & WP_NOWATCH) &&
4192 				    pwp->wp_prot != prot &&
4193 				    (pwp->wp_flags & WP_SETPROT) == 0) {
4194 					pwp->wp_flags |= WP_SETPROT;
4195 					pwp->wp_list = p->p_wprot;
4196 					p->p_wprot = pwp;
4197 				}
4198 				pwp->wp_prot = (uchar_t)prot;
4199 			}
4200 		} else {
4201 			/*
4202 			 * No watched areas remain in this page.
4203 			 * Reset everything to normal.
4204 			 */
4205 			if (pwp->wp_oprot != 0) {
4206 				pwp->wp_prot = pwp->wp_oprot;
4207 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
4208 					pwp->wp_flags |= WP_SETPROT;
4209 					pwp->wp_list = p->p_wprot;
4210 					p->p_wprot = pwp;
4211 				}
4212 			}
4213 		}
4214 
4215 		pwp = AVL_NEXT(tree, pwp);
4216 	}
4217 
4218 	AS_LOCK_EXIT(as);
4219 }
4220 
4221 /*
4222  * Return the original protections for the specified page.
4223  */
4224 static void
4225 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
4226 {
4227 	struct watched_page *pwp;
4228 	struct watched_page tpw;
4229 
4230 	ASSERT(AS_LOCK_HELD(as));
4231 
4232 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
4233 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
4234 		*prot = pwp->wp_oprot;
4235 }
4236 
4237 static prpagev_t *
4238 pr_pagev_create(struct seg *seg, int check_noreserve)
4239 {
4240 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
4241 	size_t total_pages = seg_pages(seg);
4242 
4243 	/*
4244 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
4245 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
4246 	 * to about a megabyte of kernel heap by default.
4247 	 */
4248 	pagev->pg_npages = MIN(total_pages, pagev_lim);
4249 	pagev->pg_pnbase = 0;
4250 
4251 	pagev->pg_protv =
4252 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
4253 
4254 	if (check_noreserve)
4255 		pagev->pg_incore =
4256 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
4257 	else
4258 		pagev->pg_incore = NULL;
4259 
4260 	return (pagev);
4261 }
4262 
4263 static void
4264 pr_pagev_destroy(prpagev_t *pagev)
4265 {
4266 	if (pagev->pg_incore != NULL)
4267 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
4268 
4269 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
4270 	kmem_free(pagev, sizeof (prpagev_t));
4271 }
4272 
4273 static caddr_t
4274 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
4275 {
4276 	ulong_t lastpg = seg_page(seg, eaddr - 1);
4277 	ulong_t pn, pnlim;
4278 	caddr_t saddr;
4279 	size_t len;
4280 
4281 	ASSERT(addr >= seg->s_base && addr <= eaddr);
4282 
4283 	if (addr == eaddr)
4284 		return (eaddr);
4285 
4286 refill:
4287 	ASSERT(addr < eaddr);
4288 	pagev->pg_pnbase = seg_page(seg, addr);
4289 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
4290 	saddr = addr;
4291 
4292 	if (lastpg < pnlim)
4293 		len = (size_t)(eaddr - addr);
4294 	else
4295 		len = pagev->pg_npages * PAGESIZE;
4296 
4297 	if (pagev->pg_incore != NULL) {
4298 		/*
4299 		 * INCORE cleverly has different semantics than GETPROT:
4300 		 * it returns info on pages up to but NOT including addr + len.
4301 		 */
4302 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
4303 		pn = pagev->pg_pnbase;
4304 
4305 		do {
4306 			/*
4307 			 * Guilty knowledge here:  We know that segvn_incore
4308 			 * returns more than just the low-order bit that
4309 			 * indicates the page is actually in memory.  If any
4310 			 * bits are set, then the page has backing store.
4311 			 */
4312 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
4313 				goto out;
4314 
4315 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
4316 
4317 		/*
4318 		 * If we examined all the pages in the vector but we're not
4319 		 * at the end of the segment, take another lap.
4320 		 */
4321 		if (addr < eaddr)
4322 			goto refill;
4323 	}
4324 
4325 	/*
4326 	 * Need to take len - 1 because addr + len is the address of the
4327 	 * first byte of the page just past the end of what we want.
4328 	 */
4329 out:
4330 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
4331 	return (addr);
4332 }
4333 
/*
 * Find the next run of consecutive pages in seg that share the same
 * protections, using the information cached in pagev.
 *
 * On entry *saddrp is the address at which to start looking and eaddr is
 * the end of the range of interest.  On return *saddrp holds the start of
 * the run, *protp holds the run's protections, and the return value is the
 * address at which the run ends.
 *
 * When the vector has an incore array, pages whose incore entry is zero
 * are skipped before the run starts (so *saddrp may move forward) and such
 * a page also terminates a run.  If eaddr is reached while skipping, then
 * *saddrp is set to eaddr, *protp to 0, and eaddr is returned.
 *
 * Per-page protections are read from pg_protv and then passed through
 * getwatchprot(), which substitutes the saved original protections for
 * pages present in the watched-page tree.  The vector is refilled with
 * pr_pagev_fill() whenever the walk steps past its last entry.
 */
static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			if ((addr += PAGESIZE) == eaddr) {
				/* Ran off the end without finding a page. */
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				/*
				 * Stepped past the vector: refill it.  The
				 * fill itself skips ahead to the next page
				 * with backing store, or returns eaddr.
				 */
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	/* addr is the end of the run (or eaddr); prot describes the run. */
	*protp = prot;
	return (addr);
}
4426 
4427 size_t
4428 pr_getsegsize(struct seg *seg, int reserved)
4429 {
4430 	size_t size = seg->s_size;
4431 
4432 	/*
4433 	 * If we're interested in the reserved space, return the size of the
4434 	 * segment itself.  Everything else in this function is a special case
4435 	 * to determine the actual underlying size of various segment types.
4436 	 */
4437 	if (reserved)
4438 		return (size);
4439 
4440 	/*
4441 	 * If this is a segvn mapping of a regular file, return the smaller
4442 	 * of the segment size and the remaining size of the file beyond
4443 	 * the file offset corresponding to seg->s_base.
4444 	 */
4445 	if (seg->s_ops == &segvn_ops) {
4446 		vattr_t vattr;
4447 		vnode_t *vp;
4448 
4449 		vattr.va_mask = AT_SIZE;
4450 
4451 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4452 		    vp != NULL && vp->v_type == VREG &&
4453 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4454 
4455 			u_offset_t fsize = vattr.va_size;
4456 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
4457 
4458 			if (fsize < offset)
4459 				fsize = 0;
4460 			else
4461 				fsize -= offset;
4462 
4463 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
4464 
4465 			if (fsize < (u_offset_t)size)
4466 				size = (size_t)fsize;
4467 		}
4468 
4469 		return (size);
4470 	}
4471 
4472 	/*
4473 	 * If this is an ISM shared segment, don't include pages that are
4474 	 * beyond the real size of the spt segment that backs it.
4475 	 */
4476 	if (seg->s_ops == &segspt_shmops)
4477 		return (MIN(spt_realsize(seg), size));
4478 
4479 	/*
4480 	 * If this is segment is a mapping from /dev/null, then this is a
4481 	 * reservation of virtual address space and has no actual size.
4482 	 * Such segments are backed by segdev and have type set to neither
4483 	 * MAP_SHARED nor MAP_PRIVATE.
4484 	 */
4485 	if (seg->s_ops == &segdev_ops &&
4486 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
4487 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
4488 		return (0);
4489 
4490 	/*
4491 	 * If this segment doesn't match one of the special types we handle,
4492 	 * just return the size of the segment itself.
4493 	 */
4494 	return (size);
4495 }
4496 
/*
 * Return the protections of the address range of seg that starts at
 * *saddrp, and store in *naddrp the address at which that range ends
 * (never beyond eaddr).  Callers iterate by feeding *naddrp back in as the
 * next *saddrp until it reaches eaddr.
 *
 *	seg	 segment being examined; its address space must be
 *		 write-locked by the caller.
 *	reserved when zero, MAP_NORESERVE pages of an eligible segvn segment
 *		 are included only if they have materialized.
 *	tmp	 iteration state.  *tmp must be NULL when the per-page path
 *		 is entered at the base of the segment; it is then set to a
 *		 prpagev_t that later calls on the same segment reuse.
 *	saddrp	 in: start address.  out: may be advanced past pages that
 *		 are being skipped on the per-page path.
 *	naddrp	 out: end of the range described by the return value.
 *	eaddr	 end of the region of interest within the segment.
 *
 * Once the returned range reaches eaddr, pr_getprot_done() is called on
 * tmp, so a caller that iterates to completion has nothing to free.
 */
uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	/* Typed views of seg->s_data for the segment drivers peeked into. */
	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort.  We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		/* segvn fast path: one protection covers [saddr, eaddr). */
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		/* segdev fast path: one protection covers [saddr, eaddr). */
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			/*
			 * The fill ran all the way to eaddr, so there is
			 * nothing to report: return an empty range.
			 */
			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	/* End of the region reached: release any page vector held in *tmp. */
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}
4591 
4592 void
4593 pr_getprot_done(void **tmp)
4594 {
4595 	if (*tmp != NULL) {
4596 		pr_pagev_destroy((prpagev_t *)*tmp);
4597 		*tmp = NULL;
4598 	}
4599 }
4600 
4601 /*
4602  * Return true iff the vnode is a /proc file from the object directory.
4603  */
4604 int
4605 pr_isobject(vnode_t *vp)
4606 {
4607 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
4608 }
4609 
4610 /*
4611  * Return true iff the vnode is a /proc file opened by the process itself.
4612  */
4613 int
4614 pr_isself(vnode_t *vp)
4615 {
4616 	/*
4617 	 * XXX: To retain binary compatibility with the old
4618 	 * ioctl()-based version of /proc, we exempt self-opens
4619 	 * of /proc/<pid> from being marked close-on-exec.
4620 	 */
4621 	return (vn_matchops(vp, prvnodeops) &&
4622 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
4623 	    VTOP(vp)->pr_type != PR_PIDDIR);
4624 }
4625 
4626 static ssize_t
4627 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
4628 {
4629 	ssize_t pagesize, hatsize;
4630 
4631 	ASSERT(AS_WRITE_HELD(seg->s_as));
4632 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
4633 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
4634 	ASSERT(saddr < eaddr);
4635 
4636 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
4637 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
4638 	ASSERT(pagesize != 0);
4639 
4640 	if (pagesize == -1)
4641 		pagesize = PAGESIZE;
4642 
4643 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
4644 
4645 	while (saddr < eaddr) {
4646 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
4647 			break;
4648 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
4649 		saddr += pagesize;
4650 	}
4651 
4652 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
4653 	return (hatsize);
4654 }
4655 
/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 *
 * One prxmap_t is appended to iolhead for every range of the process's
 * address space that has uniform protections and a uniform HAT page size.
 * Segments flagged S_HOLE are skipped.  The caller must hold the address
 * space write lock, and p->p_as must not be the kernel address space.
 * Always returns 0.
 *
 * NOTE(review): pr_rss, pr_anon, pr_locked, pr_ino and pr_mapname are only
 * incremented or conditionally assigned below, so this code presumably
 * relies on pr_iol_newbuf() handing back zeroed memory -- confirm.
 */
int
prgetxmap(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* Segments to be tagged MA_BREAK and MA_STACK respectively. */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/* continue in a do/while proceeds to the AS_SEGNEXT test. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}
		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (uintptr_t)saddr;
				mp->pr_size = naddr - saddr;
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				/* Translate PROT_* and MAP_* bits to MA_*. */
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				/*
				 * Anonymous: an ISM segment, or a segvn
				 * segment for which no vnode can be obtained.
				 */
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				/* A HAT page size of -1 is reported as 0. */
				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					mp->pr_dev = vattr.va_fsid;
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				/*
				 * Count resident, anonymous and locked pages
				 * in this range, using a scratch vector with
				 * one byte per page filled in by the segment
				 * driver.
				 */
				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		/* pr_getprot() frees its state once it reaches eaddr. */
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
4807 
4808 /*
4809  * Return the process's credentials.  We don't need a 32-bit equivalent of
4810  * this function because prcred_t and prcred32_t are actually the same.
4811  */
4812 void
4813 prgetcred(proc_t *p, prcred_t *pcrp)
4814 {
4815 	mutex_enter(&p->p_crlock);
4816 	cred2prcred(p->p_cred, pcrp);
4817 	mutex_exit(&p->p_crlock);
4818 }
4819 
4820 void
4821 prgetsecflags(proc_t *p, prsecflags_t *psfp)
4822 {
4823 	ASSERT(psfp != NULL);
4824 
4825 	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
4826 	psfp->pr_lower = p->p_secflags.psf_lower;
4827 	psfp->pr_upper = p->p_secflags.psf_upper;
4828 	psfp->pr_effective = p->p_secflags.psf_effective;
4829 	psfp->pr_inherit = p->p_secflags.psf_inherit;
4830 }
4831 
4832 /*
4833  * Compute actual size of the prpriv_t structure.
4834  */
4835 
4836 size_t
4837 prgetprivsize(void)
4838 {
4839 	return (priv_prgetprivsize(NULL));
4840 }
4841 
4842 /*
4843  * Return the process's privileges.  We don't need a 32-bit equivalent of
4844  * this function because prpriv_t and prpriv32_t are actually the same.
4845  */
4846 void
4847 prgetpriv(proc_t *p, prpriv_t *pprp)
4848 {
4849 	mutex_enter(&p->p_crlock);
4850 	cred2prpriv(p->p_cred, pprp);
4851 	mutex_exit(&p->p_crlock);
4852 }
4853 
4854 #ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with extended (HAT) memory map information,
 * in the 32-bit prxmap32_t layout.
 * We allocate here; the caller must deallocate.
 *
 * One prxmap32_t is appended to iolhead for every range of the process's
 * address space that has uniform protections and a uniform HAT page size.
 * Segments flagged S_HOLE are skipped.  The caller must hold the address
 * space write lock, and p->p_as must not be the kernel address space.
 * Always returns 0.
 *
 * NOTE(review): pr_rss, pr_anon, pr_locked, pr_ino and pr_mapname are only
 * incremented or conditionally assigned below, so this code presumably
 * relies on pr_iol_newbuf() handing back zeroed memory -- confirm.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* Segments to be tagged MA_BREAK and MA_STACK respectively. */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/* continue in a do/while proceeds to the AS_SEGNEXT test. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				/* Address and size are narrowed to 32 bits. */
				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				/* Translate PROT_* and MAP_* bits to MA_*. */
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				/*
				 * Anonymous: an ISM segment, or a segvn
				 * segment for which no vnode can be obtained.
				 */
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				/* A HAT page size of -1 is reported as 0. */
				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					/* Compress the dev_t to 32 bits. */
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				/*
				 * Count resident, anonymous and locked pages
				 * in this range, using a scratch vector with
				 * one byte per page filled in by the segment
				 * driver.
				 */
				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		/* pr_getprot() frees its state once it reaches eaddr. */
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
5008 #endif	/* _SYSCALL32_IMPL */
5009