/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2022 MNX Cloud, Inc.
 * Copyright 2025 Oxide Computer Company
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <sys/flock_impl.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/pathname.h>
#include <sys/mode.h>
#include <sys/socketvar.h>
#include <sys/autoconf.h>
#include <sys/dtrace.h>
#include <sys/timod.h>
#include <sys/fs/namenode.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <inet/cc.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected lwp,
 * with the dispatcher lock held for that thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		/* If this is a process kernel thread, ignore it. */
		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}
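
/*
 * A small usage sketch (comment only, not live code): the thread returned
 * by prchoose() comes back with its dispatcher lock held, so callers such
 * as prgetstatus() below drop that lock as soon as they have the reference:
 *
 *	t = prchoose(p);	(returns locked thread)
 *	ASSERT(t != NULL);
 *	thread_unlock(t);
 */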

/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}

/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}

/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded.  Some other thread
 * may have beat it to the punch.
 */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}

/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	pr_free_watched_pages(p);
}

/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);	/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}

/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourselves kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
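
/*
 * A minimal sketch of how prlock() pairs with prunlock() (defined below);
 * error handling is abbreviated, and the real callers are the /proc
 * read/write/ioctl paths:
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);
 *	...operate on the process; p->p_lock is held and P_PR_LOCK is set...
 *	prunlock(pnp);
 */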

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
}

void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) {
		int err = pr_setrun(pnp, 0);
		/*
		 * EBUSY here means either the process was not stopped by /proc
		 * or there is an agent lwp.  If there's an agent lwp, we don't
		 * need to do anything as it will run and witness the SIGKILL.
		 * However, if there's no agent lwp and the process was not
		 * stopped by /proc, it may have been stopped by SIGSTOP; try
		 * getting lwps running with TS_XSTART to undo SIGSTOP effect.
		 *
		 * Notably, other TS_* bits are inappropriate here:
		 * * Do not set TS_PSTART; pr_setrun() above would have already
		 *   set this if it did anything for this process.
		 * * Do not set TS_CSTART or TS_UNPAUSE; lwps may be stopped by
		 *   PR_SUSPEND for many reasons.  In some cases, like
		 *   holdlwps(), the suspender will resume the process before
		 *   the corresponding syscall returns.  In other cases, like
		 *   dumping core, the suspender will tear down the lwps as it
		 *   completes.
		 * * Do not set TS_RESUME out of caution; not sure about the
		 *   consequences of a process going away during CPR resume and
		 *   CPR should set the process running eventually.
		 * * Do not set TS_CREATE because lwp creation expects threads
		 *   to remain paused until lwp completes.
		 */
		if (err == EBUSY && p->p_agenttp == NULL) {
			runlwps(p, TS_XSTART);
		}
	}
	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}

/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}

/*
 * Query mask of held signals for a given thread.
 *
 * This makes use of schedctl_sigblock() to query if userspace has requested
 * that all maskable signals be held.  While it would be tempting to call
 * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
 * done safely without the risk of racing with the thread under consideration.
 */
void
prgethold(kthread_t *t, sigset_t *sp)
{
	k_sigset_t set;

	if (schedctl_sigblock(t)) {
		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
	} else {
		set = t->t_hold;
	}
	sigktou(&set, sp);
}

#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			sp->pr_sysarg[3] = 0;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			sp->pr_sysarg[3] = 0;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc) ?
	    BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc) ? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
uint_t
prnsegs(struct as *as, int reserved)
{
	uint_t n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr) {
				n++;
				/*
				 * prnsegs() was formerly designated to return
				 * an 'int' despite having no ability or use
				 * for negative results.  As part of changing
				 * it to 'uint_t', keep the old effective limit
				 * of INT_MAX in place.
				 */
				if (n == INT_MAX) {
					pr_getprot_done(&tmp);
					ASSERT(tmp == NULL);
					return (n);
				}
			}
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}

/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}
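
/*
 * A worked example of the two converters above (comment only, a sketch):
 * pr_u32tos(305, buf, 8) stores "305" followed by five '\0' pad bytes and
 * returns 3, while pr_u64tos(305, buf) stores just the three digit
 * characters (no terminator, no padding) and likewise returns 3.
 */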

/*
 * Similar to getf() / getf_gen(), but for the specified process.  On success,
 * returns the fp with fp->f_count incremented.  The caller MUST call
 * pr_releasef(fp) on the returned fp after completing any actions using that
 * fp.  We return a reference-held (fp->f_count bumped) file_t so no other
 * closef() can invoke destructive VOP_CLOSE actions while we're inspecting
 * the process's FD.
 *
 * Returns NULL for errors: either an empty process-table slot post-fi_lock
 * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock.
 * Both failure modes have DTrace probes.
 *
 * The current design of the procfs "close" code path uses the following lock
 * order of:
 *
 *	1: (file_t) f_tlock
 *	2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
 *
 * That happens because closef() holds f_tlock while calling fop_close(),
 * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
 * beginning.
 *
 * That lock order creates a challenge for pr_getf, which needs to take those
 * locks in the opposite order when the fd points to a procfs file descriptor.
 * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
 * (limited) number of times, failing if we don't get both locks.
 *
 * The cases where this can fail are rare, and all involve a procfs caller
 * asking for info (eg. FDINFO) on another procfs FD.  In these cases,
 * returning EBADF (which results from a NULL return from pr_getf()) is
 * acceptable.
 *
 * One can increase the number of tries in pr_getf_maxtries if one is worried
 * about the contentious case.
 */

uint64_t pr_getf_tryfails;	/* Bumped for statistics purposes. */
int pr_getf_maxtries = 3;	/* So you can tune it from /etc/system */

file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;
	int tries = 0;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

retry:
	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (mutex_tryenter(&fp->f_tlock)) {
			ASSERT(fp->f_count > 0);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			if (flag != NULL)
				*flag = ufp->uf_flag;
		} else {
			/*
			 * Note the number of mutex_trylock attempts.
			 *
			 * The exit path will catch this and try again if we
			 * are below the retry threshold (pr_getf_maxtries).
			 */
			tries++;
			pr_getf_tryfails++;
			/*
			 * If we hit pr_getf_maxtries, we'll return NULL.
			 * DTrace scripts looking for this sort of failure
			 * should check when arg1 is pr_getf_maxtries.
			 */
			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
			    tries);
			fp = NULL;
		}
	} else {
		fp = NULL;
		/* If we fail here, someone else closed this FD. */
		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
		tries = pr_getf_maxtries;	/* Don't bother retrying. */
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	/* Use goto instead of tail-recursion so we can keep "tries" around. */
	if (fp == NULL) {
		/* "tries" starts at 1. */
		if (tries < pr_getf_maxtries)
			goto retry;
	} else {
		/*
		 * Probes here will detect successes after arg1's number of
		 * mutex_tryenter() calls.
		 */
		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int, tries + 1);
	}

	return (fp);
}
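
/*
 * A minimal sketch of the intended pairing (the target process must
 * already be locked, so that P_PR_LOCK is set, before calling pr_getf()):
 *
 *	if ((fp = pr_getf(p, fd, NULL)) == NULL)
 *		return (EBADF);
 *	...inspect fp and fp->f_vnode...
 *	pr_releasef(fp);
 */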

/*
 * Just as pr_getf() is a little unusual in how it goes about making the file_t
 * safe for procfs consumers to access it, so too is pr_releasef() for safely
 * releasing that "hold".  The "hold" is unlike normal file descriptor activity
 * -- procfs is just an interloper here, wanting access to the vnode_t without
 * risk of a racing close() disrupting the state.  Just as pr_getf() avoids
 * some of the typical file_t behavior (such as auditing) when establishing
 * its hold, so too should pr_releasef().  It should not go through the
 * motions of closef() (since it is not a true close()) unless racing activity
 * causes it to be the last actor holding the refcount above zero.
 *
 * Under normal circumstances, we expect to find file_t`f_count > 1 after
 * the successful pr_getf() call.  We are, after all, accessing a resource
 * already held by the process in question.  We would also expect to rarely
 * race with a close() of the underlying fd, meaning that file_t`f_count > 1
 * would still hold at pr_releasef() time.  That would mean we only need to
 * decrement f_count, leaving it to the process to later close the fd (thus
 * triggering VOP_CLOSE(), etc).
 *
 * It is only when that process manages to close() the fd while we have it
 * "held" in procfs that we must make a trip through the traditional closef()
 * logic to ensure proper tear-down of the file_t.
 */
void
pr_releasef(file_t *fp)
{
	mutex_enter(&fp->f_tlock);
	if (fp->f_count > 1) {
		/*
		 * This is the most common case: The file is still held open by
		 * the process, and we simply need to release our hold by
		 * decrementing f_count
		 */
		fp->f_count--;
		mutex_exit(&fp->f_tlock);
	} else {
		/*
		 * A rare occasion: The process snuck a close() of this file
		 * while we were doing our business in procfs.  Given that
		 * f_count == 1, we are the only one with a reference to the
		 * file_t and need to take a trip through closef() to free it.
		 */
		mutex_exit(&fp->f_tlock);
		(void) closef(fp);
	}
}

void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
	char *s = name;
	struct vfs *vfsp;
	struct vfssw *vfsswp;

	if ((vfsp = vp->v_vfsp) != NULL &&
	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
	    *vfsswp->vsw_name) {
		(void) strcpy(s, vfsswp->vsw_name);
		s += strlen(s);
		*s++ = '.';
	}
	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u64tos(vattr->va_nodeid, s);
	*s++ = '\0';
}

struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}

/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))

void
pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
{
	piol_t	*iol;
	size_t	initial_size = MIN(1, n) * itemsize;

	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));

	ASSERT(list_head(iolhead) == NULL);
	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(initial_size > 0);

	/*
	 * Someone creating chained copyout buffers may ask for less than
	 * MAPSIZE if the amount of data to be buffered is known to be
	 * smaller than that.
	 * But in order to prevent involuntary self-denial of service,
	 * the requested input size is clamped at MAPSIZE.
	 */
	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
	iol = kmem_alloc(initial_size, KM_SLEEP);
	list_insert_head(iolhead, iol);
	iol->piol_usedsize = 0;
	iol->piol_size = initial_size;
}
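
/*
 * A sketch of the chained-buffer pattern as used by prgetmap() and friends
 * below (the item count passed to pr_iol_initlist() is only an estimate):
 * initialize the list, append one fixed-size item at a time, then hand the
 * whole chain to one of the copyout/uiomove routines, which also frees it:
 *
 *	list_t head;
 *
 *	pr_iol_initlist(&head, sizeof (prmap_t), nitems_estimate);
 *	mp = pr_iol_newbuf(&head, sizeof (prmap_t));	(repeat per item)
 *	error = pr_iol_copyout_and_free(&head, &addr, error);
 */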

void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t	*iol;
	char	*new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer.  Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}

void
pr_iol_freelist(list_t *iolhead)
{
	piol_t	*iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);
}

int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
	int error = errin;
	piol_t	*iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		if (!error) {
			if (copyout(PIOL_DATABUF(iol), *tgt,
			    iol->piol_usedsize))
				error = EFAULT;
			*tgt += iol->piol_usedsize;
		}
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
	offset_t	off = uiop->uio_offset;
	char		*base;
	size_t		size;
	piol_t		*iol;
	int		error = errin;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		base = PIOL_DATABUF(iol);
		size = iol->piol_usedsize;
		if (off <= size && error == 0 && uiop->uio_resid > 0)
			error = uiomove(base + off, size - off,
			    UIO_READ, uiop);
		off = MAX(0, off - (offset_t)size);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

#ifdef _SYSCALL32_IMPL
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
1996 */ 1997 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1998 1999 if ((seg = AS_SEGFIRST(as)) == NULL) 2000 return (0); 2001 2002 brkseg = break_seg(p); 2003 stkseg = as_segat(as, prgetstackbase(p)); 2004 2005 do { 2006 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 2007 caddr_t saddr, naddr; 2008 void *tmp = NULL; 2009 2010 if ((seg->s_flags & S_HOLE) != 0) { 2011 continue; 2012 } 2013 2014 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2015 prot = pr_getprot(seg, reserved, &tmp, 2016 &saddr, &naddr, eaddr); 2017 if (saddr == naddr) 2018 continue; 2019 2020 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 2021 2022 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2023 mp->pr_size = (size32_t)(naddr - saddr); 2024 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2025 mp->pr_mflags = 0; 2026 if (prot & PROT_READ) 2027 mp->pr_mflags |= MA_READ; 2028 if (prot & PROT_WRITE) 2029 mp->pr_mflags |= MA_WRITE; 2030 if (prot & PROT_EXEC) 2031 mp->pr_mflags |= MA_EXEC; 2032 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2033 mp->pr_mflags |= MA_SHARED; 2034 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2035 mp->pr_mflags |= MA_NORESERVE; 2036 if (seg->s_ops == &segspt_shmops || 2037 (seg->s_ops == &segvn_ops && 2038 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2039 mp->pr_mflags |= MA_ANON; 2040 if (seg == brkseg) 2041 mp->pr_mflags |= MA_BREAK; 2042 else if (seg == stkseg) { 2043 mp->pr_mflags |= MA_STACK; 2044 if (reserved) { 2045 size_t maxstack = 2046 ((size_t)p->p_stk_ctl + 2047 PAGEOFFSET) & PAGEMASK; 2048 uintptr_t vaddr = 2049 (uintptr_t)prgetstackbase(p) + 2050 p->p_stksize - maxstack; 2051 mp->pr_vaddr = (caddr32_t)vaddr; 2052 mp->pr_size = (size32_t) 2053 ((uintptr_t)naddr - vaddr); 2054 } 2055 } 2056 if (seg->s_ops == &segspt_shmops) 2057 mp->pr_mflags |= MA_ISM | MA_SHM; 2058 mp->pr_pagesize = PAGESIZE; 2059 2060 /* 2061 * Manufacture a filename for the "object" directory. 2062 */ 2063 vattr.va_mask = AT_FSID|AT_NODEID; 2064 if (seg->s_ops == &segvn_ops && 2065 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2066 vp != NULL && vp->v_type == VREG && 2067 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2068 if (vp == p->p_exec) 2069 (void) strcpy(mp->pr_mapname, "a.out"); 2070 else 2071 pr_object_name(mp->pr_mapname, 2072 vp, &vattr); 2073 } 2074 2075 /* 2076 * Get the SysV shared memory id, if any. 2077 */ 2078 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 2079 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 2080 SHMID_NONE) { 2081 if (mp->pr_shmid == SHMID_FREE) 2082 mp->pr_shmid = -1; 2083 2084 mp->pr_mflags |= MA_SHM; 2085 } else { 2086 mp->pr_shmid = -1; 2087 } 2088 } 2089 ASSERT(tmp == NULL); 2090 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2091 2092 return (0); 2093 } 2094 #endif /* _SYSCALL32_IMPL */ 2095 2096 /* 2097 * Return the size of the /proc page data file. 
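 *
 * The file is laid out as a prpageheader_t followed, for each
 * non-empty address range, by a prasmap_t plus one byte of page data
 * per page, rounded up to an 8-byte boundary.  For example, a 5-page
 * range contributes sizeof (prasmap_t) + round8(5), i.e. 8 bytes of
 * page data, to the total computed below.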
2098 */ 2099 size_t 2100 prpdsize(struct as *as) 2101 { 2102 struct seg *seg; 2103 size_t size; 2104 2105 ASSERT(as != &kas && AS_WRITE_HELD(as)); 2106 2107 if ((seg = AS_SEGFIRST(as)) == NULL) 2108 return (0); 2109 2110 size = sizeof (prpageheader_t); 2111 do { 2112 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2113 caddr_t saddr, naddr; 2114 void *tmp = NULL; 2115 size_t npage; 2116 2117 if ((seg->s_flags & S_HOLE) != 0) { 2118 continue; 2119 } 2120 2121 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2122 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2123 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 2124 size += sizeof (prasmap_t) + round8(npage); 2125 } 2126 ASSERT(tmp == NULL); 2127 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2128 2129 return (size); 2130 } 2131 2132 #ifdef _SYSCALL32_IMPL 2133 size_t 2134 prpdsize32(struct as *as) 2135 { 2136 struct seg *seg; 2137 size_t size; 2138 2139 ASSERT(as != &kas && AS_WRITE_HELD(as)); 2140 2141 if ((seg = AS_SEGFIRST(as)) == NULL) 2142 return (0); 2143 2144 size = sizeof (prpageheader32_t); 2145 do { 2146 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2147 caddr_t saddr, naddr; 2148 void *tmp = NULL; 2149 size_t npage; 2150 2151 if ((seg->s_flags & S_HOLE) != 0) { 2152 continue; 2153 } 2154 2155 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2156 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2157 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 2158 size += sizeof (prasmap32_t) + round8(npage); 2159 } 2160 ASSERT(tmp == NULL); 2161 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2162 2163 return (size); 2164 } 2165 #endif /* _SYSCALL32_IMPL */ 2166 2167 /* 2168 * Read page data information. 2169 */ 2170 int 2171 prpdread(proc_t *p, uint_t hatid, struct uio *uiop) 2172 { 2173 struct as *as = p->p_as; 2174 caddr_t buf; 2175 size_t size; 2176 prpageheader_t *php; 2177 prasmap_t *pmp; 2178 struct seg *seg; 2179 int error; 2180 2181 again: 2182 AS_LOCK_ENTER(as, RW_WRITER); 2183 2184 if ((seg = AS_SEGFIRST(as)) == NULL) { 2185 AS_LOCK_EXIT(as); 2186 return (0); 2187 } 2188 size = prpdsize(as); 2189 if (uiop->uio_resid < size) { 2190 AS_LOCK_EXIT(as); 2191 return (E2BIG); 2192 } 2193 2194 buf = kmem_zalloc(size, KM_SLEEP); 2195 php = (prpageheader_t *)buf; 2196 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t)); 2197 2198 hrt2ts(gethrtime(), &php->pr_tstamp); 2199 php->pr_nmap = 0; 2200 php->pr_npage = 0; 2201 do { 2202 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2203 caddr_t saddr, naddr; 2204 void *tmp = NULL; 2205 2206 if ((seg->s_flags & S_HOLE) != 0) { 2207 continue; 2208 } 2209 2210 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2211 struct vnode *vp; 2212 struct vattr vattr; 2213 size_t len; 2214 size_t npage; 2215 uint_t prot; 2216 uintptr_t next; 2217 2218 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2219 if ((len = (size_t)(naddr - saddr)) == 0) 2220 continue; 2221 npage = len / PAGESIZE; 2222 next = (uintptr_t)(pmp + 1) + round8(npage); 2223 /* 2224 * It's possible that the address space can change 2225 * subtly even though we're holding as->a_lock 2226 * due to the nondeterminism of page_exists() in 2227 * the presence of asynchronously flushed pages or 2228 * mapped files whose sizes are changing. 2229 * page_exists() may be called indirectly from 2230 * pr_getprot() by a SEGOP_INCORE() routine.
2231 * If this happens we need to make sure we don't 2232 * overrun the buffer whose size we computed based 2233 * on the initial iteration through the segments. 2234 * Once we've detected an overflow, we need to clean 2235 * up the temporary memory allocated in pr_getprot() 2236 * and retry. If there's a pending signal, we return 2237 * EINTR so that this thread can be dislodged if 2238 * a latent bug causes us to spin indefinitely. 2239 */ 2240 if (next > (uintptr_t)buf + size) { 2241 pr_getprot_done(&tmp); 2242 AS_LOCK_EXIT(as); 2243 2244 kmem_free(buf, size); 2245 2246 if (ISSIG(curthread, JUSTLOOKING)) 2247 return (EINTR); 2248 2249 goto again; 2250 } 2251 2252 php->pr_nmap++; 2253 php->pr_npage += npage; 2254 pmp->pr_vaddr = (uintptr_t)saddr; 2255 pmp->pr_npage = npage; 2256 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2257 pmp->pr_mflags = 0; 2258 if (prot & PROT_READ) 2259 pmp->pr_mflags |= MA_READ; 2260 if (prot & PROT_WRITE) 2261 pmp->pr_mflags |= MA_WRITE; 2262 if (prot & PROT_EXEC) 2263 pmp->pr_mflags |= MA_EXEC; 2264 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2265 pmp->pr_mflags |= MA_SHARED; 2266 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2267 pmp->pr_mflags |= MA_NORESERVE; 2268 if (seg->s_ops == &segspt_shmops || 2269 (seg->s_ops == &segvn_ops && 2270 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2271 pmp->pr_mflags |= MA_ANON; 2272 if (seg->s_ops == &segspt_shmops) 2273 pmp->pr_mflags |= MA_ISM | MA_SHM; 2274 pmp->pr_pagesize = PAGESIZE; 2275 /* 2276 * Manufacture a filename for the "object" directory. 2277 */ 2278 vattr.va_mask = AT_FSID|AT_NODEID; 2279 if (seg->s_ops == &segvn_ops && 2280 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2281 vp != NULL && vp->v_type == VREG && 2282 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2283 if (vp == p->p_exec) 2284 (void) strcpy(pmp->pr_mapname, "a.out"); 2285 else 2286 pr_object_name(pmp->pr_mapname, 2287 vp, &vattr); 2288 } 2289 2290 /* 2291 * Get the SysV shared memory id, if any. 
2292 */ 2293 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2294 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2295 SHMID_NONE) { 2296 if (pmp->pr_shmid == SHMID_FREE) 2297 pmp->pr_shmid = -1; 2298 2299 pmp->pr_mflags |= MA_SHM; 2300 } else { 2301 pmp->pr_shmid = -1; 2302 } 2303 2304 hat_getstat(as, saddr, len, hatid, 2305 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2306 pmp = (prasmap_t *)next; 2307 } 2308 ASSERT(tmp == NULL); 2309 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2310 2311 AS_LOCK_EXIT(as); 2312 2313 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2314 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2315 kmem_free(buf, size); 2316 2317 return (error); 2318 } 2319 2320 #ifdef _SYSCALL32_IMPL 2321 int 2322 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop) 2323 { 2324 struct as *as = p->p_as; 2325 caddr_t buf; 2326 size_t size; 2327 prpageheader32_t *php; 2328 prasmap32_t *pmp; 2329 struct seg *seg; 2330 int error; 2331 2332 again: 2333 AS_LOCK_ENTER(as, RW_WRITER); 2334 2335 if ((seg = AS_SEGFIRST(as)) == NULL) { 2336 AS_LOCK_EXIT(as); 2337 return (0); 2338 } 2339 size = prpdsize32(as); 2340 if (uiop->uio_resid < size) { 2341 AS_LOCK_EXIT(as); 2342 return (E2BIG); 2343 } 2344 2345 buf = kmem_zalloc(size, KM_SLEEP); 2346 php = (prpageheader32_t *)buf; 2347 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t)); 2348 2349 hrt2ts32(gethrtime(), &php->pr_tstamp); 2350 php->pr_nmap = 0; 2351 php->pr_npage = 0; 2352 do { 2353 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2354 caddr_t saddr, naddr; 2355 void *tmp = NULL; 2356 2357 if ((seg->s_flags & S_HOLE) != 0) { 2358 continue; 2359 } 2360 2361 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2362 struct vnode *vp; 2363 struct vattr vattr; 2364 size_t len; 2365 size_t npage; 2366 uint_t prot; 2367 uintptr_t next; 2368 2369 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2370 if ((len = (size_t)(naddr - saddr)) == 0) 2371 continue; 2372 npage = len / PAGESIZE; 2373 next = (uintptr_t)(pmp + 1) + round8(npage); 2374 /* 2375 * It's possible that the address space can change 2376 * subtly even though we're holding as->a_lock 2377 * due to the nondeterminism of page_exists() in 2378 * the presence of asynchronously flushed pages or 2379 * mapped files whose sizes are changing. 2380 * page_exists() may be called indirectly from 2381 * pr_getprot() by a SEGOP_INCORE() routine. 2382 * If this happens we need to make sure we don't 2383 * overrun the buffer whose size we computed based 2384 * on the initial iteration through the segments. 2385 * Once we've detected an overflow, we need to clean 2386 * up the temporary memory allocated in pr_getprot() 2387 * and retry. If there's a pending signal, we return 2388 * EINTR so that this thread can be dislodged if 2389 * a latent bug causes us to spin indefinitely.
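 *
 * In outline, the protocol implemented below is:
 *
 *	again:
 *		lock the address space, compute the size, allocate;
 *		walk the segments, filling in the buffer;
 *		on overflow: unlock, free, fail with EINTR if a
 *		signal is pending, otherwise goto again.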
2390 */ 2391 if (next > (uintptr_t)buf + size) { 2392 pr_getprot_done(&tmp); 2393 AS_LOCK_EXIT(as); 2394 2395 kmem_free(buf, size); 2396 2397 if (ISSIG(curthread, JUSTLOOKING)) 2398 return (EINTR); 2399 2400 goto again; 2401 } 2402 2403 php->pr_nmap++; 2404 php->pr_npage += npage; 2405 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2406 pmp->pr_npage = (size32_t)npage; 2407 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2408 pmp->pr_mflags = 0; 2409 if (prot & PROT_READ) 2410 pmp->pr_mflags |= MA_READ; 2411 if (prot & PROT_WRITE) 2412 pmp->pr_mflags |= MA_WRITE; 2413 if (prot & PROT_EXEC) 2414 pmp->pr_mflags |= MA_EXEC; 2415 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2416 pmp->pr_mflags |= MA_SHARED; 2417 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2418 pmp->pr_mflags |= MA_NORESERVE; 2419 if (seg->s_ops == &segspt_shmops || 2420 (seg->s_ops == &segvn_ops && 2421 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2422 pmp->pr_mflags |= MA_ANON; 2423 if (seg->s_ops == &segspt_shmops) 2424 pmp->pr_mflags |= MA_ISM | MA_SHM; 2425 pmp->pr_pagesize = PAGESIZE; 2426 /* 2427 * Manufacture a filename for the "object" directory. 2428 */ 2429 vattr.va_mask = AT_FSID|AT_NODEID; 2430 if (seg->s_ops == &segvn_ops && 2431 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2432 vp != NULL && vp->v_type == VREG && 2433 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2434 if (vp == p->p_exec) 2435 (void) strcpy(pmp->pr_mapname, "a.out"); 2436 else 2437 pr_object_name(pmp->pr_mapname, 2438 vp, &vattr); 2439 } 2440 2441 /* 2442 * Get the SysV shared memory id, if any. 2443 */ 2444 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2445 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2446 SHMID_NONE) { 2447 if (pmp->pr_shmid == SHMID_FREE) 2448 pmp->pr_shmid = -1; 2449 2450 pmp->pr_mflags |= MA_SHM; 2451 } else { 2452 pmp->pr_shmid = -1; 2453 } 2454 2455 hat_getstat(as, saddr, len, hatid, 2456 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2457 pmp = (prasmap32_t *)next; 2458 } 2459 ASSERT(tmp == NULL); 2460 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2461 2462 AS_LOCK_EXIT(as); 2463 2464 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2465 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2466 kmem_free(buf, size); 2467 2468 return (error); 2469 } 2470 #endif /* _SYSCALL32_IMPL */ 2471 2472 ushort_t 2473 prgetpctcpu(uint64_t pct) 2474 { 2475 /* 2476 * The value returned will be relevant in the zone of the examiner, 2477 * which may not be the same as the zone which performed the procfs 2478 * mount. 2479 */ 2480 int nonline = zone_ncpus_online_get(curproc->p_zone); 2481 2482 /* 2483 * Prorate over online cpus so we don't exceed 100% 2484 */ 2485 if (nonline > 1) 2486 pct /= nonline; 2487 pct >>= 16; /* convert to 16-bit scaled integer */ 2488 if (pct > 0x8000) /* might happen, due to rounding */ 2489 pct = 0x8000; 2490 return ((ushort_t)pct); 2491 } 2492 2493 /* 2494 * Return information used by ps(1). 2495 */ 2496 void 2497 prgetpsinfo(proc_t *p, psinfo_t *psp) 2498 { 2499 kthread_t *t; 2500 struct cred *cred; 2501 hrtime_t hrutime, hrstime; 2502 2503 ASSERT(MUTEX_HELD(&p->p_lock)); 2504 2505 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2506 bzero(psp, sizeof (*psp)); 2507 else { 2508 thread_unlock(t); 2509 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2510 } 2511 2512 /* 2513 * only export SSYS and SMSACCT; everything else is off-limits to 2514 * userland apps. 
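 *
 * For reference, a userland consumer typically obtains this structure
 * by reading /proc/<pid>/psinfo directly; an illustrative sketch
 * (error handling elided):
 *
 *	psinfo_t ps;
 *	int fd = open("/proc/1234/psinfo", O_RDONLY);
 *	if (fd >= 0 && read(fd, &ps, sizeof (ps)) == sizeof (ps))
 *		(examine ps.pr_fname, ps.pr_pctcpu, ...);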
2515 */ 2516 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2517 psp->pr_nlwp = p->p_lwpcnt; 2518 psp->pr_nzomb = p->p_zombcnt; 2519 mutex_enter(&p->p_crlock); 2520 cred = p->p_cred; 2521 psp->pr_uid = crgetruid(cred); 2522 psp->pr_euid = crgetuid(cred); 2523 psp->pr_gid = crgetrgid(cred); 2524 psp->pr_egid = crgetgid(cred); 2525 mutex_exit(&p->p_crlock); 2526 psp->pr_pid = p->p_pid; 2527 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2528 (p->p_flag & SZONETOP)) { 2529 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2530 /* 2531 * Inside local zones, fake zsched's pid as parent pids for 2532 * processes which reference processes outside of the zone. 2533 */ 2534 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2535 } else { 2536 psp->pr_ppid = p->p_ppid; 2537 } 2538 psp->pr_pgid = p->p_pgrp; 2539 psp->pr_sid = p->p_sessp->s_sid; 2540 psp->pr_taskid = p->p_task->tk_tkid; 2541 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2542 psp->pr_poolid = p->p_pool->pool_id; 2543 psp->pr_zoneid = p->p_zone->zone_id; 2544 if ((psp->pr_contract = PRCTID(p)) == 0) 2545 psp->pr_contract = -1; 2546 psp->pr_addr = (uintptr_t)prgetpsaddr(p); 2547 switch (p->p_model) { 2548 case DATAMODEL_ILP32: 2549 psp->pr_dmodel = PR_MODEL_ILP32; 2550 break; 2551 case DATAMODEL_LP64: 2552 psp->pr_dmodel = PR_MODEL_LP64; 2553 break; 2554 } 2555 hrutime = mstate_aggr_state(p, LMS_USER); 2556 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2557 hrt2ts((hrutime + hrstime), &psp->pr_time); 2558 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2559 2560 if (t == NULL) { 2561 int wcode = p->p_wcode; /* must be atomic read */ 2562 2563 if (wcode) 2564 psp->pr_wstat = wstat(wcode, p->p_wdata); 2565 psp->pr_ttydev = PRNODEV; 2566 psp->pr_lwp.pr_state = SZOMB; 2567 psp->pr_lwp.pr_sname = 'Z'; 2568 psp->pr_lwp.pr_bindpro = PBIND_NONE; 2569 psp->pr_lwp.pr_bindpset = PS_NONE; 2570 } else { 2571 user_t *up = PTOU(p); 2572 struct as *as; 2573 dev_t d; 2574 extern dev_t rwsconsdev, rconsdev, uconsdev; 2575 2576 d = cttydev(p); 2577 /* 2578 * If the controlling terminal is the real 2579 * or workstation console device, map to what the 2580 * user thinks is the console device. Handle case when 2581 * rwsconsdev or rconsdev is set to NODEV for Starfire. 2582 */ 2583 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 2584 d = uconsdev; 2585 psp->pr_ttydev = (d == NODEV) ? 
PRNODEV : d; 2586 psp->pr_start = up->u_start; 2587 bcopy(up->u_comm, psp->pr_fname, 2588 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2589 bcopy(up->u_psargs, psp->pr_psargs, 2590 MIN(PRARGSZ-1, PSARGSZ)); 2591 psp->pr_argc = up->u_argc; 2592 psp->pr_argv = up->u_argv; 2593 psp->pr_envp = up->u_envp; 2594 2595 /* get the chosen lwp's lwpsinfo */ 2596 prgetlwpsinfo(t, &psp->pr_lwp); 2597 2598 /* compute %cpu for the process */ 2599 if (p->p_lwpcnt == 1) 2600 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2601 else { 2602 uint64_t pct = 0; 2603 hrtime_t cur_time = gethrtime_unscaled(); 2604 2605 t = p->p_tlist; 2606 do { 2607 pct += cpu_update_pct(t, cur_time); 2608 } while ((t = t->t_forw) != p->p_tlist); 2609 2610 psp->pr_pctcpu = prgetpctcpu(pct); 2611 } 2612 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2613 psp->pr_size = 0; 2614 psp->pr_rssize = 0; 2615 } else { 2616 mutex_exit(&p->p_lock); 2617 AS_LOCK_ENTER(as, RW_READER); 2618 psp->pr_size = btopr(as->a_resvsize) * 2619 (PAGESIZE / 1024); 2620 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024); 2621 psp->pr_pctmem = rm_pctmemory(as); 2622 AS_LOCK_EXIT(as); 2623 mutex_enter(&p->p_lock); 2624 } 2625 } 2626 } 2627 2628 static size_t 2629 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen) 2630 { 2631 pr_misc_header_t *misc; 2632 size_t len; 2633 2634 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2635 2636 if (data != NULL) { 2637 misc = pr_iol_newbuf(data, len); 2638 misc->pr_misc_type = type; 2639 misc->pr_misc_size = len; 2640 misc++; 2641 bcopy((char *)val, (char *)misc, vlen); 2642 } 2643 2644 return (len); 2645 } 2646 2647 /* 2648 * There's no elegant way to determine if a character device 2649 * supports TLI, so just check a hardcoded list of known TLI 2650 * devices. 2651 */ 2652 2653 static boolean_t 2654 pristli(vnode_t *vp) 2655 { 2656 static const char *tlidevs[] = { 2657 "udp", "udp6", "tcp", "tcp6" 2658 }; 2659 char *devname; 2660 uint_t i; 2661 2662 ASSERT(vp != NULL); 2663 2664 if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0) 2665 return (B_FALSE); 2666 2667 if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL) 2668 return (B_FALSE); 2669 2670 for (i = 0; i < ARRAY_SIZE(tlidevs); i++) { 2671 if (strcmp(devname, tlidevs[i]) == 0) 2672 return (B_TRUE); 2673 } 2674 2675 return (B_FALSE); 2676 } 2677 2678 static size_t 2679 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred) 2680 { 2681 char *pathname; 2682 size_t pathlen; 2683 size_t sz = 0; 2684 2685 /* 2686 * The global zone's path to a file in a non-global zone can exceed 2687 * MAXPATHLEN. 
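 *
 * For example, a file that a zone sees as /var/tmp/f is visible to
 * the global zone as <zoneroot>/var/tmp/f, where the zone root prefix
 * may itself approach MAXPATHLEN; hence the doubled buffer below.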
2688 */ 2689 pathlen = MAXPATHLEN * 2 + 1; 2690 pathname = kmem_alloc(pathlen, KM_SLEEP); 2691 2692 if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) { 2693 sz += prfdinfomisc(data, PR_PATHNAME, 2694 pathname, strlen(pathname) + 1); 2695 } 2696 2697 kmem_free(pathname, pathlen); 2698 2699 return (sz); 2700 } 2701 2702 static size_t 2703 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred) 2704 { 2705 strcmd_t strcmd; 2706 int32_t rval; 2707 size_t sz = 0; 2708 2709 strcmd.sc_cmd = TI_GETMYNAME; 2710 strcmd.sc_timeout = 1; 2711 strcmd.sc_len = STRCMDBUFSIZE; 2712 2713 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2714 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2715 sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf, 2716 strcmd.sc_len); 2717 } 2718 2719 strcmd.sc_cmd = TI_GETPEERNAME; 2720 strcmd.sc_timeout = 1; 2721 strcmd.sc_len = STRCMDBUFSIZE; 2722 2723 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2724 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2725 sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf, 2726 strcmd.sc_len); 2727 } 2728 2729 return (sz); 2730 } 2731 2732 static size_t 2733 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred) 2734 { 2735 sonode_t *so; 2736 socklen_t vlen; 2737 size_t sz = 0; 2738 uint_t i; 2739 2740 if (vp->v_stream != NULL) { 2741 so = VTOSO(vp->v_stream->sd_vnode); 2742 2743 if (so->so_version == SOV_STREAM) 2744 so = NULL; 2745 } else { 2746 so = VTOSO(vp); 2747 } 2748 2749 if (so == NULL) 2750 return (0); 2751 2752 DTRACE_PROBE1(sonode, sonode_t *, so); 2753 2754 /* prmisc - PR_SOCKETNAME */ 2755 2756 struct sockaddr_storage buf; 2757 struct sockaddr *name = (struct sockaddr *)&buf; 2758 2759 vlen = sizeof (buf); 2760 if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0) 2761 sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen); 2762 2763 /* prmisc - PR_PEERSOCKNAME */ 2764 2765 vlen = sizeof (buf); 2766 if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0) 2767 sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen); 2768 2769 /* prmisc - PR_SOCKOPTS_BOOL_OPTS */ 2770 2771 static struct boolopt { 2772 int level; 2773 int opt; 2774 int bopt; 2775 } boolopts[] = { 2776 { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG }, 2777 { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR }, 2778 #ifdef SO_REUSEPORT 2779 /* SmartOS and OmniOS have SO_REUSEPORT */ 2780 { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT }, 2781 #endif 2782 { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE }, 2783 { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE }, 2784 { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST }, 2785 { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE }, 2786 { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND }, 2787 { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES }, 2788 { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT }, 2789 { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT }, 2790 { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND }, 2791 { SOL_SOCKET, SO_VRRP, PR_SO_VRRP }, 2792 { IPPROTO_UDP, UDP_NAT_T_ENDPOINT, 2793 PR_UDP_NAT_T_ENDPOINT } 2794 }; 2795 prsockopts_bool_opts_t opts; 2796 int val; 2797 2798 if (data != NULL) { 2799 opts.prsock_bool_opts = 0; 2800 2801 for (i = 0; i < ARRAY_SIZE(boolopts); i++) { 2802 vlen = sizeof (val); 2803 if (SOP_GETSOCKOPT(so, boolopts[i].level, 2804 boolopts[i].opt, &val, &vlen, 0, cred) == 0 && 2805 val != 0) { 2806 opts.prsock_bool_opts |= boolopts[i].bopt; 2807 } 2808 } 2809 } 2810 2811 sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts)); 2812 2813 /* prmisc - PR_SOCKOPT_LINGER */ 2814 2815 
struct linger l; 2816 2817 vlen = sizeof (l); 2818 if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen, 2819 0, cred) == 0 && vlen > 0) { 2820 sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen); 2821 } 2822 2823 /* prmisc - PR_SOCKOPT_* int types */ 2824 2825 static struct sopt { 2826 int level; 2827 int opt; 2828 int bopt; 2829 } sopts[] = { 2830 { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE }, 2831 { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF }, 2832 { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF } 2833 }; 2834 2835 for (i = 0; i < ARRAY_SIZE(sopts); i++) { 2836 vlen = sizeof (val); 2837 if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt, 2838 &val, &vlen, 0, cred) == 0 && vlen > 0) { 2839 sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen); 2840 } 2841 } 2842 2843 /* prmisc - PR_SOCKOPT_IP_NEXTHOP */ 2844 2845 in_addr_t nexthop_val; 2846 2847 vlen = sizeof (nexthop_val); 2848 if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP, 2849 &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) { 2850 sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP, 2851 &nexthop_val, vlen); 2852 } 2853 2854 /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */ 2855 2856 struct sockaddr_in6 nexthop6_val; 2857 2858 vlen = sizeof (nexthop6_val); 2859 if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP, 2860 &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) { 2861 sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP, 2862 &nexthop6_val, vlen); 2863 } 2864 2865 /* prmisc - PR_SOCKOPT_TCP_CONGESTION */ 2866 2867 char cong[CC_ALGO_NAME_MAX]; 2868 2869 vlen = sizeof (cong); 2870 if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION, 2871 &cong, &vlen, 0, cred) == 0 && vlen > 0) { 2872 sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen); 2873 } 2874 2875 /* prmisc - PR_SOCKFILTERS_PRIV */ 2876 2877 struct fil_info fi; 2878 2879 vlen = sizeof (fi); 2880 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2881 &fi, &vlen, 0, cred) == 0 && vlen != 0) { 2882 pr_misc_header_t *misc; 2883 size_t len; 2884 2885 /* 2886 * We limit the number of returned filters to 32. 2887 * This is the maximum number that pfiles will print 2888 * anyway. 2889 */ 2890 vlen = MIN(32, fi.fi_pos + 1); 2891 vlen *= sizeof (fi); 2892 2893 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2894 sz += len; 2895 2896 if (data != NULL) { 2897 /* 2898 * So that the filter list can be built incrementally, 2899 * prfdinfomisc() is not used here. Instead we 2900 * allocate a buffer directly on the copyout list using 2901 * pr_iol_newbuf() 2902 */ 2903 misc = pr_iol_newbuf(data, len); 2904 misc->pr_misc_type = PR_SOCKFILTERS_PRIV; 2905 misc->pr_misc_size = len; 2906 misc++; 2907 len = vlen; 2908 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2909 misc, &vlen, 0, cred) == 0) { 2910 /* 2911 * In case the number of filters has reduced 2912 * since the first call, explicitly zero out 2913 * any unpopulated space. 
2914 */ 2915 if (vlen < len) 2916 bzero((char *)misc + vlen, len - vlen); 2917 } else { 2918 /* Something went wrong, zero out the result */ 2919 bzero(misc, vlen); 2920 } 2921 } 2922 } 2923 2924 return (sz); 2925 } 2926 2927 typedef struct prfdinfo_nm_path_cbdata { 2928 proc_t *nmp_p; 2929 u_offset_t nmp_sz; 2930 list_t *nmp_data; 2931 } prfdinfo_nm_path_cbdata_t; 2932 2933 static int 2934 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg) 2935 { 2936 prfdinfo_nm_path_cbdata_t *cb = arg; 2937 2938 cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred); 2939 2940 return (0); 2941 } 2942 2943 u_offset_t 2944 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred) 2945 { 2946 u_offset_t sz; 2947 2948 /* 2949 * All fdinfo files will be at least this big - 2950 * sizeof fdinfo struct + zero length trailer 2951 */ 2952 sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t); 2953 2954 /* Pathname */ 2955 switch (vp->v_type) { 2956 case VDOOR: { 2957 prfdinfo_nm_path_cbdata_t cb = { 2958 .nmp_p = p, 2959 .nmp_data = NULL, 2960 .nmp_sz = 0 2961 }; 2962 2963 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 2964 sz += cb.nmp_sz; 2965 break; 2966 } 2967 case VSOCK: 2968 break; 2969 default: 2970 sz += prfdinfopath(p, vp, NULL, cred); 2971 } 2972 2973 /* Socket options */ 2974 if (vp->v_type == VSOCK) 2975 sz += prfdinfosockopt(vp, NULL, cred); 2976 2977 /* TLI/XTI sockets */ 2978 if (pristli(vp)) 2979 sz += prfdinfotlisockopt(vp, NULL, cred); 2980 2981 return (sz); 2982 } 2983 2984 int 2985 prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred, 2986 cred_t *file_cred, list_t *data) 2987 { 2988 vattr_t vattr; 2989 int error; 2990 2991 /* 2992 * The buffer has been initialised to zero by pr_iol_newbuf(). 2993 * Initialise defaults for any values that should not default to zero. 2994 */ 2995 fdinfo->pr_uid = (uid_t)-1; 2996 fdinfo->pr_gid = (gid_t)-1; 2997 fdinfo->pr_size = -1; 2998 fdinfo->pr_locktype = F_UNLCK; 2999 fdinfo->pr_lockpid = -1; 3000 fdinfo->pr_locksysid = -1; 3001 fdinfo->pr_peerpid = -1; 3002 3003 /* Offset */ 3004 3005 /* 3006 * pr_offset has already been set from the underlying file_t. 3007 * Check if it is plausible and reset to -1 if not. 3008 */ 3009 if (fdinfo->pr_offset != -1 && 3010 VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0) 3011 fdinfo->pr_offset = -1; 3012 3013 /* 3014 * Attributes 3015 * 3016 * We have two cred_t structures available here. 3017 * 'cred' is the caller's credential, and 'file_cred' is the credential 3018 * for the file being inspected. 3019 * 3020 * When looking up the file attributes, file_cred is used in order 3021 * that the correct ownership is set for doors and FIFOs. Since the 3022 * caller has permission to read the fdinfo file in proc, this does 3023 * not expose any additional information.
3024 */ 3025 vattr.va_mask = AT_STAT; 3026 if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) { 3027 fdinfo->pr_major = getmajor(vattr.va_fsid); 3028 fdinfo->pr_minor = getminor(vattr.va_fsid); 3029 fdinfo->pr_rmajor = getmajor(vattr.va_rdev); 3030 fdinfo->pr_rminor = getminor(vattr.va_rdev); 3031 fdinfo->pr_ino = (ino64_t)vattr.va_nodeid; 3032 fdinfo->pr_size = (off64_t)vattr.va_size; 3033 fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode; 3034 fdinfo->pr_uid = vattr.va_uid; 3035 fdinfo->pr_gid = vattr.va_gid; 3036 if (vp->v_type == VSOCK) 3037 fdinfo->pr_fileflags |= sock_getfasync(vp); 3038 } 3039 3040 /* locks */ 3041 3042 flock64_t bf; 3043 3044 bzero(&bf, sizeof (bf)); 3045 bf.l_type = F_WRLCK; 3046 3047 if (VOP_FRLOCK(vp, F_GETLK, &bf, 3048 (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL, 3049 cred, NULL) == 0 && bf.l_type != F_UNLCK) { 3050 fdinfo->pr_locktype = bf.l_type; 3051 fdinfo->pr_lockpid = bf.l_pid; 3052 fdinfo->pr_locksysid = bf.l_sysid; 3053 } 3054 3055 /* peer cred */ 3056 3057 k_peercred_t kpc; 3058 3059 switch (vp->v_type) { 3060 case VFIFO: 3061 case VSOCK: { 3062 int32_t rval; 3063 3064 error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc, 3065 FKIOCTL, cred, &rval, NULL); 3066 break; 3067 } 3068 case VCHR: { 3069 struct strioctl strioc; 3070 int32_t rval; 3071 3072 if (vp->v_stream == NULL) { 3073 error = ENOTSUP; 3074 break; 3075 } 3076 strioc.ic_cmd = _I_GETPEERCRED; 3077 strioc.ic_timout = INFTIM; 3078 strioc.ic_len = (int)sizeof (k_peercred_t); 3079 strioc.ic_dp = (char *)&kpc; 3080 3081 error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL, 3082 STR_NOSIG | K_TO_K, cred, &rval); 3083 break; 3084 } 3085 default: 3086 error = ENOTSUP; 3087 break; 3088 } 3089 3090 if (error == 0 && kpc.pc_cr != NULL) { 3091 proc_t *peerp; 3092 3093 fdinfo->pr_peerpid = kpc.pc_cpid; 3094 3095 crfree(kpc.pc_cr); 3096 3097 mutex_enter(&pidlock); 3098 if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) { 3099 user_t *up; 3100 3101 mutex_enter(&peerp->p_lock); 3102 mutex_exit(&pidlock); 3103 3104 up = PTOU(peerp); 3105 bcopy(up->u_comm, fdinfo->pr_peername, 3106 MIN(sizeof (up->u_comm), 3107 sizeof (fdinfo->pr_peername) - 1)); 3108 3109 mutex_exit(&peerp->p_lock); 3110 } else { 3111 mutex_exit(&pidlock); 3112 } 3113 } 3114 3115 /* pathname */ 3116 3117 switch (vp->v_type) { 3118 case VDOOR: { 3119 prfdinfo_nm_path_cbdata_t cb = { 3120 .nmp_p = p, 3121 .nmp_data = data, 3122 .nmp_sz = 0 3123 }; 3124 3125 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 3126 break; 3127 } 3128 case VSOCK: 3129 /* 3130 * Don't attempt to determine the path for a socket as the 3131 * vnode has no associated v_path. It will cause a linear scan 3132 * of the dnlc table and result in no path being found. 3133 */ 3134 break; 3135 default: 3136 (void) prfdinfopath(p, vp, data, cred); 3137 } 3138 3139 /* socket options */ 3140 if (vp->v_type == VSOCK) 3141 (void) prfdinfosockopt(vp, data, cred); 3142 3143 /* TLI/XTI stream sockets */ 3144 if (pristli(vp)) 3145 (void) prfdinfotlisockopt(vp, data, cred); 3146 3147 /* 3148 * Add a terminating header with a zero size. 
3149 */ 3150 pr_misc_header_t *misc; 3151 3152 misc = pr_iol_newbuf(data, sizeof (*misc)); 3153 misc->pr_misc_size = 0; 3154 misc->pr_misc_type = (uint_t)-1; 3155 3156 return (0); 3157 } 3158 3159 #ifdef _SYSCALL32_IMPL 3160 void 3161 prgetpsinfo32(proc_t *p, psinfo32_t *psp) 3162 { 3163 kthread_t *t; 3164 struct cred *cred; 3165 hrtime_t hrutime, hrstime; 3166 3167 ASSERT(MUTEX_HELD(&p->p_lock)); 3168 3169 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 3170 bzero(psp, sizeof (*psp)); 3171 else { 3172 thread_unlock(t); 3173 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 3174 } 3175 3176 /* 3177 * only export SSYS and SMSACCT; everything else is off-limits to 3178 * userland apps. 3179 */ 3180 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 3181 psp->pr_nlwp = p->p_lwpcnt; 3182 psp->pr_nzomb = p->p_zombcnt; 3183 mutex_enter(&p->p_crlock); 3184 cred = p->p_cred; 3185 psp->pr_uid = crgetruid(cred); 3186 psp->pr_euid = crgetuid(cred); 3187 psp->pr_gid = crgetrgid(cred); 3188 psp->pr_egid = crgetgid(cred); 3189 mutex_exit(&p->p_crlock); 3190 psp->pr_pid = p->p_pid; 3191 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 3192 (p->p_flag & SZONETOP)) { 3193 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 3194 /* 3195 * Inside local zones, fake zsched's pid as parent pids for 3196 * processes which reference processes outside of the zone. 3197 */ 3198 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 3199 } else { 3200 psp->pr_ppid = p->p_ppid; 3201 } 3202 psp->pr_pgid = p->p_pgrp; 3203 psp->pr_sid = p->p_sessp->s_sid; 3204 psp->pr_taskid = p->p_task->tk_tkid; 3205 psp->pr_projid = p->p_task->tk_proj->kpj_id; 3206 psp->pr_poolid = p->p_pool->pool_id; 3207 psp->pr_zoneid = p->p_zone->zone_id; 3208 if ((psp->pr_contract = PRCTID(p)) == 0) 3209 psp->pr_contract = -1; 3210 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3211 switch (p->p_model) { 3212 case DATAMODEL_ILP32: 3213 psp->pr_dmodel = PR_MODEL_ILP32; 3214 break; 3215 case DATAMODEL_LP64: 3216 psp->pr_dmodel = PR_MODEL_LP64; 3217 break; 3218 } 3219 hrutime = mstate_aggr_state(p, LMS_USER); 3220 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 3221 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3222 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 3223 3224 if (t == NULL) { 3225 extern int wstat(int, int); /* needs a header file */ 3226 int wcode = p->p_wcode; /* must be atomic read */ 3227 3228 if (wcode) 3229 psp->pr_wstat = wstat(wcode, p->p_wdata); 3230 psp->pr_ttydev = PRNODEV32; 3231 psp->pr_lwp.pr_state = SZOMB; 3232 psp->pr_lwp.pr_sname = 'Z'; 3233 } else { 3234 user_t *up = PTOU(p); 3235 struct as *as; 3236 dev_t d; 3237 extern dev_t rwsconsdev, rconsdev, uconsdev; 3238 3239 d = cttydev(p); 3240 /* 3241 * If the controlling terminal is the real 3242 * or workstation console device, map to what the 3243 * user thinks is the console device. Handle case when 3244 * rwsconsdev or rconsdev is set to NODEV for Starfire. 
3245 */ 3246 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 3247 d = uconsdev; 3248 (void) cmpldev(&psp->pr_ttydev, d); 3249 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 3250 bcopy(up->u_comm, psp->pr_fname, 3251 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 3252 bcopy(up->u_psargs, psp->pr_psargs, 3253 MIN(PRARGSZ-1, PSARGSZ)); 3254 psp->pr_argc = up->u_argc; 3255 psp->pr_argv = (caddr32_t)up->u_argv; 3256 psp->pr_envp = (caddr32_t)up->u_envp; 3257 3258 /* get the chosen lwp's lwpsinfo */ 3259 prgetlwpsinfo32(t, &psp->pr_lwp); 3260 3261 /* compute %cpu for the process */ 3262 if (p->p_lwpcnt == 1) 3263 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 3264 else { 3265 uint64_t pct = 0; 3266 hrtime_t cur_time; 3267 3268 t = p->p_tlist; 3269 cur_time = gethrtime_unscaled(); 3270 do { 3271 pct += cpu_update_pct(t, cur_time); 3272 } while ((t = t->t_forw) != p->p_tlist); 3273 3274 psp->pr_pctcpu = prgetpctcpu(pct); 3275 } 3276 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 3277 psp->pr_size = 0; 3278 psp->pr_rssize = 0; 3279 } else { 3280 mutex_exit(&p->p_lock); 3281 AS_LOCK_ENTER(as, RW_READER); 3282 psp->pr_size = (size32_t) 3283 (btopr(as->a_resvsize) * (PAGESIZE / 1024)); 3284 psp->pr_rssize = (size32_t) 3285 (rm_asrss(as) * (PAGESIZE / 1024)); 3286 psp->pr_pctmem = rm_pctmemory(as); 3287 AS_LOCK_EXIT(as); 3288 mutex_enter(&p->p_lock); 3289 } 3290 } 3291 3292 /* 3293 * If we are looking at an LP64 process, zero out 3294 * the fields that cannot be represented in ILP32. 3295 */ 3296 if (p->p_model != DATAMODEL_ILP32) { 3297 psp->pr_size = 0; 3298 psp->pr_rssize = 0; 3299 psp->pr_argv = 0; 3300 psp->pr_envp = 0; 3301 } 3302 } 3303 3304 #endif /* _SYSCALL32_IMPL */ 3305 3306 void 3307 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 3308 { 3309 klwp_t *lwp = ttolwp(t); 3310 sobj_ops_t *sobj; 3311 char c, state; 3312 uint64_t pct; 3313 int retval, niceval; 3314 hrtime_t hrutime, hrstime; 3315 3316 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3317 3318 bzero(psp, sizeof (*psp)); 3319 3320 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3321 psp->pr_lwpid = t->t_tid; 3322 psp->pr_addr = (uintptr_t)t; 3323 psp->pr_wchan = (uintptr_t)t->t_wchan; 3324 3325 /* map the thread state enum into a process state enum */ 3326 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3327 switch (state) { 3328 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3329 case TS_RUN: state = SRUN; c = 'R'; break; 3330 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3331 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3332 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3333 case TS_WAIT: state = SWAIT; c = 'W'; break; 3334 default: state = 0; c = '?'; break; 3335 } 3336 psp->pr_state = state; 3337 psp->pr_sname = c; 3338 if ((sobj = t->t_sobj_ops) != NULL) 3339 psp->pr_stype = SOBJ_TYPE(sobj); 3340 retval = CL_DONICE(t, NULL, 0, &niceval); 3341 if (retval == 0) { 3342 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3343 psp->pr_nice = niceval + NZERO; 3344 } 3345 psp->pr_syscall = t->t_sysnum; 3346 psp->pr_pri = t->t_pri; 3347 psp->pr_start.tv_sec = t->t_start; 3348 psp->pr_start.tv_nsec = 0L; 3349 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3350 scalehrtime(&hrutime); 3351 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3352 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3353 scalehrtime(&hrstime); 3354 hrt2ts(hrutime + hrstime, &psp->pr_time); 3355 /* compute %cpu for the lwp */ 3356 pct = cpu_update_pct(t, gethrtime_unscaled()); 3357 psp->pr_pctcpu = prgetpctcpu(pct); 3358 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3359 if (psp->pr_cpu > 99) 3360 psp->pr_cpu = 99; 3361 3362 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3363 sizeof (psp->pr_clname) - 1); 3364 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3365 psp->pr_onpro = t->t_cpu->cpu_id; 3366 psp->pr_bindpro = t->t_bind_cpu; 3367 psp->pr_bindpset = t->t_bind_pset; 3368 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3369 } 3370 3371 #ifdef _SYSCALL32_IMPL 3372 void 3373 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 3374 { 3375 klwp_t *lwp = ttolwp(t); 3376 sobj_ops_t *sobj; 3377 char c, state; 3378 uint64_t pct; 3379 int retval, niceval; 3380 hrtime_t hrutime, hrstime; 3381 3382 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3383 3384 bzero(psp, sizeof (*psp)); 3385 3386 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3387 psp->pr_lwpid = t->t_tid; 3388 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3389 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 3390 3391 /* map the thread state enum into a process state enum */ 3392 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3393 switch (state) { 3394 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3395 case TS_RUN: state = SRUN; c = 'R'; break; 3396 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3397 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3398 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3399 case TS_WAIT: state = SWAIT; c = 'W'; break; 3400 default: state = 0; c = '?'; break; 3401 } 3402 psp->pr_state = state; 3403 psp->pr_sname = c; 3404 if ((sobj = t->t_sobj_ops) != NULL) 3405 psp->pr_stype = SOBJ_TYPE(sobj); 3406 retval = CL_DONICE(t, NULL, 0, &niceval); 3407 if (retval == 0) { 3408 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3409 psp->pr_nice = niceval + NZERO; 3410 } else { 3411 psp->pr_oldpri = 0; 3412 psp->pr_nice = 0; 3413 } 3414 psp->pr_syscall = t->t_sysnum; 3415 psp->pr_pri = t->t_pri; 3416 psp->pr_start.tv_sec = (time32_t)t->t_start; 3417 psp->pr_start.tv_nsec = 0L; 3418 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3419 scalehrtime(&hrutime); 3420 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3421 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3422 scalehrtime(&hrstime); 3423 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3424 /* compute %cpu for the lwp */ 3425 pct = cpu_update_pct(t, gethrtime_unscaled()); 3426 psp->pr_pctcpu = prgetpctcpu(pct); 3427 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3428 if (psp->pr_cpu > 99) 3429 psp->pr_cpu = 99; 3430 3431 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3432 sizeof (psp->pr_clname) - 1); 3433 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3434 psp->pr_onpro = t->t_cpu->cpu_id; 3435 psp->pr_bindpro = t->t_bind_cpu; 3436 psp->pr_bindpset = t->t_bind_pset; 3437 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3438 } 3439 #endif /* _SYSCALL32_IMPL */ 3440 3441 #ifdef _SYSCALL32_IMPL 3442 3443 #define PR_COPY_FIELD(s, d, field) d->field = s->field 3444 3445 #define PR_COPY_FIELD_ILP32(s, d, field) \ 3446 if (s->pr_dmodel == PR_MODEL_ILP32) { \ 3447 d->field = s->field; \ 3448 } 3449 3450 #define PR_COPY_TIMESPEC(s, d, field) \ 3451 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field); 3452 3453 #define PR_COPY_BUF(s, d, field) \ 3454 bcopy(s->field, d->field, sizeof (d->field)); 3455 3456 #define PR_IGNORE_FIELD(s, d, field) 3457 3458 void 3459 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest) 3460 { 3461 bzero(dest, sizeof (*dest)); 3462 3463 PR_COPY_FIELD(src, dest, pr_flag); 3464 PR_COPY_FIELD(src, dest, pr_lwpid); 3465 PR_IGNORE_FIELD(src, dest, pr_addr); 3466 PR_IGNORE_FIELD(src, dest, pr_wchan); 3467 PR_COPY_FIELD(src, dest, pr_stype); 3468 PR_COPY_FIELD(src, dest, pr_state); 3469 PR_COPY_FIELD(src, dest, pr_sname); 3470 PR_COPY_FIELD(src, dest, pr_nice); 3471 PR_COPY_FIELD(src, dest, pr_syscall); 3472 PR_COPY_FIELD(src, dest, pr_oldpri); 3473 PR_COPY_FIELD(src, dest, pr_cpu); 3474 PR_COPY_FIELD(src, dest, pr_pri); 3475 PR_COPY_FIELD(src, dest, pr_pctcpu); 3476 PR_COPY_TIMESPEC(src, dest, pr_start); 3477 PR_COPY_BUF(src, dest, pr_clname); 3478 PR_COPY_BUF(src, dest, pr_name); 3479 PR_COPY_FIELD(src, dest, pr_onpro); 3480 PR_COPY_FIELD(src, dest, pr_bindpro); 3481 PR_COPY_FIELD(src, dest, pr_bindpset); 3482 PR_COPY_FIELD(src, dest, pr_lgrp); 3483 } 3484 3485 void 3486 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest) 3487 { 3488 bzero(dest, sizeof (*dest)); 3489 3490 PR_COPY_FIELD(src, dest, pr_flag); 3491 PR_COPY_FIELD(src, dest, pr_nlwp); 3492 PR_COPY_FIELD(src, dest, pr_pid); 3493 PR_COPY_FIELD(src, dest, pr_ppid); 3494 PR_COPY_FIELD(src, dest, pr_pgid); 3495 
PR_COPY_FIELD(src, dest, pr_sid); 3496 PR_COPY_FIELD(src, dest, pr_uid); 3497 PR_COPY_FIELD(src, dest, pr_euid); 3498 PR_COPY_FIELD(src, dest, pr_gid); 3499 PR_COPY_FIELD(src, dest, pr_egid); 3500 PR_IGNORE_FIELD(src, dest, pr_addr); 3501 PR_COPY_FIELD_ILP32(src, dest, pr_size); 3502 PR_COPY_FIELD_ILP32(src, dest, pr_rssize); 3503 PR_COPY_FIELD(src, dest, pr_ttydev); 3504 PR_COPY_FIELD(src, dest, pr_pctcpu); 3505 PR_COPY_FIELD(src, dest, pr_pctmem); 3506 PR_COPY_TIMESPEC(src, dest, pr_start); 3507 PR_COPY_TIMESPEC(src, dest, pr_time); 3508 PR_COPY_TIMESPEC(src, dest, pr_ctime); 3509 PR_COPY_BUF(src, dest, pr_fname); 3510 PR_COPY_BUF(src, dest, pr_psargs); 3511 PR_COPY_FIELD(src, dest, pr_wstat); 3512 PR_COPY_FIELD(src, dest, pr_argc); 3513 PR_COPY_FIELD_ILP32(src, dest, pr_argv); 3514 PR_COPY_FIELD_ILP32(src, dest, pr_envp); 3515 PR_COPY_FIELD(src, dest, pr_dmodel); 3516 PR_COPY_FIELD(src, dest, pr_taskid); 3517 PR_COPY_FIELD(src, dest, pr_projid); 3518 PR_COPY_FIELD(src, dest, pr_nzomb); 3519 PR_COPY_FIELD(src, dest, pr_poolid); 3520 PR_COPY_FIELD(src, dest, pr_contract); 3523 3524 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp); 3525 } 3526 3527 #undef PR_COPY_FIELD 3528 #undef PR_COPY_FIELD_ILP32 3529 #undef PR_COPY_TIMESPEC 3530 #undef PR_COPY_BUF 3531 #undef PR_IGNORE_FIELD 3532 3533 #endif /* _SYSCALL32_IMPL */ 3534 3535 /* 3536 * This used to get called when microstate accounting was disabled but 3537 * microstate information was requested. Since microstate accounting is on 3538 * regardless of the proc flags, this simply makes it appear to procfs that 3539 * microstate accounting is on. This is relatively meaningless since you 3540 * can't turn it off, but this is here for the sake of appearances. 3541 */ 3542 3543 /*ARGSUSED*/ 3544 void 3545 estimate_msacct(kthread_t *t, hrtime_t curtime) 3546 { 3547 proc_t *p; 3548 3549 if (t == NULL) 3550 return; 3551 3552 p = ttoproc(t); 3553 ASSERT(MUTEX_HELD(&p->p_lock)); 3554 3555 /* 3556 * A system process (p0) could be referenced if the thread is 3557 * in the process of exiting. Don't turn on microstate accounting 3558 * in that case. 3559 */ 3560 if (p->p_flag & SSYS) 3561 return; 3562 3563 /* 3564 * Loop through all the LWPs (kernel threads) in the process. 3565 */ 3566 t = p->p_tlist; 3567 do { 3568 t->t_proc_flag |= TP_MSACCT; 3569 } while ((t = t->t_forw) != p->p_tlist); 3570 3571 p->p_flag |= SMSACCT; /* set process-wide MSACCT */ 3572 } 3573 3574 /* 3575 * It's not really possible to disable microstate accounting anymore. 3576 * However, this routine simply turns off the ms accounting flags in a 3577 * process. This way procfs can still pretend to turn microstate accounting 3578 * on and off for a process, but it actually doesn't do anything. This is 3579 * a neutered form of preemptive idiot-proofing. 3580 */ 3581 void 3582 disable_msacct(proc_t *p) 3583 { 3584 kthread_t *t; 3585 3586 ASSERT(MUTEX_HELD(&p->p_lock)); 3587 3588 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */ 3589 /* 3590 * Loop through all the LWPs (kernel threads) in the process. 3591 */ 3592 if ((t = p->p_tlist) != NULL) { 3593 do { 3594 /* clear per-thread flag */ 3595 t->t_proc_flag &= ~TP_MSACCT; 3596 } while ((t = t->t_forw) != p->p_tlist); 3597 } 3598 } 3599 3600 /* 3601 * Return resource usage information.
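 *
 * The microstate times gathered below partition the lwp's elapsed
 * time, so that, approximately (modulo scaling and concurrent
 * updates):
 *
 *	pr_rtime ~= pr_utime + pr_stime + pr_ttime + pr_tftime +
 *	    pr_dftime + pr_kftime + pr_ltime + pr_slptime +
 *	    pr_wtime + pr_stoptime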
3602 */ 3603 void 3604 prgetusage(kthread_t *t, prhusage_t *pup) 3605 { 3606 klwp_t *lwp = ttolwp(t); 3607 hrtime_t *mstimep; 3608 struct mstate *ms = &lwp->lwp_mstate; 3609 int state; 3610 int i; 3611 hrtime_t curtime; 3612 hrtime_t waitrq; 3613 hrtime_t tmp1; 3614 3615 curtime = gethrtime_unscaled(); 3616 3617 pup->pr_lwpid = t->t_tid; 3618 pup->pr_count = 1; 3619 pup->pr_create = ms->ms_start; 3620 pup->pr_term = ms->ms_term; 3621 scalehrtime(&pup->pr_create); 3622 scalehrtime(&pup->pr_term); 3623 if (ms->ms_term == 0) { 3624 pup->pr_rtime = curtime - ms->ms_start; 3625 scalehrtime(&pup->pr_rtime); 3626 } else { 3627 pup->pr_rtime = ms->ms_term - ms->ms_start; 3628 scalehrtime(&pup->pr_rtime); 3629 } 3630 3631 3632 pup->pr_utime = ms->ms_acct[LMS_USER]; 3633 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 3634 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 3635 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 3636 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 3637 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 3638 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3639 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 3640 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3641 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3642 3643 prscaleusage(pup); 3644 3645 /* 3646 * Adjust for time waiting in the dispatcher queue. 3647 */ 3648 waitrq = t->t_waitrq; /* hopefully atomic */ 3649 if (waitrq != 0) { 3650 if (waitrq > curtime) { 3651 curtime = gethrtime_unscaled(); 3652 } 3653 tmp1 = curtime - waitrq; 3654 scalehrtime(&tmp1); 3655 pup->pr_wtime += tmp1; 3656 curtime = waitrq; 3657 } 3658 3659 /* 3660 * Adjust for time spent in current microstate. 3661 */ 3662 if (ms->ms_state_start > curtime) { 3663 curtime = gethrtime_unscaled(); 3664 } 3665 3666 i = 0; 3667 do { 3668 switch (state = t->t_mstate) { 3669 case LMS_SLEEP: 3670 /* 3671 * Update the timer for the current sleep state. 3672 */ 3673 switch (state = ms->ms_prev) { 3674 case LMS_TFAULT: 3675 case LMS_DFAULT: 3676 case LMS_KFAULT: 3677 case LMS_USER_LOCK: 3678 break; 3679 default: 3680 state = LMS_SLEEP; 3681 break; 3682 } 3683 break; 3684 case LMS_TFAULT: 3685 case LMS_DFAULT: 3686 case LMS_KFAULT: 3687 case LMS_USER_LOCK: 3688 state = LMS_SYSTEM; 3689 break; 3690 } 3691 switch (state) { 3692 case LMS_USER: mstimep = &pup->pr_utime; break; 3693 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3694 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3695 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3696 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3697 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3698 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3699 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3700 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3701 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3702 default: panic("prgetusage: unknown microstate"); 3703 } 3704 tmp1 = curtime - ms->ms_state_start; 3705 if (tmp1 < 0) { 3706 curtime = gethrtime_unscaled(); 3707 i++; 3708 continue; 3709 } 3710 scalehrtime(&tmp1); 3711 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 3712 3713 *mstimep += tmp1; 3714 3715 /* update pup timestamp */ 3716 pup->pr_tstamp = curtime; 3717 scalehrtime(&pup->pr_tstamp); 3718 3719 /* 3720 * Resource usage counters. 
3721 */ 3722 pup->pr_minf = lwp->lwp_ru.minflt; 3723 pup->pr_majf = lwp->lwp_ru.majflt; 3724 pup->pr_nswap = lwp->lwp_ru.nswap; 3725 pup->pr_inblk = lwp->lwp_ru.inblock; 3726 pup->pr_oublk = lwp->lwp_ru.oublock; 3727 pup->pr_msnd = lwp->lwp_ru.msgsnd; 3728 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 3729 pup->pr_sigs = lwp->lwp_ru.nsignals; 3730 pup->pr_vctx = lwp->lwp_ru.nvcsw; 3731 pup->pr_ictx = lwp->lwp_ru.nivcsw; 3732 pup->pr_sysc = lwp->lwp_ru.sysc; 3733 pup->pr_ioch = lwp->lwp_ru.ioch; 3734 } 3735 3736 /* 3737 * Convert ms_acct stats from unscaled high-res time to nanoseconds 3738 */ 3739 void 3740 prscaleusage(prhusage_t *usg) 3741 { 3742 scalehrtime(&usg->pr_utime); 3743 scalehrtime(&usg->pr_stime); 3744 scalehrtime(&usg->pr_ttime); 3745 scalehrtime(&usg->pr_tftime); 3746 scalehrtime(&usg->pr_dftime); 3747 scalehrtime(&usg->pr_kftime); 3748 scalehrtime(&usg->pr_ltime); 3749 scalehrtime(&usg->pr_slptime); 3750 scalehrtime(&usg->pr_wtime); 3751 scalehrtime(&usg->pr_stoptime); 3752 } 3753 3754 3755 /* 3756 * Sum resource usage information. 3757 */ 3758 void 3759 praddusage(kthread_t *t, prhusage_t *pup) 3760 { 3761 klwp_t *lwp = ttolwp(t); 3762 hrtime_t *mstimep; 3763 struct mstate *ms = &lwp->lwp_mstate; 3764 int state; 3765 int i; 3766 hrtime_t curtime; 3767 hrtime_t waitrq; 3768 hrtime_t tmp; 3769 prhusage_t conv; 3770 3771 curtime = gethrtime_unscaled(); 3772 3773 if (ms->ms_term == 0) { 3774 tmp = curtime - ms->ms_start; 3775 scalehrtime(&tmp); 3776 pup->pr_rtime += tmp; 3777 } else { 3778 tmp = ms->ms_term - ms->ms_start; 3779 scalehrtime(&tmp); 3780 pup->pr_rtime += tmp; 3781 } 3782 3783 conv.pr_utime = ms->ms_acct[LMS_USER]; 3784 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 3785 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 3786 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 3787 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 3788 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 3789 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3790 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 3791 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3792 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3793 3794 prscaleusage(&conv); 3795 3796 pup->pr_utime += conv.pr_utime; 3797 pup->pr_stime += conv.pr_stime; 3798 pup->pr_ttime += conv.pr_ttime; 3799 pup->pr_tftime += conv.pr_tftime; 3800 pup->pr_dftime += conv.pr_dftime; 3801 pup->pr_kftime += conv.pr_kftime; 3802 pup->pr_ltime += conv.pr_ltime; 3803 pup->pr_slptime += conv.pr_slptime; 3804 pup->pr_wtime += conv.pr_wtime; 3805 pup->pr_stoptime += conv.pr_stoptime; 3806 3807 /* 3808 * Adjust for time waiting in the dispatcher queue. 3809 */ 3810 waitrq = t->t_waitrq; /* hopefully atomic */ 3811 if (waitrq != 0) { 3812 if (waitrq > curtime) { 3813 curtime = gethrtime_unscaled(); 3814 } 3815 tmp = curtime - waitrq; 3816 scalehrtime(&tmp); 3817 pup->pr_wtime += tmp; 3818 curtime = waitrq; 3819 } 3820 3821 /* 3822 * Adjust for time spent in current microstate. 3823 */ 3824 if (ms->ms_state_start > curtime) { 3825 curtime = gethrtime_unscaled(); 3826 } 3827 3828 i = 0; 3829 do { 3830 switch (state = t->t_mstate) { 3831 case LMS_SLEEP: 3832 /* 3833 * Update the timer for the current sleep state. 
3834 */ 3835 switch (state = ms->ms_prev) { 3836 case LMS_TFAULT: 3837 case LMS_DFAULT: 3838 case LMS_KFAULT: 3839 case LMS_USER_LOCK: 3840 break; 3841 default: 3842 state = LMS_SLEEP; 3843 break; 3844 } 3845 break; 3846 case LMS_TFAULT: 3847 case LMS_DFAULT: 3848 case LMS_KFAULT: 3849 case LMS_USER_LOCK: 3850 state = LMS_SYSTEM; 3851 break; 3852 } 3853 switch (state) { 3854 case LMS_USER: mstimep = &pup->pr_utime; break; 3855 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3856 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3857 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3858 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3859 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3860 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3861 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3862 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3863 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3864 default: panic("praddusage: unknown microstate"); 3865 } 3866 tmp = curtime - ms->ms_state_start; 3867 if (tmp < 0) { 3868 curtime = gethrtime_unscaled(); 3869 i++; 3870 continue; 3871 } 3872 scalehrtime(&tmp); 3873 } while (tmp < 0 && i < MAX_ITERS_SPIN); 3874 3875 *mstimep += tmp; 3876 3877 /* update pup timestamp */ 3878 pup->pr_tstamp = curtime; 3879 scalehrtime(&pup->pr_tstamp); 3880 3881 /* 3882 * Resource usage counters. 3883 */ 3884 pup->pr_minf += lwp->lwp_ru.minflt; 3885 pup->pr_majf += lwp->lwp_ru.majflt; 3886 pup->pr_nswap += lwp->lwp_ru.nswap; 3887 pup->pr_inblk += lwp->lwp_ru.inblock; 3888 pup->pr_oublk += lwp->lwp_ru.oublock; 3889 pup->pr_msnd += lwp->lwp_ru.msgsnd; 3890 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 3891 pup->pr_sigs += lwp->lwp_ru.nsignals; 3892 pup->pr_vctx += lwp->lwp_ru.nvcsw; 3893 pup->pr_ictx += lwp->lwp_ru.nivcsw; 3894 pup->pr_sysc += lwp->lwp_ru.sysc; 3895 pup->pr_ioch += lwp->lwp_ru.ioch; 3896 } 3897 3898 /* 3899 * Convert a prhusage_t to a prusage_t. 3900 * This means convert each hrtime_t to a timestruc_t 3901 * and copy the count fields uint64_t => ulong_t. 
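 *
 * hrt2ts() splits a nanosecond count into seconds and nanoseconds;
 * for example, 3500000000 becomes { tv_sec = 3, tv_nsec = 500000000 }.
 * The trailing loop narrows the 22 counters from pr_minf onward in
 * one pass, which relies on their remaining contiguous and in the
 * same order in both structures.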
3902 */ 3903 void 3904 prcvtusage(prhusage_t *pup, prusage_t *upup) 3905 { 3906 uint64_t *ullp; 3907 ulong_t *ulp; 3908 int i; 3909 3910 upup->pr_lwpid = pup->pr_lwpid; 3911 upup->pr_count = pup->pr_count; 3912 3913 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 3914 hrt2ts(pup->pr_create, &upup->pr_create); 3915 hrt2ts(pup->pr_term, &upup->pr_term); 3916 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 3917 hrt2ts(pup->pr_utime, &upup->pr_utime); 3918 hrt2ts(pup->pr_stime, &upup->pr_stime); 3919 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 3920 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 3921 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 3922 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 3923 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 3924 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 3925 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 3926 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3927 bzero(upup->filltime, sizeof (upup->filltime)); 3928 3929 ullp = &pup->pr_minf; 3930 ulp = &upup->pr_minf; 3931 for (i = 0; i < 22; i++) 3932 *ulp++ = (ulong_t)*ullp++; 3933 } 3934 3935 #ifdef _SYSCALL32_IMPL 3936 void 3937 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3938 { 3939 uint64_t *ullp; 3940 uint32_t *ulp; 3941 int i; 3942 3943 upup->pr_lwpid = pup->pr_lwpid; 3944 upup->pr_count = pup->pr_count; 3945 3946 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp); 3947 hrt2ts32(pup->pr_create, &upup->pr_create); 3948 hrt2ts32(pup->pr_term, &upup->pr_term); 3949 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3950 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3951 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3952 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3953 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3954 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3955 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3956 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3957 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3958 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3959 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3960 bzero(upup->filltime, sizeof (upup->filltime)); 3961 3962 ullp = &pup->pr_minf; 3963 ulp = &upup->pr_minf; 3964 for (i = 0; i < 22; i++) 3965 *ulp++ = (uint32_t)*ullp++; 3966 } 3967 #endif /* _SYSCALL32_IMPL */ 3968 3969 /* 3970 * Determine whether a set is empty. 3971 */ 3972 int 3973 setisempty(uint32_t *sp, uint_t n) 3974 { 3975 while (n--) 3976 if (*sp++) 3977 return (0); 3978 return (1); 3979 } 3980 3981 /* 3982 * Utility routine for establishing a watched area in the process. 3983 * Keep the list of watched areas sorted by virtual address. 3984 */ 3985 int 3986 set_watched_area(proc_t *p, struct watched_area *pwa) 3987 { 3988 caddr_t vaddr = pwa->wa_vaddr; 3989 caddr_t eaddr = pwa->wa_eaddr; 3990 ulong_t flags = pwa->wa_flags; 3991 struct watched_area *target; 3992 avl_index_t where; 3993 int error = 0; 3994 3995 /* we must not be holding p->p_lock, but the process must be locked */ 3996 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3997 ASSERT(p->p_proc_flag & P_PR_LOCK); 3998 3999 /* 4000 * If this is our first watchpoint, enable watchpoints for the process. 4001 */ 4002 if (!pr_watch_active(p)) { 4003 kthread_t *t; 4004 4005 mutex_enter(&p->p_lock); 4006 if ((t = p->p_tlist) != NULL) { 4007 do { 4008 watch_enable(t); 4009 } while ((t = t->t_forw) != p->p_tlist); 4010 } 4011 mutex_exit(&p->p_lock); 4012 } 4013 4014 target = pr_find_watched_area(p, pwa, &where); 4015 if (target != NULL) { 4016 /* 4017 * We discovered an existing, overlapping watched area. 4018 * Allow it only if it is an exact match. 
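 *
 * For example, re-tracing the same [vaddr, eaddr) range with new
 * flags (say, WA_WRITE changed to WA_READ|WA_WRITE) updates the
 * existing area in place, while a request that only partially
 * overlaps an existing area fails with EINVAL.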

/*
 * Utility routine for establishing a watched area in the process.
 * Keep the list of watched areas sorted by virtual address.
 */
int
set_watched_area(proc_t *p, struct watched_area *pwa)
{
	caddr_t vaddr = pwa->wa_vaddr;
	caddr_t eaddr = pwa->wa_eaddr;
	ulong_t flags = pwa->wa_flags;
	struct watched_area *target;
	avl_index_t where;
	int error = 0;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	/*
	 * If this is our first watchpoint, enable watchpoints for the process.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_enable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	target = pr_find_watched_area(p, pwa, &where);
	if (target != NULL) {
		/*
		 * We discovered an existing, overlapping watched area.
		 * Allow it only if it is an exact match.
		 */
		if (target->wa_vaddr != vaddr ||
		    target->wa_eaddr != eaddr)
			error = EINVAL;
		else if (target->wa_flags != flags) {
			error = set_watched_page(p, vaddr, eaddr,
			    flags, target->wa_flags);
			target->wa_flags = flags;
		}
		kmem_free(pwa, sizeof (struct watched_area));
	} else {
		avl_insert(&p->p_warea, pwa, where);
		error = set_watched_page(p, vaddr, eaddr, flags, 0);
	}

	return (error);
}

/*
 * Utility routine for clearing a watched area in the process.
 * Must be an exact match of the virtual address.
 * The size and flags don't matter.
 */
int
clear_watched_area(proc_t *p, struct watched_area *pwa)
{
	struct watched_area *found;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	if (!pr_watch_active(p)) {
		kmem_free(pwa, sizeof (struct watched_area));
		return (0);
	}

	/*
	 * Look for a matching address in the watched areas.  If a match is
	 * found, clear the old watched area and adjust the watched page(s).
	 * It is not an error if there is no match.
	 */
	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
	    found->wa_vaddr == pwa->wa_vaddr) {
		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
		    found->wa_flags);
		avl_remove(&p->p_warea, found);
		kmem_free(found, sizeof (struct watched_area));
	}

	kmem_free(pwa, sizeof (struct watched_area));

	/*
	 * If we removed the last watched area from the process, disable
	 * watchpoints.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_disable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	return (0);
}

/*
 * Frees all the watched_area structures.
 */
void
pr_free_watchpoints(proc_t *p)
{
	struct watched_area *delp;
	void *cookie;

	cookie = NULL;
	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
		kmem_free(delp, sizeof (struct watched_area));

	avl_destroy(&p->p_warea);
}
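
/*
 * Two AVL trees are involved in watchpoint bookkeeping: p_warea holds
 * one watched_area per user-requested range, while the address space's
 * a_wpage tree holds one watched_page per page, with wp_read, wp_write
 * and wp_exec counting the watched areas that overlap that page.  The
 * routines above maintain both trees; the routines below tear the
 * per-page state down and restore the original page protections.
 */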

/*
 * Called by the traced process itself to unwatch all the
 * pages while deallocating the list of watched_page structs.
 */
void
pr_free_watched_pages(proc_t *p)
{
	struct as *as = p->p_as;
	struct watched_page *pwp;
	uint_t prot;
	int retrycnt, err;
	void *cookie;

	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
	AS_LOCK_ENTER(as, RW_WRITER);

	cookie = NULL;
	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
		retrycnt = 0;
		if ((prot = pwp->wp_oprot) != 0) {
			caddr_t addr = pwp->wp_vaddr;
			struct seg *seg;
retry:
			if ((pwp->wp_prot != prot ||
			    (pwp->wp_flags & WP_NOWATCH)) &&
			    (seg = as_segat(as, addr)) != NULL) {
				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
				if (err == IE_RETRY) {
					ASSERT(retrycnt == 0);
					retrycnt++;
					goto retry;
				}
			}
		}
		kmem_free(pwp, sizeof (struct watched_page));
	}

	avl_destroy(&as->a_wpage);
	p->p_wprot = NULL;

	AS_LOCK_EXIT(as);
}

/*
 * Insert a watched area into the list of watched pages.
 * If oflags is zero then we are adding a new watched area.
 * Otherwise we are changing the flags of an existing watched area.
 */
static int
set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
    ulong_t flags, ulong_t oflags)
{
	struct as *as = p->p_as;
	avl_tree_t *pwp_tree;
	struct watched_page *pwp, *newpwp;
	struct watched_page tpw;
	avl_index_t where;
	struct seg *seg;
	uint_t prot;
	caddr_t addr;

	/*
	 * We need to pre-allocate a list of structures before we grab the
	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
	 * held.
	 */
	newpwp = NULL;
	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	    addr < eaddr; addr += PAGESIZE) {
		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
		pwp->wp_list = newpwp;
		newpwp = pwp;
	}

	AS_LOCK_ENTER(as, RW_WRITER);

	/*
	 * Search for an existing watched page to contain the watched area.
	 * If none is found, grab a new one from the available list
	 * and insert it in the active list, keeping the list sorted
	 * by user-level virtual address.
	 */
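	/*
	 * While the process is in vfork(), i.e. SVFWAIT is set and the
	 * child has borrowed this address space, remember watched pages
	 * on the process's own p_wpage tree rather than on the shared
	 * address space's a_wpage tree, so they can be reinstated when
	 * the address space is given back.
	 */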
	if (p->p_flag & SVFWAIT)
		pwp_tree = &p->p_wpage;
	else
		pwp_tree = &as->a_wpage;

again:
	if (avl_numnodes(pwp_tree) > prnwatch) {
		AS_LOCK_EXIT(as);
		while (newpwp != NULL) {
			pwp = newpwp->wp_list;
			kmem_free(newpwp, sizeof (struct watched_page));
			newpwp = pwp;
		}
		return (E2BIG);
	}

	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
		pwp = newpwp;
		newpwp = newpwp->wp_list;
		pwp->wp_list = NULL;
		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);
		avl_insert(pwp_tree, pwp, where);
	}

	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);

	if (oflags & WA_READ)
		pwp->wp_read--;
	if (oflags & WA_WRITE)
		pwp->wp_write--;
	if (oflags & WA_EXEC)
		pwp->wp_exec--;

	ASSERT(pwp->wp_read >= 0);
	ASSERT(pwp->wp_write >= 0);
	ASSERT(pwp->wp_exec >= 0);

	if (flags & WA_READ)
		pwp->wp_read++;
	if (flags & WA_WRITE)
		pwp->wp_write++;
	if (flags & WA_EXEC)
		pwp->wp_exec++;

	if (!(p->p_flag & SVFWAIT)) {
		vaddr = pwp->wp_vaddr;
		if (pwp->wp_oprot == 0 &&
		    (seg = as_segat(as, vaddr)) != NULL) {
			SEGOP_GETPROT(seg, vaddr, 0, &prot);
			pwp->wp_oprot = (uchar_t)prot;
			pwp->wp_prot = (uchar_t)prot;
		}
		if (pwp->wp_oprot != 0) {
			prot = pwp->wp_oprot;
			if (pwp->wp_read)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (pwp->wp_write)
				prot &= ~PROT_WRITE;
			if (pwp->wp_exec)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (!(pwp->wp_flags & WP_NOWATCH) &&
			    pwp->wp_prot != prot &&
			    (pwp->wp_flags & WP_SETPROT) == 0) {
				pwp->wp_flags |= WP_SETPROT;
				pwp->wp_list = p->p_wprot;
				p->p_wprot = pwp;
			}
			pwp->wp_prot = (uchar_t)prot;
		}
	}

	/*
	 * If the watched area extends into the next page then do
	 * it over again with the virtual address of the next page.
	 */
	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
		goto again;

	AS_LOCK_EXIT(as);

	/*
	 * Free any pages we may have over-allocated.
	 */
	while (newpwp != NULL) {
		pwp = newpwp->wp_list;
		kmem_free(newpwp, sizeof (struct watched_page));
		newpwp = pwp;
	}

	return (0);
}

/*
 * Remove a watched area from the list of watched pages.
 * A watched area may extend over more than one page.
 */
static void
clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
{
	struct as *as = p->p_as;
	struct watched_page *pwp;
	struct watched_page tpw;
	avl_tree_t *tree;
	avl_index_t where;

	AS_LOCK_ENTER(as, RW_WRITER);

	if (p->p_flag & SVFWAIT)
		tree = &p->p_wpage;
	else
		tree = &as->a_wpage;

	tpw.wp_vaddr = vaddr =
	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	pwp = avl_find(tree, &tpw, &where);
	if (pwp == NULL)
		pwp = avl_nearest(tree, where, AVL_AFTER);

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
		ASSERT(vaddr <= pwp->wp_vaddr);

		if (flags & WA_READ)
			pwp->wp_read--;
		if (flags & WA_WRITE)
			pwp->wp_write--;
		if (flags & WA_EXEC)
			pwp->wp_exec--;

		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
			/*
			 * Reset the hat layer's protections on this page.
			 */
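			/*
			 * Note how the watch counts map onto page
			 * protections, here and in set_watched_page()
			 * above: a read watchpoint must intercept every
			 * reference, so it revokes read, write and execute
			 * permission alike; a write watchpoint revokes only
			 * write permission; an execute watchpoint also
			 * revokes all access, presumably because instruction
			 * fetch cannot be trapped in isolation on every
			 * platform.
			 */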
			if (pwp->wp_oprot != 0) {
				uint_t prot = pwp->wp_oprot;

				if (pwp->wp_read)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				if (pwp->wp_write)
					prot &= ~PROT_WRITE;
				if (pwp->wp_exec)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				if (!(pwp->wp_flags & WP_NOWATCH) &&
				    pwp->wp_prot != prot &&
				    (pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
				pwp->wp_prot = (uchar_t)prot;
			}
		} else {
			/*
			 * No watched areas remain in this page.
			 * Reset everything to normal.
			 */
			if (pwp->wp_oprot != 0) {
				pwp->wp_prot = pwp->wp_oprot;
				if ((pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
			}
		}

		pwp = AVL_NEXT(tree, pwp);
	}

	AS_LOCK_EXIT(as);
}

/*
 * Return the original protections for the specified page.
 */
static void
getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
{
	struct watched_page *pwp;
	struct watched_page tpw;

	ASSERT(AS_LOCK_HELD(as));

	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
		*prot = pwp->wp_oprot;
}

static prpagev_t *
pr_pagev_create(struct seg *seg, int check_noreserve)
{
	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
	size_t total_pages = seg_pages(seg);

	/*
	 * Limit the size of our vectors to pagev_lim pages at a time.  We
	 * need 4 or 5 bytes of storage per page, so this means we limit
	 * ourselves to about a megabyte of kernel heap by default.
	 */
	pagev->pg_npages = MIN(total_pages, pagev_lim);
	pagev->pg_pnbase = 0;

	pagev->pg_protv =
	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);

	if (check_noreserve)
		pagev->pg_incore =
		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
	else
		pagev->pg_incore = NULL;

	return (pagev);
}

static void
pr_pagev_destroy(prpagev_t *pagev)
{
	if (pagev->pg_incore != NULL)
		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));

	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
	kmem_free(pagev, sizeof (prpagev_t));
}
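
/*
 * Fill the page vector with protection (and, if requested, incore)
 * information for the pages of seg starting at addr, stopping at eaddr
 * or at the capacity of the vector, whichever comes first.  When
 * NORESERVE checking is in effect, pages without backing store are
 * skipped, so the address returned is that of the first backed page at
 * or beyond addr, or eaddr if no backed page remains in the range.
 */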
static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;

	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here:  We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory.  If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}

static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			if ((addr += PAGESIZE) == eaddr) {
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	*protp = prot;
	return (addr);
}
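
/*
 * Return the effective size of a segment for /proc reporting purposes:
 * the full reservation when reserved is non-zero (as for the rmap
 * file), otherwise the size of the object actually backing the
 * segment, as computed by the special cases below.
 */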
size_t
pr_getsegsize(struct seg *seg, int reserved)
{
	size_t size = seg->s_size;

	/*
	 * If we're interested in the reserved space, return the size of the
	 * segment itself.  Everything else in this function is a special case
	 * to determine the actual underlying size of various segment types.
	 */
	if (reserved)
		return (size);

	/*
	 * If this is a segvn mapping of a regular file, return the smaller
	 * of the segment size and the remaining size of the file beyond
	 * the file offset corresponding to seg->s_base.
	 */
	if (seg->s_ops == &segvn_ops) {
		vattr_t vattr;
		vnode_t *vp;

		vattr.va_mask = AT_SIZE;

		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
		    vp != NULL && vp->v_type == VREG &&
		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {

			u_offset_t fsize = vattr.va_size;
			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);

			if (fsize < offset)
				fsize = 0;
			else
				fsize -= offset;

			fsize = roundup(fsize, (u_offset_t)PAGESIZE);

			if (fsize < (u_offset_t)size)
				size = (size_t)fsize;
		}

		return (size);
	}

	/*
	 * If this is an ISM shared segment, don't include pages that are
	 * beyond the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops)
		return (MIN(spt_realsize(seg), size));

	/*
	 * If this segment is a mapping from /dev/null, then this is a
	 * reservation of virtual address space and has no actual size.
	 * Such segments are backed by segdev and have type set to neither
	 * MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, seg->s_base) &
	    (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * If this segment doesn't match one of the special types we handle,
	 * just return the size of the segment itself.
	 */
	return (size);
}

uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort.  We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
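	/*
	 * In the fast paths below we report the entire remainder of the
	 * segment, [saddr, eaddr), as a single range; otherwise we fall
	 * into the page-vector code, which parcels out the segment one
	 * protection boundary per call, carrying the prpagev_t in *tmp
	 * across calls until pr_getprot_done() is reached at eaddr.
	 */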
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}

void
pr_getprot_done(void **tmp)
{
	if (*tmp != NULL) {
		pr_pagev_destroy((prpagev_t *)*tmp);
		*tmp = NULL;
	}
}

/*
 * Return true iff the vnode is a /proc file from the object directory.
 */
int
pr_isobject(vnode_t *vp)
{
	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
}

/*
 * Return true iff the vnode is a /proc file opened by the process itself.
 */
int
pr_isself(vnode_t *vp)
{
	/*
	 * XXX: To retain binary compatibility with the old
	 * ioctl()-based version of /proc, we exempt self-opens
	 * of /proc/<pid> from being marked close-on-exec.
	 */
	return (vn_matchops(vp, prvnodeops) &&
	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
	    VTOP(vp)->pr_type != PR_PIDDIR);
}

static ssize_t
pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
{
	ssize_t pagesize, hatsize;

	ASSERT(AS_WRITE_HELD(seg->s_as));
	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
	ASSERT(saddr < eaddr);

	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
	ASSERT(pagesize != 0);

	if (pagesize == -1)
		pagesize = PAGESIZE;

	saddr += P2NPHASE((uintptr_t)saddr, pagesize);

	while (saddr < eaddr) {
		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
			break;
		ASSERT(IS_P2ALIGNED(saddr, pagesize));
		saddr += pagesize;
	}

	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
	return (hatsize);
}
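
/*
 * The xmap code below backs reads of the /proc/<pid>/xmap file; each
 * prxmap_t describes one run of pages with uniform protections and a
 * uniform HAT page size.  A consumer reads the file and walks the
 * resulting array; an illustrative sketch only (NXMAP is an arbitrary
 * buffer size chosen by the consumer):
 *
 *	prxmap_t xmap[NXMAP];
 *	ssize_t n = pread(xmapfd, xmap, sizeof (xmap), 0);
 *	int i;
 *
 *	for (i = 0; i < n / sizeof (prxmap_t); i++)
 *		if (xmap[i].pr_mflags & MA_WRITE)
 *			... writable mapping at xmap[i].pr_vaddr ...
 */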

/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (uintptr_t)saddr;
				mp->pr_size = naddr - saddr;
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					mp->pr_dev = vattr.va_fsid;
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
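				/*
				 * shmgetid() returns SHMID_NONE when the
				 * segment is not SysV shared memory and
				 * SHMID_FREE when the identifier has been
				 * removed; the latter is reported as a
				 * shmid of -1 that still carries MA_SHM.
				 */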
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

/*
 * Return the process's credentials.  We don't need a 32-bit equivalent of
 * this function because prcred_t and prcred32_t are actually the same.
 */
void
prgetcred(proc_t *p, prcred_t *pcrp)
{
	mutex_enter(&p->p_crlock);
	cred2prcred(p->p_cred, pcrp);
	mutex_exit(&p->p_crlock);
}

void
prgetsecflags(proc_t *p, prsecflags_t *psfp)
{
	ASSERT(psfp != NULL);

	bzero(psfp, sizeof (*psfp));
	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
	psfp->pr_lower = p->p_secflags.psf_lower;
	psfp->pr_upper = p->p_secflags.psf_upper;
	psfp->pr_effective = p->p_secflags.psf_effective;
	psfp->pr_inherit = p->p_secflags.psf_inherit;
}

/*
 * Compute the actual size of the prpriv_t structure.
 */
size_t
prgetprivsize(void)
{
	return (priv_prgetprivsize(NULL));
}

/*
 * Return the process's privileges.  We don't need a 32-bit equivalent of
 * this function because prpriv_t and prpriv32_t are actually the same.
 */
void
prgetpriv(proc_t *p, prpriv_t *pprp)
{
	mutex_enter(&p->p_crlock);
	cred2prpriv(p->p_cred, pprp);
	mutex_exit(&p->p_crlock);
}
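
/*
 * What follows is the _SYSCALL32_IMPL mirror of prgetxmap() for 32-bit
 * consumers of /proc: the logic is identical, but addresses and sizes
 * are truncated to the prxmap32_t field widths and the device number
 * is compressed with cmpldev().
 */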
#ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */