xref: /illumos-gate/usr/src/uts/common/fs/proc/prsubr.c (revision 609febc9a48c79a089214cb5d882759a72a38513)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2019 Joyent, Inc.
25  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
26  * Copyright 2022 MNX Cloud, Inc.
27  */
28 
29 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30 /*	  All Rights Reserved	*/
31 
32 #include <sys/types.h>
33 #include <sys/t_lock.h>
34 #include <sys/param.h>
35 #include <sys/cmn_err.h>
36 #include <sys/cred.h>
37 #include <sys/priv.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/inline.h>
41 #include <sys/kmem.h>
42 #include <sys/mman.h>
43 #include <sys/proc.h>
44 #include <sys/brand.h>
45 #include <sys/sobject.h>
46 #include <sys/sysmacros.h>
47 #include <sys/systm.h>
48 #include <sys/uio.h>
49 #include <sys/var.h>
50 #include <sys/vfs.h>
51 #include <sys/vnode.h>
52 #include <sys/session.h>
53 #include <sys/pcb.h>
54 #include <sys/signal.h>
55 #include <sys/user.h>
56 #include <sys/disp.h>
57 #include <sys/class.h>
58 #include <sys/ts.h>
59 #include <sys/bitmap.h>
60 #include <sys/poll.h>
61 #include <sys/shm_impl.h>
62 #include <sys/fault.h>
63 #include <sys/syscall.h>
64 #include <sys/procfs.h>
65 #include <sys/processor.h>
66 #include <sys/cpuvar.h>
67 #include <sys/copyops.h>
68 #include <sys/time.h>
69 #include <sys/msacct.h>
70 #include <sys/flock_impl.h>
71 #include <sys/stropts.h>
72 #include <sys/strsubr.h>
73 #include <sys/pathname.h>
74 #include <sys/mode.h>
75 #include <sys/socketvar.h>
76 #include <sys/autoconf.h>
77 #include <sys/dtrace.h>
78 #include <sys/timod.h>
79 #include <sys/fs/namenode.h>
80 #include <netinet/udp.h>
81 #include <netinet/tcp.h>
82 #include <inet/cc.h>
83 #include <vm/as.h>
84 #include <vm/rm.h>
85 #include <vm/seg.h>
86 #include <vm/seg_vn.h>
87 #include <vm/seg_dev.h>
88 #include <vm/seg_spt.h>
89 #include <vm/page.h>
90 #include <sys/vmparam.h>
91 #include <sys/swap.h>
92 #include <fs/proc/prdata.h>
93 #include <sys/task.h>
94 #include <sys/project.h>
95 #include <sys/contract_impl.h>
96 #include <sys/contract/process.h>
97 #include <sys/contract/process_impl.h>
98 #include <sys/schedctl.h>
99 #include <sys/pool.h>
100 #include <sys/zone.h>
101 #include <sys/atomic.h>
102 #include <sys/sdt.h>
103 
104 #define	MAX_ITERS_SPIN	5
105 
106 typedef struct prpagev {
107 	uint_t *pg_protv;	/* vector of page permissions */
108 	char *pg_incore;	/* vector of incore flags */
109 	size_t pg_npages;	/* number of pages in protv and incore */
110 	ulong_t pg_pnbase;	/* pn within segment of first protv element */
111 } prpagev_t;
112 
113 size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
114 
115 extern struct seg_ops segdev_ops;	/* needs a header file */
116 extern struct seg_ops segspt_shmops;	/* needs a header file */
117 
118 static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
119 static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
120 
121 /*
122  * Choose an lwp from the complete set of lwps for the process.
123  * This is called for any operation applied to the process
124  * file descriptor that requires an lwp to operate upon.
125  *
126  * Returns a pointer to the thread for the selected LWP,
127  * and with the dispatcher lock held for the thread.
128  *
129  * The algorithm for choosing an lwp is critical for /proc semantics;
130  * don't touch this code unless you know all of the implications.
131  */
132 kthread_t *
133 prchoose(proc_t *p)
134 {
135 	kthread_t *t;
136 	kthread_t *t_onproc = NULL;	/* running on processor */
137 	kthread_t *t_run = NULL;	/* runnable, on disp queue */
138 	kthread_t *t_sleep = NULL;	/* sleeping */
139 	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
140 	kthread_t *t_susp = NULL;	/* suspended stop */
141 	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
142 	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
143 	kthread_t *t_req = NULL;	/* requested stop */
144 	kthread_t *t_istop = NULL;	/* event-of-interest stop */
145 	kthread_t *t_dtrace = NULL;	/* DTrace stop */
146 
147 	ASSERT(MUTEX_HELD(&p->p_lock));
148 
149 	/*
150 	 * If the agent lwp exists, it takes precedence over all others.
151 	 */
152 	if ((t = p->p_agenttp) != NULL) {
153 		thread_lock(t);
154 		return (t);
155 	}
156 
157 	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
158 		return (t);
159 	do {		/* for eacn lwp in the process */
160 		if (VSTOPPED(t)) {	/* virtually stopped */
161 			if (t_req == NULL)
162 				t_req = t;
163 			continue;
164 		}
165 
166 		/* If this is a process kernel thread, ignore it. */
167 		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
168 			continue;
169 		}
170 
171 		thread_lock(t);		/* make sure thread is in good state */
172 		switch (t->t_state) {
173 		default:
174 			panic("prchoose: bad thread state %d, thread 0x%p",
175 			    t->t_state, (void *)t);
176 			/*NOTREACHED*/
177 		case TS_SLEEP:
178 			/* this is filthy */
179 			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
180 			    t->t_wchan0 == NULL) {
181 				if (t_hold == NULL)
182 					t_hold = t;
183 			} else {
184 				if (t_sleep == NULL)
185 					t_sleep = t;
186 			}
187 			break;
188 		case TS_RUN:
189 		case TS_WAIT:
190 			if (t_run == NULL)
191 				t_run = t;
192 			break;
193 		case TS_ONPROC:
194 			if (t_onproc == NULL)
195 				t_onproc = t;
196 			break;
197 		case TS_ZOMB:		/* last possible choice */
198 			break;
199 		case TS_STOPPED:
200 			switch (t->t_whystop) {
201 			case PR_SUSPENDED:
202 				if (t_susp == NULL)
203 					t_susp = t;
204 				break;
205 			case PR_JOBCONTROL:
206 				if (t->t_proc_flag & TP_PRSTOP) {
207 					if (t_jdstop == NULL)
208 						t_jdstop = t;
209 				} else {
210 					if (t_jstop == NULL)
211 						t_jstop = t;
212 				}
213 				break;
214 			case PR_REQUESTED:
215 				if (t->t_dtrace_stop && t_dtrace == NULL)
216 					t_dtrace = t;
217 				else if (t_req == NULL)
218 					t_req = t;
219 				break;
220 			case PR_SYSENTRY:
221 			case PR_SYSEXIT:
222 			case PR_SIGNALLED:
223 			case PR_FAULTED:
224 				/*
225 				 * Make an lwp calling exit() be the
226 				 * last lwp seen in the process.
227 				 */
228 				if (t_istop == NULL ||
229 				    (t_istop->t_whystop == PR_SYSENTRY &&
230 				    t_istop->t_whatstop == SYS_exit))
231 					t_istop = t;
232 				break;
233 			case PR_CHECKPOINT:	/* can't happen? */
234 				break;
235 			default:
236 				panic("prchoose: bad t_whystop %d, thread 0x%p",
237 				    t->t_whystop, (void *)t);
238 				/*NOTREACHED*/
239 			}
240 			break;
241 		}
242 		thread_unlock(t);
243 	} while ((t = t->t_forw) != p->p_tlist);
244 
245 	if (t_onproc)
246 		t = t_onproc;
247 	else if (t_run)
248 		t = t_run;
249 	else if (t_sleep)
250 		t = t_sleep;
251 	else if (t_jstop)
252 		t = t_jstop;
253 	else if (t_jdstop)
254 		t = t_jdstop;
255 	else if (t_istop)
256 		t = t_istop;
257 	else if (t_dtrace)
258 		t = t_dtrace;
259 	else if (t_req)
260 		t = t_req;
261 	else if (t_hold)
262 		t = t_hold;
263 	else if (t_susp)
264 		t = t_susp;
265 	else			/* TS_ZOMB */
266 		t = p->p_tlist;
267 
268 	if (t != NULL)
269 		thread_lock(t);
270 	return (t);
271 }
272 
273 /*
274  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
275  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
276  * on the /proc file descriptor.  Called from stop() when a traced
277  * process stops on an event of interest.  Also called from exit()
278  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
279  */
280 void
281 prnotify(struct vnode *vp)
282 {
283 	prcommon_t *pcp = VTOP(vp)->pr_common;
284 
285 	mutex_enter(&pcp->prc_mutex);
286 	cv_broadcast(&pcp->prc_wait);
287 	mutex_exit(&pcp->prc_mutex);
288 	if (pcp->prc_flags & PRC_POLL) {
289 		/*
290 		 * We call pollwakeup() with POLLHUP to ensure that
291 		 * the pollers are awakened even if they are polling
292 		 * for nothing (i.e., waiting for the process to exit).
293 		 * This enables the use of the PRC_POLL flag for optimization
294 		 * (we can turn off PRC_POLL only if we know no pollers remain).
295 		 */
296 		pcp->prc_flags &= ~PRC_POLL;
297 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
298 	}
299 }
300 
301 /* called immediately below, in prfree() */
302 static void
303 prfreenotify(vnode_t *vp)
304 {
305 	prnode_t *pnp;
306 	prcommon_t *pcp;
307 
308 	while (vp != NULL) {
309 		pnp = VTOP(vp);
310 		pcp = pnp->pr_common;
311 		ASSERT(pcp->prc_thread == NULL);
312 		pcp->prc_proc = NULL;
313 		/*
314 		 * We can't call prnotify() here because we are holding
315 		 * pidlock.  We assert that there is no need to.
316 		 */
317 		mutex_enter(&pcp->prc_mutex);
318 		cv_broadcast(&pcp->prc_wait);
319 		mutex_exit(&pcp->prc_mutex);
320 		ASSERT(!(pcp->prc_flags & PRC_POLL));
321 
322 		vp = pnp->pr_next;
323 		pnp->pr_next = NULL;
324 	}
325 }
326 
327 /*
328  * Called from a hook in freeproc() when a traced process is removed
329  * from the process table.  The proc-table pointers of all associated
330  * /proc vnodes are cleared to indicate that the process has gone away.
331  */
332 void
333 prfree(proc_t *p)
334 {
335 	uint_t slot = p->p_slot;
336 
337 	ASSERT(MUTEX_HELD(&pidlock));
338 
339 	/*
340 	 * Block the process against /proc so it can be freed.
341 	 * It cannot be freed while locked by some controlling process.
342 	 * Lock ordering:
343 	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
344 	 */
345 	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
346 	mutex_enter(&p->p_lock);
347 	while (p->p_proc_flag & P_PR_LOCK) {
348 		mutex_exit(&pr_pidlock);
349 		cv_wait(&pr_pid_cv[slot], &p->p_lock);
350 		mutex_exit(&p->p_lock);
351 		mutex_enter(&pr_pidlock);
352 		mutex_enter(&p->p_lock);
353 	}
354 
355 	ASSERT(p->p_tlist == NULL);
356 
357 	prfreenotify(p->p_plist);
358 	p->p_plist = NULL;
359 
360 	prfreenotify(p->p_trace);
361 	p->p_trace = NULL;
362 
363 	/*
364 	 * We broadcast to wake up everyone waiting for this process.
365 	 * No one can reach this process from this point on.
366 	 */
367 	cv_broadcast(&pr_pid_cv[slot]);
368 
369 	mutex_exit(&p->p_lock);
370 	mutex_exit(&pr_pidlock);
371 }
372 
373 /*
374  * Called from a hook in exit() when a traced process is becoming a zombie.
375  */
376 void
377 prexit(proc_t *p)
378 {
379 	ASSERT(MUTEX_HELD(&p->p_lock));
380 
381 	if (pr_watch_active(p)) {
382 		pr_free_watchpoints(p);
383 		watch_disable(curthread);
384 	}
385 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
386 	if (p->p_trace) {
387 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
388 		prnotify(p->p_trace);
389 	}
390 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
391 }
392 
393 /*
394  * Called when a thread calls lwp_exit().
395  */
396 void
397 prlwpexit(kthread_t *t)
398 {
399 	vnode_t *vp;
400 	prnode_t *pnp;
401 	prcommon_t *pcp;
402 	proc_t *p = ttoproc(t);
403 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
404 
405 	ASSERT(t == curthread);
406 	ASSERT(MUTEX_HELD(&p->p_lock));
407 
408 	/*
409 	 * The process must be blocked against /proc to do this safely.
410 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
411 	 * It is the caller's responsibility to have called prbarrier(p).
412 	 */
413 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
414 
415 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
416 		pnp = VTOP(vp);
417 		pcp = pnp->pr_common;
418 		if (pcp->prc_thread == t) {
419 			pcp->prc_thread = NULL;
420 			pcp->prc_flags |= PRC_DESTROY;
421 		}
422 	}
423 
424 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
425 		pnp = VTOP(vp);
426 		pcp = pnp->pr_common;
427 		pcp->prc_thread = NULL;
428 		pcp->prc_flags |= PRC_DESTROY;
429 		prnotify(vp);
430 	}
431 
432 	if (p->p_trace)
433 		prnotify(p->p_trace);
434 }
435 
436 /*
437  * Called when a zombie thread is joined or when a
438  * detached lwp exits.  Called from lwp_hash_out().
439  */
440 void
441 prlwpfree(proc_t *p, lwpent_t *lep)
442 {
443 	vnode_t *vp;
444 	prnode_t *pnp;
445 	prcommon_t *pcp;
446 
447 	ASSERT(MUTEX_HELD(&p->p_lock));
448 
449 	/*
450 	 * The process must be blocked against /proc to do this safely.
451 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
452 	 * It is the caller's responsibility to have called prbarrier(p).
453 	 */
454 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
455 
456 	vp = lep->le_trace;
457 	lep->le_trace = NULL;
458 	while (vp) {
459 		prnotify(vp);
460 		pnp = VTOP(vp);
461 		pcp = pnp->pr_common;
462 		ASSERT(pcp->prc_thread == NULL &&
463 		    (pcp->prc_flags & PRC_DESTROY));
464 		pcp->prc_tslot = -1;
465 		vp = pnp->pr_next;
466 		pnp->pr_next = NULL;
467 	}
468 
469 	if (p->p_trace)
470 		prnotify(p->p_trace);
471 }
472 
473 /*
474  * Called from a hook in exec() when a thread starts exec().
475  */
476 void
477 prexecstart(void)
478 {
479 	proc_t *p = ttoproc(curthread);
480 	klwp_t *lwp = ttolwp(curthread);
481 
482 	/*
483 	 * The P_PR_EXEC flag blocks /proc operations for
484 	 * the duration of the exec().
485 	 * We can't start exec() while the process is
486 	 * locked by /proc, so we call prbarrier().
487 	 * lwp_nostop keeps the process from being stopped
488 	 * via job control for the duration of the exec().
489 	 */
490 
491 	ASSERT(MUTEX_HELD(&p->p_lock));
492 	prbarrier(p);
493 	lwp->lwp_nostop++;
494 	p->p_proc_flag |= P_PR_EXEC;
495 }
496 
497 /*
498  * Called from a hook in exec() when a thread finishes exec().
499  * The thread may or may not have succeeded.  Some other thread
500  * may have beat it to the punch.
501  */
502 void
503 prexecend(void)
504 {
505 	proc_t *p = ttoproc(curthread);
506 	klwp_t *lwp = ttolwp(curthread);
507 	vnode_t *vp;
508 	prnode_t *pnp;
509 	prcommon_t *pcp;
510 	model_t model = p->p_model;
511 	id_t tid = curthread->t_tid;
512 	int tslot = curthread->t_dslot;
513 
514 	ASSERT(MUTEX_HELD(&p->p_lock));
515 
516 	lwp->lwp_nostop--;
517 	if (p->p_flag & SEXITLWPS) {
518 		/*
519 		 * We are on our way to exiting because some
520 		 * other thread beat us in the race to exec().
521 		 * Don't clear the P_PR_EXEC flag in this case.
522 		 */
523 		return;
524 	}
525 
526 	/*
527 	 * Wake up anyone waiting in /proc for the process to complete exec().
528 	 */
529 	p->p_proc_flag &= ~P_PR_EXEC;
530 	if ((vp = p->p_trace) != NULL) {
531 		pcp = VTOP(vp)->pr_common;
532 		mutex_enter(&pcp->prc_mutex);
533 		cv_broadcast(&pcp->prc_wait);
534 		mutex_exit(&pcp->prc_mutex);
535 		for (; vp != NULL; vp = pnp->pr_next) {
536 			pnp = VTOP(vp);
537 			pnp->pr_common->prc_datamodel = model;
538 		}
539 	}
540 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
541 		/*
542 		 * We dealt with the process common above.
543 		 */
544 		ASSERT(p->p_trace != NULL);
545 		pcp = VTOP(vp)->pr_common;
546 		mutex_enter(&pcp->prc_mutex);
547 		cv_broadcast(&pcp->prc_wait);
548 		mutex_exit(&pcp->prc_mutex);
549 		for (; vp != NULL; vp = pnp->pr_next) {
550 			pnp = VTOP(vp);
551 			pcp = pnp->pr_common;
552 			pcp->prc_datamodel = model;
553 			pcp->prc_tid = tid;
554 			pcp->prc_tslot = tslot;
555 		}
556 	}
557 }
558 
559 /*
560  * Called from a hook in relvm() just before freeing the address space.
561  * We free all the watched areas now.
562  */
563 void
564 prrelvm(void)
565 {
566 	proc_t *p = ttoproc(curthread);
567 
568 	mutex_enter(&p->p_lock);
569 	prbarrier(p);	/* block all other /proc operations */
570 	if (pr_watch_active(p)) {
571 		pr_free_watchpoints(p);
572 		watch_disable(curthread);
573 	}
574 	mutex_exit(&p->p_lock);
575 	pr_free_watched_pages(p);
576 }
577 
578 /*
579  * Called from hooks in exec-related code when a traced process
580  * attempts to exec(2) a setuid/setgid program or an unreadable
581  * file.  Rather than fail the exec we invalidate the associated
582  * /proc vnodes so that subsequent attempts to use them will fail.
583  *
584  * All /proc vnodes, except directory vnodes, are retained on a linked
585  * list (rooted at p_plist in the process structure) until last close.
586  *
587  * A controlling process must re-open the /proc files in order to
588  * regain control.
589  */
590 void
591 prinvalidate(struct user *up)
592 {
593 	kthread_t *t = curthread;
594 	proc_t *p = ttoproc(t);
595 	vnode_t *vp;
596 	prnode_t *pnp;
597 	int writers = 0;
598 
599 	mutex_enter(&p->p_lock);
600 	prbarrier(p);	/* block all other /proc operations */
601 
602 	/*
603 	 * At this moment, there can be only one lwp in the process.
604 	 */
605 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
606 
607 	/*
608 	 * Invalidate any currently active /proc vnodes.
609 	 */
610 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
611 		pnp = VTOP(vp);
612 		switch (pnp->pr_type) {
613 		case PR_PSINFO:		/* these files can read by anyone */
614 		case PR_LPSINFO:
615 		case PR_LWPSINFO:
616 		case PR_LWPDIR:
617 		case PR_LWPIDDIR:
618 		case PR_USAGE:
619 		case PR_LUSAGE:
620 		case PR_LWPUSAGE:
621 			break;
622 		default:
623 			pnp->pr_flags |= PR_INVAL;
624 			break;
625 		}
626 	}
627 	/*
628 	 * Wake up anyone waiting for the process or lwp.
629 	 * p->p_trace is guaranteed to be non-NULL if there
630 	 * are any open /proc files for this process.
631 	 */
632 	if ((vp = p->p_trace) != NULL) {
633 		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
634 
635 		prnotify(vp);
636 		/*
637 		 * Are there any writers?
638 		 */
639 		if ((writers = pcp->prc_writers) != 0) {
640 			/*
641 			 * Clear the exclusive open flag (old /proc interface).
642 			 * Set prc_selfopens equal to prc_writers so that
643 			 * the next O_EXCL|O_WRITE open will succeed
644 			 * even with existing (though invalid) writers.
645 			 * prclose() must decrement prc_selfopens when
646 			 * the invalid files are closed.
647 			 */
648 			pcp->prc_flags &= ~PRC_EXCL;
649 			ASSERT(pcp->prc_selfopens <= writers);
650 			pcp->prc_selfopens = writers;
651 		}
652 	}
653 	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
654 	while (vp != NULL) {
655 		/*
656 		 * We should not invalidate the lwpiddir vnodes,
657 		 * but the necessities of maintaining the old
658 		 * ioctl()-based version of /proc require it.
659 		 */
660 		pnp = VTOP(vp);
661 		pnp->pr_flags |= PR_INVAL;
662 		prnotify(vp);
663 		vp = pnp->pr_next;
664 	}
665 
666 	/*
667 	 * If any tracing flags are in effect and any vnodes are open for
668 	 * writing then set the requested-stop and run-on-last-close flags.
669 	 * Otherwise, clear all tracing flags.
670 	 */
671 	t->t_proc_flag &= ~TP_PAUSE;
672 	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
673 		t->t_proc_flag |= TP_PRSTOP;
674 		aston(t);		/* so ISSIG will see the flag */
675 		p->p_proc_flag |= P_PR_RUNLCL;
676 	} else {
677 		premptyset(&up->u_entrymask);		/* syscalls */
678 		premptyset(&up->u_exitmask);
679 		up->u_systrap = 0;
680 		premptyset(&p->p_sigmask);		/* signals */
681 		premptyset(&p->p_fltmask);		/* faults */
682 		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
683 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
684 		prnostep(ttolwp(t));
685 	}
686 
687 	mutex_exit(&p->p_lock);
688 }
689 
690 /*
691  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
692  * Return with pr_pidlock held in all cases.
693  * Return with p_lock held if the the process still exists.
694  * Return value is the process pointer if the process still exists, else NULL.
695  * If we lock the process, give ourself kernel priority to avoid deadlocks;
696  * this is undone in prunlock().
697  */
698 proc_t *
699 pr_p_lock(prnode_t *pnp)
700 {
701 	proc_t *p;
702 	prcommon_t *pcp;
703 
704 	mutex_enter(&pr_pidlock);
705 	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
706 		return (NULL);
707 	mutex_enter(&p->p_lock);
708 	while (p->p_proc_flag & P_PR_LOCK) {
709 		/*
710 		 * This cv/mutex pair is persistent even if
711 		 * the process disappears while we sleep.
712 		 */
713 		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
714 		kmutex_t *mp = &p->p_lock;
715 
716 		mutex_exit(&pr_pidlock);
717 		cv_wait(cv, mp);
718 		mutex_exit(mp);
719 		mutex_enter(&pr_pidlock);
720 		if (pcp->prc_proc == NULL)
721 			return (NULL);
722 		ASSERT(p == pcp->prc_proc);
723 		mutex_enter(&p->p_lock);
724 	}
725 	p->p_proc_flag |= P_PR_LOCK;
726 	return (p);
727 }
728 
729 /*
730  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
731  * This prevents any lwp of the process from disappearing and
732  * blocks most operations that a process can perform on itself.
733  * Returns 0 on success, a non-zero error number on failure.
734  *
735  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
736  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
737  *
738  * error returns:
739  *	ENOENT: process or lwp has disappeared or process is exiting
740  *		(or has become a zombie and zdisp == ZNO).
741  *	EAGAIN: procfs vnode has become invalid.
742  *	EINTR:  signal arrived while waiting for exec to complete.
743  */
744 int
745 prlock(prnode_t *pnp, int zdisp)
746 {
747 	prcommon_t *pcp;
748 	proc_t *p;
749 
750 again:
751 	pcp = pnp->pr_common;
752 	p = pr_p_lock(pnp);
753 	mutex_exit(&pr_pidlock);
754 
755 	/*
756 	 * Return ENOENT immediately if there is no process.
757 	 */
758 	if (p == NULL)
759 		return (ENOENT);
760 
761 	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
762 
763 	/*
764 	 * Return ENOENT if process entered zombie state or is exiting
765 	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
766 	 */
767 	if (zdisp == ZNO &&
768 	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
769 		prunlock(pnp);
770 		return (ENOENT);
771 	}
772 
773 	/*
774 	 * If lwp-specific, check to see if lwp has disappeared.
775 	 */
776 	if (pcp->prc_flags & PRC_LWP) {
777 		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
778 		    pcp->prc_tslot == -1) {
779 			prunlock(pnp);
780 			return (ENOENT);
781 		}
782 	}
783 
784 	/*
785 	 * Return EAGAIN if we have encountered a security violation.
786 	 * (The process exec'd a set-id or unreadable executable file.)
787 	 */
788 	if (pnp->pr_flags & PR_INVAL) {
789 		prunlock(pnp);
790 		return (EAGAIN);
791 	}
792 
793 	/*
794 	 * If process is undergoing an exec(), wait for
795 	 * completion and then start all over again.
796 	 */
797 	if (p->p_proc_flag & P_PR_EXEC) {
798 		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
799 		mutex_enter(&pcp->prc_mutex);
800 		prunlock(pnp);
801 		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
802 			mutex_exit(&pcp->prc_mutex);
803 			return (EINTR);
804 		}
805 		mutex_exit(&pcp->prc_mutex);
806 		goto again;
807 	}
808 
809 	/*
810 	 * We return holding p->p_lock.
811 	 */
812 	return (0);
813 }
814 
815 /*
816  * Undo prlock() and pr_p_lock().
817  * p->p_lock is still held; pr_pidlock is no longer held.
818  *
819  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
820  * if any, waiting for the flag to be dropped; it retains p->p_lock.
821  *
822  * prunlock() calls prunmark() and then drops p->p_lock.
823  */
824 void
825 prunmark(proc_t *p)
826 {
827 	ASSERT(p->p_proc_flag & P_PR_LOCK);
828 	ASSERT(MUTEX_HELD(&p->p_lock));
829 
830 	cv_signal(&pr_pid_cv[p->p_slot]);
831 	p->p_proc_flag &= ~P_PR_LOCK;
832 }
833 
834 void
835 prunlock(prnode_t *pnp)
836 {
837 	prcommon_t *pcp = pnp->pr_common;
838 	proc_t *p = pcp->prc_proc;
839 
840 	/*
841 	 * If we (or someone) gave it a SIGKILL, and it is not
842 	 * already a zombie, set it running unconditionally.
843 	 */
844 	if ((p->p_flag & SKILLED) &&
845 	    !(p->p_flag & SEXITING) &&
846 	    !(pcp->prc_flags & PRC_DESTROY) &&
847 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
848 		(void) pr_setrun(pnp, 0);
849 	prunmark(p);
850 	mutex_exit(&p->p_lock);
851 }
852 
853 /*
854  * Called while holding p->p_lock to delay until the process is unlocked.
855  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
856  * The process cannot become locked again until p->p_lock is dropped.
857  */
858 void
859 prbarrier(proc_t *p)
860 {
861 	ASSERT(MUTEX_HELD(&p->p_lock));
862 
863 	if (p->p_proc_flag & P_PR_LOCK) {
864 		/* The process is locked; delay until not locked */
865 		uint_t slot = p->p_slot;
866 
867 		while (p->p_proc_flag & P_PR_LOCK)
868 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
869 		cv_signal(&pr_pid_cv[slot]);
870 	}
871 }
872 
873 /*
874  * Return process/lwp status.
875  * The u-block is mapped in by this routine and unmapped at the end.
876  */
877 void
878 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
879 {
880 	kthread_t *t;
881 
882 	ASSERT(MUTEX_HELD(&p->p_lock));
883 
884 	t = prchoose(p);	/* returns locked thread */
885 	ASSERT(t != NULL);
886 	thread_unlock(t);
887 
888 	/* just bzero the process part, prgetlwpstatus() does the rest */
889 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
890 	sp->pr_nlwp = p->p_lwpcnt;
891 	sp->pr_nzomb = p->p_zombcnt;
892 	prassignset(&sp->pr_sigpend, &p->p_sig);
893 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
894 	sp->pr_brksize = p->p_brksize;
895 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
896 	sp->pr_stksize = p->p_stksize;
897 	sp->pr_pid = p->p_pid;
898 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
899 	    (p->p_flag & SZONETOP)) {
900 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
901 		/*
902 		 * Inside local zones, fake zsched's pid as parent pids for
903 		 * processes which reference processes outside of the zone.
904 		 */
905 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
906 	} else {
907 		sp->pr_ppid = p->p_ppid;
908 	}
909 	sp->pr_pgid  = p->p_pgrp;
910 	sp->pr_sid   = p->p_sessp->s_sid;
911 	sp->pr_taskid = p->p_task->tk_tkid;
912 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
913 	sp->pr_zoneid = p->p_zone->zone_id;
914 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
915 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
916 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
917 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
918 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
919 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
920 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
921 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
922 	switch (p->p_model) {
923 	case DATAMODEL_ILP32:
924 		sp->pr_dmodel = PR_MODEL_ILP32;
925 		break;
926 	case DATAMODEL_LP64:
927 		sp->pr_dmodel = PR_MODEL_LP64;
928 		break;
929 	}
930 	if (p->p_agenttp)
931 		sp->pr_agentid = p->p_agenttp->t_tid;
932 
933 	/* get the chosen lwp's status */
934 	prgetlwpstatus(t, &sp->pr_lwp, zp);
935 
936 	/* replicate the flags */
937 	sp->pr_flags = sp->pr_lwp.pr_flags;
938 }
939 
940 /*
941  * Query mask of held signals for a given thread.
942  *
943  * This makes use of schedctl_sigblock() to query if userspace has requested
944  * that all maskable signals be held.  While it would be tempting to call
945  * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
946  * done safely without the risk of racing with the thread under consideration.
947  */
948 void
949 prgethold(kthread_t *t, sigset_t *sp)
950 {
951 	k_sigset_t set;
952 
953 	if (schedctl_sigblock(t)) {
954 		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
955 		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
956 		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
957 	} else {
958 		set = t->t_hold;
959 	}
960 	sigktou(&set, sp);
961 }
962 
963 #ifdef _SYSCALL32_IMPL
964 void
965 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
966 {
967 	proc_t *p = ttoproc(t);
968 	klwp_t *lwp = ttolwp(t);
969 	struct mstate *ms = &lwp->lwp_mstate;
970 	hrtime_t usr, sys;
971 	int flags;
972 	ulong_t instr;
973 
974 	ASSERT(MUTEX_HELD(&p->p_lock));
975 
976 	bzero(sp, sizeof (*sp));
977 	flags = 0L;
978 	if (t->t_state == TS_STOPPED) {
979 		flags |= PR_STOPPED;
980 		if ((t->t_schedflag & TS_PSTART) == 0)
981 			flags |= PR_ISTOP;
982 	} else if (VSTOPPED(t)) {
983 		flags |= PR_STOPPED|PR_ISTOP;
984 	}
985 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
986 		flags |= PR_DSTOP;
987 	if (lwp->lwp_asleep)
988 		flags |= PR_ASLEEP;
989 	if (t == p->p_agenttp)
990 		flags |= PR_AGENT;
991 	if (!(t->t_proc_flag & TP_TWAIT))
992 		flags |= PR_DETACH;
993 	if (t->t_proc_flag & TP_DAEMON)
994 		flags |= PR_DAEMON;
995 	if (p->p_proc_flag & P_PR_FORK)
996 		flags |= PR_FORK;
997 	if (p->p_proc_flag & P_PR_RUNLCL)
998 		flags |= PR_RLC;
999 	if (p->p_proc_flag & P_PR_KILLCL)
1000 		flags |= PR_KLC;
1001 	if (p->p_proc_flag & P_PR_ASYNC)
1002 		flags |= PR_ASYNC;
1003 	if (p->p_proc_flag & P_PR_BPTADJ)
1004 		flags |= PR_BPTADJ;
1005 	if (p->p_proc_flag & P_PR_PTRACE)
1006 		flags |= PR_PTRACE;
1007 	if (p->p_flag & SMSACCT)
1008 		flags |= PR_MSACCT;
1009 	if (p->p_flag & SMSFORK)
1010 		flags |= PR_MSFORK;
1011 	if (p->p_flag & SVFWAIT)
1012 		flags |= PR_VFORKP;
1013 	sp->pr_flags = flags;
1014 	if (VSTOPPED(t)) {
1015 		sp->pr_why   = PR_REQUESTED;
1016 		sp->pr_what  = 0;
1017 	} else {
1018 		sp->pr_why   = t->t_whystop;
1019 		sp->pr_what  = t->t_whatstop;
1020 	}
1021 	sp->pr_lwpid = t->t_tid;
1022 	sp->pr_cursig  = lwp->lwp_cursig;
1023 	prassignset(&sp->pr_lwppend, &t->t_sig);
1024 	prgethold(t, &sp->pr_lwphold);
1025 	if (t->t_whystop == PR_FAULTED) {
1026 		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
1027 		if (t->t_whatstop == FLTPAGE)
1028 			sp->pr_info.si_addr =
1029 			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
1030 	} else if (lwp->lwp_curinfo)
1031 		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
1032 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1033 	    sp->pr_info.si_zoneid != zp->zone_id) {
1034 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1035 		sp->pr_info.si_uid = 0;
1036 		sp->pr_info.si_ctid = -1;
1037 		sp->pr_info.si_zoneid = zp->zone_id;
1038 	}
1039 	sp->pr_altstack.ss_sp =
1040 	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1041 	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1042 	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1043 	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1044 	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1045 	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1046 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1047 	    sizeof (sp->pr_clname) - 1);
1048 	if (flags & PR_STOPPED)
1049 		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1050 	usr = ms->ms_acct[LMS_USER];
1051 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1052 	scalehrtime(&usr);
1053 	scalehrtime(&sys);
1054 	hrt2ts32(usr, &sp->pr_utime);
1055 	hrt2ts32(sys, &sp->pr_stime);
1056 
1057 	/*
1058 	 * Fetch the current instruction, if not a system process.
1059 	 * We don't attempt this unless the lwp is stopped.
1060 	 */
1061 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1062 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1063 	else if (!(flags & PR_STOPPED))
1064 		sp->pr_flags |= PR_PCINVAL;
1065 	else if (!prfetchinstr(lwp, &instr))
1066 		sp->pr_flags |= PR_PCINVAL;
1067 	else
1068 		sp->pr_instr = (uint32_t)instr;
1069 
1070 	/*
1071 	 * Drop p_lock while touching the lwp's stack.
1072 	 */
1073 	mutex_exit(&p->p_lock);
1074 	if (prisstep(lwp))
1075 		sp->pr_flags |= PR_STEP;
1076 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1077 		int i;
1078 
1079 		sp->pr_syscall = get_syscall32_args(lwp,
1080 		    (int *)sp->pr_sysarg, &i);
1081 		sp->pr_nsysarg = (ushort_t)i;
1082 	}
1083 	if ((flags & PR_STOPPED) || t == curthread)
1084 		prgetprregs32(lwp, sp->pr_reg);
1085 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1086 	    (flags & PR_VFORKP)) {
1087 		long r1, r2;
1088 		user_t *up;
1089 		auxv_t *auxp;
1090 		int i;
1091 
1092 		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1093 		if (sp->pr_errno == 0) {
1094 			sp->pr_rval1 = (int32_t)r1;
1095 			sp->pr_rval2 = (int32_t)r2;
1096 			sp->pr_errpriv = PRIV_NONE;
1097 		} else
1098 			sp->pr_errpriv = lwp->lwp_badpriv;
1099 
1100 		if (t->t_sysnum == SYS_execve) {
1101 			up = PTOU(p);
1102 			sp->pr_sysarg[0] = 0;
1103 			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1104 			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1105 			for (i = 0, auxp = up->u_auxv;
1106 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1107 			    i++, auxp++) {
1108 				if (auxp->a_type == AT_SUN_EXECNAME) {
1109 					sp->pr_sysarg[0] =
1110 					    (caddr32_t)
1111 					    (uintptr_t)auxp->a_un.a_ptr;
1112 					break;
1113 				}
1114 			}
1115 		}
1116 	}
1117 	if (prhasfp())
1118 		prgetprfpregs32(lwp, &sp->pr_fpreg);
1119 	mutex_enter(&p->p_lock);
1120 }
1121 
1122 void
1123 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1124 {
1125 	kthread_t *t;
1126 
1127 	ASSERT(MUTEX_HELD(&p->p_lock));
1128 
1129 	t = prchoose(p);	/* returns locked thread */
1130 	ASSERT(t != NULL);
1131 	thread_unlock(t);
1132 
1133 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1134 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1135 	sp->pr_nlwp = p->p_lwpcnt;
1136 	sp->pr_nzomb = p->p_zombcnt;
1137 	prassignset(&sp->pr_sigpend, &p->p_sig);
1138 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1139 	sp->pr_brksize = (uint32_t)p->p_brksize;
1140 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1141 	sp->pr_stksize = (uint32_t)p->p_stksize;
1142 	sp->pr_pid   = p->p_pid;
1143 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1144 	    (p->p_flag & SZONETOP)) {
1145 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1146 		/*
1147 		 * Inside local zones, fake zsched's pid as parent pids for
1148 		 * processes which reference processes outside of the zone.
1149 		 */
1150 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1151 	} else {
1152 		sp->pr_ppid = p->p_ppid;
1153 	}
1154 	sp->pr_pgid  = p->p_pgrp;
1155 	sp->pr_sid   = p->p_sessp->s_sid;
1156 	sp->pr_taskid = p->p_task->tk_tkid;
1157 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1158 	sp->pr_zoneid = p->p_zone->zone_id;
1159 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1160 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1161 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1162 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1163 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1164 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1165 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1166 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1167 	switch (p->p_model) {
1168 	case DATAMODEL_ILP32:
1169 		sp->pr_dmodel = PR_MODEL_ILP32;
1170 		break;
1171 	case DATAMODEL_LP64:
1172 		sp->pr_dmodel = PR_MODEL_LP64;
1173 		break;
1174 	}
1175 	if (p->p_agenttp)
1176 		sp->pr_agentid = p->p_agenttp->t_tid;
1177 
1178 	/* get the chosen lwp's status */
1179 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1180 
1181 	/* replicate the flags */
1182 	sp->pr_flags = sp->pr_lwp.pr_flags;
1183 }
1184 #endif	/* _SYSCALL32_IMPL */
1185 
1186 /*
1187  * Return lwp status.
1188  */
1189 void
1190 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1191 {
1192 	proc_t *p = ttoproc(t);
1193 	klwp_t *lwp = ttolwp(t);
1194 	struct mstate *ms = &lwp->lwp_mstate;
1195 	hrtime_t usr, sys;
1196 	int flags;
1197 	ulong_t instr;
1198 
1199 	ASSERT(MUTEX_HELD(&p->p_lock));
1200 
1201 	bzero(sp, sizeof (*sp));
1202 	flags = 0L;
1203 	if (t->t_state == TS_STOPPED) {
1204 		flags |= PR_STOPPED;
1205 		if ((t->t_schedflag & TS_PSTART) == 0)
1206 			flags |= PR_ISTOP;
1207 	} else if (VSTOPPED(t)) {
1208 		flags |= PR_STOPPED|PR_ISTOP;
1209 	}
1210 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1211 		flags |= PR_DSTOP;
1212 	if (lwp->lwp_asleep)
1213 		flags |= PR_ASLEEP;
1214 	if (t == p->p_agenttp)
1215 		flags |= PR_AGENT;
1216 	if (!(t->t_proc_flag & TP_TWAIT))
1217 		flags |= PR_DETACH;
1218 	if (t->t_proc_flag & TP_DAEMON)
1219 		flags |= PR_DAEMON;
1220 	if (p->p_proc_flag & P_PR_FORK)
1221 		flags |= PR_FORK;
1222 	if (p->p_proc_flag & P_PR_RUNLCL)
1223 		flags |= PR_RLC;
1224 	if (p->p_proc_flag & P_PR_KILLCL)
1225 		flags |= PR_KLC;
1226 	if (p->p_proc_flag & P_PR_ASYNC)
1227 		flags |= PR_ASYNC;
1228 	if (p->p_proc_flag & P_PR_BPTADJ)
1229 		flags |= PR_BPTADJ;
1230 	if (p->p_proc_flag & P_PR_PTRACE)
1231 		flags |= PR_PTRACE;
1232 	if (p->p_flag & SMSACCT)
1233 		flags |= PR_MSACCT;
1234 	if (p->p_flag & SMSFORK)
1235 		flags |= PR_MSFORK;
1236 	if (p->p_flag & SVFWAIT)
1237 		flags |= PR_VFORKP;
1238 	if (p->p_pgidp->pid_pgorphaned)
1239 		flags |= PR_ORPHAN;
1240 	if (p->p_pidflag & CLDNOSIGCHLD)
1241 		flags |= PR_NOSIGCHLD;
1242 	if (p->p_pidflag & CLDWAITPID)
1243 		flags |= PR_WAITPID;
1244 	sp->pr_flags = flags;
1245 	if (VSTOPPED(t)) {
1246 		sp->pr_why   = PR_REQUESTED;
1247 		sp->pr_what  = 0;
1248 	} else {
1249 		sp->pr_why   = t->t_whystop;
1250 		sp->pr_what  = t->t_whatstop;
1251 	}
1252 	sp->pr_lwpid = t->t_tid;
1253 	sp->pr_cursig  = lwp->lwp_cursig;
1254 	prassignset(&sp->pr_lwppend, &t->t_sig);
1255 	prgethold(t, &sp->pr_lwphold);
1256 	if (t->t_whystop == PR_FAULTED)
1257 		bcopy(&lwp->lwp_siginfo,
1258 		    &sp->pr_info, sizeof (k_siginfo_t));
1259 	else if (lwp->lwp_curinfo)
1260 		bcopy(&lwp->lwp_curinfo->sq_info,
1261 		    &sp->pr_info, sizeof (k_siginfo_t));
1262 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1263 	    sp->pr_info.si_zoneid != zp->zone_id) {
1264 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1265 		sp->pr_info.si_uid = 0;
1266 		sp->pr_info.si_ctid = -1;
1267 		sp->pr_info.si_zoneid = zp->zone_id;
1268 	}
1269 	sp->pr_altstack = lwp->lwp_sigaltstack;
1270 	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1271 	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1272 	sp->pr_ustack = lwp->lwp_ustack;
1273 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1274 	    sizeof (sp->pr_clname) - 1);
1275 	if (flags & PR_STOPPED)
1276 		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1277 	usr = ms->ms_acct[LMS_USER];
1278 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1279 	scalehrtime(&usr);
1280 	scalehrtime(&sys);
1281 	hrt2ts(usr, &sp->pr_utime);
1282 	hrt2ts(sys, &sp->pr_stime);
1283 
1284 	/*
1285 	 * Fetch the current instruction, if not a system process.
1286 	 * We don't attempt this unless the lwp is stopped.
1287 	 */
1288 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1289 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1290 	else if (!(flags & PR_STOPPED))
1291 		sp->pr_flags |= PR_PCINVAL;
1292 	else if (!prfetchinstr(lwp, &instr))
1293 		sp->pr_flags |= PR_PCINVAL;
1294 	else
1295 		sp->pr_instr = instr;
1296 
1297 	/*
1298 	 * Drop p_lock while touching the lwp's stack.
1299 	 */
1300 	mutex_exit(&p->p_lock);
1301 	if (prisstep(lwp))
1302 		sp->pr_flags |= PR_STEP;
1303 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1304 		int i;
1305 
1306 		sp->pr_syscall = get_syscall_args(lwp,
1307 		    (long *)sp->pr_sysarg, &i);
1308 		sp->pr_nsysarg = (ushort_t)i;
1309 	}
1310 	if ((flags & PR_STOPPED) || t == curthread)
1311 		prgetprregs(lwp, sp->pr_reg);
1312 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1313 	    (flags & PR_VFORKP)) {
1314 		user_t *up;
1315 		auxv_t *auxp;
1316 		int i;
1317 
1318 		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1319 		if (sp->pr_errno == 0)
1320 			sp->pr_errpriv = PRIV_NONE;
1321 		else
1322 			sp->pr_errpriv = lwp->lwp_badpriv;
1323 
1324 		if (t->t_sysnum == SYS_execve) {
1325 			up = PTOU(p);
1326 			sp->pr_sysarg[0] = 0;
1327 			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1328 			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1329 			for (i = 0, auxp = up->u_auxv;
1330 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1331 			    i++, auxp++) {
1332 				if (auxp->a_type == AT_SUN_EXECNAME) {
1333 					sp->pr_sysarg[0] =
1334 					    (uintptr_t)auxp->a_un.a_ptr;
1335 					break;
1336 				}
1337 			}
1338 		}
1339 	}
1340 	if (prhasfp())
1341 		prgetprfpregs(lwp, &sp->pr_fpreg);
1342 	mutex_enter(&p->p_lock);
1343 }
1344 
1345 /*
1346  * Get the sigaction structure for the specified signal.  The u-block
1347  * must already have been mapped in by the caller.
1348  */
1349 void
1350 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1351 {
1352 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1353 
1354 	bzero(sp, sizeof (*sp));
1355 
1356 	if (sig != 0 && (unsigned)sig < nsig) {
1357 		sp->sa_handler = up->u_signal[sig-1];
1358 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1359 		if (sigismember(&up->u_sigonstack, sig))
1360 			sp->sa_flags |= SA_ONSTACK;
1361 		if (sigismember(&up->u_sigresethand, sig))
1362 			sp->sa_flags |= SA_RESETHAND;
1363 		if (sigismember(&up->u_sigrestart, sig))
1364 			sp->sa_flags |= SA_RESTART;
1365 		if (sigismember(&p->p_siginfo, sig))
1366 			sp->sa_flags |= SA_SIGINFO;
1367 		if (sigismember(&up->u_signodefer, sig))
1368 			sp->sa_flags |= SA_NODEFER;
1369 		if (sig == SIGCLD) {
1370 			if (p->p_flag & SNOWAIT)
1371 				sp->sa_flags |= SA_NOCLDWAIT;
1372 			if ((p->p_flag & SJCTL) == 0)
1373 				sp->sa_flags |= SA_NOCLDSTOP;
1374 		}
1375 	}
1376 }
1377 
1378 #ifdef _SYSCALL32_IMPL
1379 void
1380 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1381 {
1382 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1383 
1384 	bzero(sp, sizeof (*sp));
1385 
1386 	if (sig != 0 && (unsigned)sig < nsig) {
1387 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1388 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1389 		if (sigismember(&up->u_sigonstack, sig))
1390 			sp->sa_flags |= SA_ONSTACK;
1391 		if (sigismember(&up->u_sigresethand, sig))
1392 			sp->sa_flags |= SA_RESETHAND;
1393 		if (sigismember(&up->u_sigrestart, sig))
1394 			sp->sa_flags |= SA_RESTART;
1395 		if (sigismember(&p->p_siginfo, sig))
1396 			sp->sa_flags |= SA_SIGINFO;
1397 		if (sigismember(&up->u_signodefer, sig))
1398 			sp->sa_flags |= SA_NODEFER;
1399 		if (sig == SIGCLD) {
1400 			if (p->p_flag & SNOWAIT)
1401 				sp->sa_flags |= SA_NOCLDWAIT;
1402 			if ((p->p_flag & SJCTL) == 0)
1403 				sp->sa_flags |= SA_NOCLDSTOP;
1404 		}
1405 	}
1406 }
1407 #endif	/* _SYSCALL32_IMPL */
1408 
1409 /*
1410  * Count the number of segments in this process's address space.
1411  */
1412 uint_t
1413 prnsegs(struct as *as, int reserved)
1414 {
1415 	uint_t n = 0;
1416 	struct seg *seg;
1417 
1418 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1419 
1420 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1421 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1422 		caddr_t saddr, naddr;
1423 		void *tmp = NULL;
1424 
1425 		if ((seg->s_flags & S_HOLE) != 0) {
1426 			continue;
1427 		}
1428 
1429 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1430 			(void) pr_getprot(seg, reserved, &tmp,
1431 			    &saddr, &naddr, eaddr);
1432 			if (saddr != naddr) {
1433 				n++;
1434 				/*
1435 				 * prnsegs() was formerly designated to return
1436 				 * an 'int' despite having no ability or use
1437 				 * for negative results.  As part of changing
1438 				 * it to 'uint_t', keep the old effective limit
1439 				 * of INT_MAX in place.
1440 				 */
1441 				if (n == INT_MAX) {
1442 					pr_getprot_done(&tmp);
1443 					ASSERT(tmp == NULL);
1444 					return (n);
1445 				}
1446 			}
1447 		}
1448 
1449 		ASSERT(tmp == NULL);
1450 	}
1451 
1452 	return (n);
1453 }
1454 
/*
 * Write the decimal representation of 'n', without leading zeros, to 's'.
 * No terminating null is written unless 'len' exceeds the number of
 * digits, in which case the remainder of the 'len' bytes is filled with
 * null characters.  Returns the number of digits written.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* a uint32_t needs at most 10 digits */
	char *limit = s + len;
	int ndigits = 0;
	int i;

	/* generate the digits, least significant first */
	do {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* emit them most significant first */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	/* optional pad */
	for (; s < limit; s++)
		*s = '\0';

	return (ndigits);
}
1483 
/*
 * Write the decimal representation of 'n', without leading zeros, to 's'.
 * No terminating null is written.  Returns the number of digits written.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* a uint64_t needs at most 20 digits */
	int ndigits = 0;
	int i;

	/* generate the digits, least significant first */
	do {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* emit them most significant first */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	return (ndigits);
}
1508 
1509 /*
1510  * Similar to getf() / getf_gen(), but for the specified process.  On success,
1511  * returns the fp with fp->f_count incremented.  The caller MUST call
1512  * closef(fp) on the returned fp after completing any actions using that fp.
1513  * We return a reference-held (fp->f_count bumped) file_t so no other closef()
1514  * can invoke destructive VOP_CLOSE actions while we're inspecting the
1515  * process's FD.
1516  *
1517  * Returns NULL for errors: either an empty process-table slot post-fi_lock
1518  * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock.
1519  * Both failure modes have DTrace probes.
1520  *
1521  * The current design of the procfs "close" code path uses the following lock
1522  * order of:
1523  *
1524  *   1: (file_t) f_tlock
1525  *   2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
1526  *
1527  * That happens because closef() holds f_tlock while calling fop_close(),
1528  * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
1529  * beginning.
1530  *
1531  * That lock order creates a challenge for pr_getf, which needs to take those
1532  * locks in the opposite order when the fd points to a procfs file descriptor.
1533  * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
1534  * (limited) number of times, failing if we don't get both locks.
1535  *
1536  * The cases where this can fail are rare, and all involve a procfs caller
1537  * asking for info (eg. FDINFO) on another procfs FD.  In these cases,
1538  * returning EBADF (which results from a NULL return from pr_getf()) is
1539  * acceptable.
1540  *
1541  * One can increase the number of tries in pr_getf_maxtries if one is worried
1542  * about the contentuous case.
1543  */
1544 
1545 uint64_t pr_getf_tryfails; /* Bumped for statistic purposes. */
1546 int pr_getf_maxtries = 3;  /* So you can tune it from /etc/system */
1547 
1548 file_t *
1549 pr_getf(proc_t *p, uint_t fd, short *flag)
1550 {
1551 	uf_entry_t *ufp;
1552 	uf_info_t *fip;
1553 	file_t *fp;
1554 	int tries = 0;
1555 
1556 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
1557 
1558 retry:
1559 	fip = P_FINFO(p);
1560 
1561 	if (fd >= fip->fi_nfiles)
1562 		return (NULL);
1563 
1564 	mutex_exit(&p->p_lock);
1565 	mutex_enter(&fip->fi_lock);
1566 	UF_ENTER(ufp, fip, fd);
1567 	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
1568 		if (mutex_tryenter(&fp->f_tlock)) {
1569 			ASSERT(fp->f_count > 0);
1570 			fp->f_count++;
1571 			mutex_exit(&fp->f_tlock);
1572 			if (flag != NULL)
1573 				*flag = ufp->uf_flag;
1574 		} else {
1575 			/*
1576 			 * Note the number of mutex_trylock attempts.
1577 			 *
1578 			 * The exit path will catch this and try again if we
1579 			 * are below the retry threshhold (pr_getf_maxtries).
1580 			 */
1581 			tries++;
1582 			pr_getf_tryfails++;
1583 			/*
1584 			 * If we hit pr_getf_maxtries, we'll return NULL.
1585 			 * DTrace scripts looking for this sort of failure
1586 			 * should check when arg1 is pr_getf_maxtries.
1587 			 */
1588 			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
1589 			    tries);
1590 			fp = NULL;
1591 		}
1592 	} else {
1593 		fp = NULL;
1594 		/* If we fail here, someone else closed this FD. */
1595 		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
1596 		tries = pr_getf_maxtries; /* Don't bother retrying. */
1597 	}
1598 	UF_EXIT(ufp);
1599 	mutex_exit(&fip->fi_lock);
1600 	mutex_enter(&p->p_lock);
1601 
1602 	/* Use goto instead of tail-recursion so we can keep "tries" around. */
1603 	if (fp == NULL) {
1604 		/* "tries" starts at 1. */
1605 		if (tries < pr_getf_maxtries)
1606 			goto retry;
1607 	} else {
1608 		/*
1609 		 * Probes here will detect successes after arg1's number of
1610 		 * mutex_tryenter() calls.
1611 		 */
1612 		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int, tries + 1);
1613 	}
1614 
1615 	return (fp);
1616 }
1617 
1618 void
1619 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1620 {
1621 	char *s = name;
1622 	struct vfs *vfsp;
1623 	struct vfssw *vfsswp;
1624 
1625 	if ((vfsp = vp->v_vfsp) != NULL &&
1626 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1627 	    *vfsswp->vsw_name) {
1628 		(void) strcpy(s, vfsswp->vsw_name);
1629 		s += strlen(s);
1630 		*s++ = '.';
1631 	}
1632 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1633 	*s++ = '.';
1634 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1635 	*s++ = '.';
1636 	s += pr_u64tos(vattr->va_nodeid, s);
1637 	*s++ = '\0';
1638 }
1639 
1640 struct seg *
1641 break_seg(proc_t *p)
1642 {
1643 	caddr_t addr = p->p_brkbase;
1644 	struct seg *seg;
1645 	struct vnode *vp;
1646 
1647 	if (p->p_brksize != 0)
1648 		addr += p->p_brksize - 1;
1649 	seg = as_segat(p->p_as, addr);
1650 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1651 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1652 		return (seg);
1653 	return (NULL);
1654 }
1655 
1656 /*
1657  * Implementation of service functions to handle procfs generic chained
1658  * copyout buffers.
1659  */
1660 typedef struct pr_iobuf_list {
1661 	list_node_t	piol_link;	/* buffer linkage */
1662 	size_t		piol_size;	/* total size (header + data) */
1663 	size_t		piol_usedsize;	/* amount to copy out from this buf */
1664 } piol_t;
1665 
1666 #define	MAPSIZE	(64 * 1024)
1667 #define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1668 
1669 void
1670 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1671 {
1672 	piol_t	*iol;
1673 	size_t	initial_size = MIN(1, n) * itemsize;
1674 
1675 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1676 
1677 	ASSERT(list_head(iolhead) == NULL);
1678 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1679 	ASSERT(initial_size > 0);
1680 
1681 	/*
1682 	 * Someone creating chained copyout buffers may ask for less than
1683 	 * MAPSIZE if the amount of data to be buffered is known to be
1684 	 * smaller than that.
1685 	 * But in order to prevent involuntary self-denial of service,
1686 	 * the requested input size is clamped at MAPSIZE.
1687 	 */
1688 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1689 	iol = kmem_alloc(initial_size, KM_SLEEP);
1690 	list_insert_head(iolhead, iol);
1691 	iol->piol_usedsize = 0;
1692 	iol->piol_size = initial_size;
1693 }
1694 
1695 void *
1696 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1697 {
1698 	piol_t	*iol;
1699 	char	*new;
1700 
1701 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1702 	ASSERT(list_head(iolhead) != NULL);
1703 
1704 	iol = (piol_t *)list_tail(iolhead);
1705 
1706 	if (iol->piol_size <
1707 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1708 		/*
1709 		 * Out of space in the current buffer. Allocate more.
1710 		 */
1711 		piol_t *newiol;
1712 
1713 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1714 		newiol->piol_size = MAPSIZE;
1715 		newiol->piol_usedsize = 0;
1716 
1717 		list_insert_after(iolhead, iol, newiol);
1718 		iol = list_next(iolhead, iol);
1719 		ASSERT(iol == newiol);
1720 	}
1721 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1722 	iol->piol_usedsize += itemsize;
1723 	bzero(new, itemsize);
1724 	return (new);
1725 }
1726 
1727 void
1728 pr_iol_freelist(list_t *iolhead)
1729 {
1730 	piol_t	*iol;
1731 
1732 	while ((iol = list_head(iolhead)) != NULL) {
1733 		list_remove(iolhead, iol);
1734 		kmem_free(iol, iol->piol_size);
1735 	}
1736 	list_destroy(iolhead);
1737 }
1738 
1739 int
1740 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1741 {
1742 	int error = errin;
1743 	piol_t	*iol;
1744 
1745 	while ((iol = list_head(iolhead)) != NULL) {
1746 		list_remove(iolhead, iol);
1747 		if (!error) {
1748 			if (copyout(PIOL_DATABUF(iol), *tgt,
1749 			    iol->piol_usedsize))
1750 				error = EFAULT;
1751 			*tgt += iol->piol_usedsize;
1752 		}
1753 		kmem_free(iol, iol->piol_size);
1754 	}
1755 	list_destroy(iolhead);
1756 
1757 	return (error);
1758 }
1759 
1760 int
1761 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1762 {
1763 	offset_t	off = uiop->uio_offset;
1764 	char		*base;
1765 	size_t		size;
1766 	piol_t		*iol;
1767 	int		error = errin;
1768 
1769 	while ((iol = list_head(iolhead)) != NULL) {
1770 		list_remove(iolhead, iol);
1771 		base = PIOL_DATABUF(iol);
1772 		size = iol->piol_usedsize;
1773 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1774 			error = uiomove(base + off, size - off,
1775 			    UIO_READ, uiop);
1776 		off = MAX(0, off - (offset_t)size);
1777 		kmem_free(iol, iol->piol_size);
1778 	}
1779 	list_destroy(iolhead);
1780 
1781 	return (error);
1782 }
1783 
1784 /*
1785  * Return an array of structures with memory map information.
1786  * We allocate here; the caller must deallocate.
1787  */
1788 int
1789 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1790 {
1791 	struct as *as = p->p_as;
1792 	prmap_t *mp;
1793 	struct seg *seg;
1794 	struct seg *brkseg, *stkseg;
1795 	struct vnode *vp;
1796 	struct vattr vattr;
1797 	uint_t prot;
1798 
1799 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1800 
1801 	/*
1802 	 * Request an initial buffer size that doesn't waste memory
1803 	 * if the address space has only a small number of segments.
1804 	 */
1805 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1806 
1807 	if ((seg = AS_SEGFIRST(as)) == NULL)
1808 		return (0);
1809 
1810 	brkseg = break_seg(p);
1811 	stkseg = as_segat(as, prgetstackbase(p));
1812 
1813 	do {
1814 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1815 		caddr_t saddr, naddr;
1816 		void *tmp = NULL;
1817 
1818 		if ((seg->s_flags & S_HOLE) != 0) {
1819 			continue;
1820 		}
1821 
1822 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1823 			prot = pr_getprot(seg, reserved, &tmp,
1824 			    &saddr, &naddr, eaddr);
1825 			if (saddr == naddr)
1826 				continue;
1827 
1828 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1829 
1830 			mp->pr_vaddr = (uintptr_t)saddr;
1831 			mp->pr_size = naddr - saddr;
1832 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1833 			mp->pr_mflags = 0;
1834 			if (prot & PROT_READ)
1835 				mp->pr_mflags |= MA_READ;
1836 			if (prot & PROT_WRITE)
1837 				mp->pr_mflags |= MA_WRITE;
1838 			if (prot & PROT_EXEC)
1839 				mp->pr_mflags |= MA_EXEC;
1840 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1841 				mp->pr_mflags |= MA_SHARED;
1842 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1843 				mp->pr_mflags |= MA_NORESERVE;
1844 			if (seg->s_ops == &segspt_shmops ||
1845 			    (seg->s_ops == &segvn_ops &&
1846 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1847 				mp->pr_mflags |= MA_ANON;
1848 			if (seg == brkseg)
1849 				mp->pr_mflags |= MA_BREAK;
1850 			else if (seg == stkseg) {
1851 				mp->pr_mflags |= MA_STACK;
1852 				if (reserved) {
1853 					size_t maxstack =
1854 					    ((size_t)p->p_stk_ctl +
1855 					    PAGEOFFSET) & PAGEMASK;
1856 					mp->pr_vaddr =
1857 					    (uintptr_t)prgetstackbase(p) +
1858 					    p->p_stksize - maxstack;
1859 					mp->pr_size = (uintptr_t)naddr -
1860 					    mp->pr_vaddr;
1861 				}
1862 			}
1863 			if (seg->s_ops == &segspt_shmops)
1864 				mp->pr_mflags |= MA_ISM | MA_SHM;
1865 			mp->pr_pagesize = PAGESIZE;
1866 
1867 			/*
1868 			 * Manufacture a filename for the "object" directory.
1869 			 */
1870 			vattr.va_mask = AT_FSID|AT_NODEID;
1871 			if (seg->s_ops == &segvn_ops &&
1872 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1873 			    vp != NULL && vp->v_type == VREG &&
1874 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1875 				if (vp == p->p_exec)
1876 					(void) strcpy(mp->pr_mapname, "a.out");
1877 				else
1878 					pr_object_name(mp->pr_mapname,
1879 					    vp, &vattr);
1880 			}
1881 
1882 			/*
1883 			 * Get the SysV shared memory id, if any.
1884 			 */
1885 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1886 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1887 			    SHMID_NONE) {
1888 				if (mp->pr_shmid == SHMID_FREE)
1889 					mp->pr_shmid = -1;
1890 
1891 				mp->pr_mflags |= MA_SHM;
1892 			} else {
1893 				mp->pr_shmid = -1;
1894 			}
1895 		}
1896 		ASSERT(tmp == NULL);
1897 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1898 
1899 	return (0);
1900 }
1901 
1902 #ifdef _SYSCALL32_IMPL
1903 int
1904 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1905 {
1906 	struct as *as = p->p_as;
1907 	prmap32_t *mp;
1908 	struct seg *seg;
1909 	struct seg *brkseg, *stkseg;
1910 	struct vnode *vp;
1911 	struct vattr vattr;
1912 	uint_t prot;
1913 
1914 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1915 
1916 	/*
1917 	 * Request an initial buffer size that doesn't waste memory
1918 	 * if the address space has only a small number of segments.
1919 	 */
1920 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1921 
1922 	if ((seg = AS_SEGFIRST(as)) == NULL)
1923 		return (0);
1924 
1925 	brkseg = break_seg(p);
1926 	stkseg = as_segat(as, prgetstackbase(p));
1927 
1928 	do {
1929 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1930 		caddr_t saddr, naddr;
1931 		void *tmp = NULL;
1932 
1933 		if ((seg->s_flags & S_HOLE) != 0) {
1934 			continue;
1935 		}
1936 
1937 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1938 			prot = pr_getprot(seg, reserved, &tmp,
1939 			    &saddr, &naddr, eaddr);
1940 			if (saddr == naddr)
1941 				continue;
1942 
1943 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1944 
1945 			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1946 			mp->pr_size = (size32_t)(naddr - saddr);
1947 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1948 			mp->pr_mflags = 0;
1949 			if (prot & PROT_READ)
1950 				mp->pr_mflags |= MA_READ;
1951 			if (prot & PROT_WRITE)
1952 				mp->pr_mflags |= MA_WRITE;
1953 			if (prot & PROT_EXEC)
1954 				mp->pr_mflags |= MA_EXEC;
1955 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1956 				mp->pr_mflags |= MA_SHARED;
1957 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1958 				mp->pr_mflags |= MA_NORESERVE;
1959 			if (seg->s_ops == &segspt_shmops ||
1960 			    (seg->s_ops == &segvn_ops &&
1961 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1962 				mp->pr_mflags |= MA_ANON;
1963 			if (seg == brkseg)
1964 				mp->pr_mflags |= MA_BREAK;
1965 			else if (seg == stkseg) {
1966 				mp->pr_mflags |= MA_STACK;
1967 				if (reserved) {
1968 					size_t maxstack =
1969 					    ((size_t)p->p_stk_ctl +
1970 					    PAGEOFFSET) & PAGEMASK;
1971 					uintptr_t vaddr =
1972 					    (uintptr_t)prgetstackbase(p) +
1973 					    p->p_stksize - maxstack;
1974 					mp->pr_vaddr = (caddr32_t)vaddr;
1975 					mp->pr_size = (size32_t)
1976 					    ((uintptr_t)naddr - vaddr);
1977 				}
1978 			}
1979 			if (seg->s_ops == &segspt_shmops)
1980 				mp->pr_mflags |= MA_ISM | MA_SHM;
1981 			mp->pr_pagesize = PAGESIZE;
1982 
1983 			/*
1984 			 * Manufacture a filename for the "object" directory.
1985 			 */
1986 			vattr.va_mask = AT_FSID|AT_NODEID;
1987 			if (seg->s_ops == &segvn_ops &&
1988 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1989 			    vp != NULL && vp->v_type == VREG &&
1990 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1991 				if (vp == p->p_exec)
1992 					(void) strcpy(mp->pr_mapname, "a.out");
1993 				else
1994 					pr_object_name(mp->pr_mapname,
1995 					    vp, &vattr);
1996 			}
1997 
1998 			/*
1999 			 * Get the SysV shared memory id, if any.
2000 			 */
2001 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
2002 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
2003 			    SHMID_NONE) {
2004 				if (mp->pr_shmid == SHMID_FREE)
2005 					mp->pr_shmid = -1;
2006 
2007 				mp->pr_mflags |= MA_SHM;
2008 			} else {
2009 				mp->pr_shmid = -1;
2010 			}
2011 		}
2012 		ASSERT(tmp == NULL);
2013 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2014 
2015 	return (0);
2016 }
2017 #endif	/* _SYSCALL32_IMPL */
2018 
2019 /*
2020  * Return the size of the /proc page data file.
2021  */
2022 size_t
2023 prpdsize(struct as *as)
2024 {
2025 	struct seg *seg;
2026 	size_t size;
2027 
2028 	ASSERT(as != &kas && AS_WRITE_HELD(as));
2029 
2030 	if ((seg = AS_SEGFIRST(as)) == NULL)
2031 		return (0);
2032 
2033 	size = sizeof (prpageheader_t);
2034 	do {
2035 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2036 		caddr_t saddr, naddr;
2037 		void *tmp = NULL;
2038 		size_t npage;
2039 
2040 		if ((seg->s_flags & S_HOLE) != 0) {
2041 			continue;
2042 		}
2043 
2044 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2045 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2046 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2047 				size += sizeof (prasmap_t) + round8(npage);
2048 		}
2049 		ASSERT(tmp == NULL);
2050 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2051 
2052 	return (size);
2053 }
2054 
2055 #ifdef _SYSCALL32_IMPL
2056 size_t
2057 prpdsize32(struct as *as)
2058 {
2059 	struct seg *seg;
2060 	size_t size;
2061 
2062 	ASSERT(as != &kas && AS_WRITE_HELD(as));
2063 
2064 	if ((seg = AS_SEGFIRST(as)) == NULL)
2065 		return (0);
2066 
2067 	size = sizeof (prpageheader32_t);
2068 	do {
2069 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2070 		caddr_t saddr, naddr;
2071 		void *tmp = NULL;
2072 		size_t npage;
2073 
2074 		if ((seg->s_flags & S_HOLE) != 0) {
2075 			continue;
2076 		}
2077 
2078 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2079 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2080 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2081 				size += sizeof (prasmap32_t) + round8(npage);
2082 		}
2083 		ASSERT(tmp == NULL);
2084 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2085 
2086 	return (size);
2087 }
2088 #endif	/* _SYSCALL32_IMPL */
2089 
2090 /*
2091  * Read page data information.
2092  */
2093 int
2094 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
2095 {
2096 	struct as *as = p->p_as;
2097 	caddr_t buf;
2098 	size_t size;
2099 	prpageheader_t *php;
2100 	prasmap_t *pmp;
2101 	struct seg *seg;
2102 	int error;
2103 
2104 again:
2105 	AS_LOCK_ENTER(as, RW_WRITER);
2106 
2107 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2108 		AS_LOCK_EXIT(as);
2109 		return (0);
2110 	}
2111 	size = prpdsize(as);
2112 	if (uiop->uio_resid < size) {
2113 		AS_LOCK_EXIT(as);
2114 		return (E2BIG);
2115 	}
2116 
2117 	buf = kmem_zalloc(size, KM_SLEEP);
2118 	php = (prpageheader_t *)buf;
2119 	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
2120 
2121 	hrt2ts(gethrtime(), &php->pr_tstamp);
2122 	php->pr_nmap = 0;
2123 	php->pr_npage = 0;
2124 	do {
2125 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2126 		caddr_t saddr, naddr;
2127 		void *tmp = NULL;
2128 
2129 		if ((seg->s_flags & S_HOLE) != 0) {
2130 			continue;
2131 		}
2132 
2133 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2134 			struct vnode *vp;
2135 			struct vattr vattr;
2136 			size_t len;
2137 			size_t npage;
2138 			uint_t prot;
2139 			uintptr_t next;
2140 
2141 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2142 			if ((len = (size_t)(naddr - saddr)) == 0)
2143 				continue;
2144 			npage = len / PAGESIZE;
2145 			next = (uintptr_t)(pmp + 1) + round8(npage);
2146 			/*
2147 			 * It's possible that the address space can change
2148 			 * subtlely even though we're holding as->a_lock
2149 			 * due to the nondeterminism of page_exists() in
2150 			 * the presence of asychronously flushed pages or
2151 			 * mapped files whose sizes are changing.
2152 			 * page_exists() may be called indirectly from
2153 			 * pr_getprot() by a SEGOP_INCORE() routine.
2154 			 * If this happens we need to make sure we don't
2155 			 * overrun the buffer whose size we computed based
2156 			 * on the initial iteration through the segments.
2157 			 * Once we've detected an overflow, we need to clean
2158 			 * up the temporary memory allocated in pr_getprot()
2159 			 * and retry. If there's a pending signal, we return
2160 			 * EINTR so that this thread can be dislodged if
2161 			 * a latent bug causes us to spin indefinitely.
2162 			 */
2163 			if (next > (uintptr_t)buf + size) {
2164 				pr_getprot_done(&tmp);
2165 				AS_LOCK_EXIT(as);
2166 
2167 				kmem_free(buf, size);
2168 
2169 				if (ISSIG(curthread, JUSTLOOKING))
2170 					return (EINTR);
2171 
2172 				goto again;
2173 			}
2174 
2175 			php->pr_nmap++;
2176 			php->pr_npage += npage;
2177 			pmp->pr_vaddr = (uintptr_t)saddr;
2178 			pmp->pr_npage = npage;
2179 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2180 			pmp->pr_mflags = 0;
2181 			if (prot & PROT_READ)
2182 				pmp->pr_mflags |= MA_READ;
2183 			if (prot & PROT_WRITE)
2184 				pmp->pr_mflags |= MA_WRITE;
2185 			if (prot & PROT_EXEC)
2186 				pmp->pr_mflags |= MA_EXEC;
2187 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2188 				pmp->pr_mflags |= MA_SHARED;
2189 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2190 				pmp->pr_mflags |= MA_NORESERVE;
2191 			if (seg->s_ops == &segspt_shmops ||
2192 			    (seg->s_ops == &segvn_ops &&
2193 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2194 				pmp->pr_mflags |= MA_ANON;
2195 			if (seg->s_ops == &segspt_shmops)
2196 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2197 			pmp->pr_pagesize = PAGESIZE;
2198 			/*
2199 			 * Manufacture a filename for the "object" directory.
2200 			 */
2201 			vattr.va_mask = AT_FSID|AT_NODEID;
2202 			if (seg->s_ops == &segvn_ops &&
2203 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2204 			    vp != NULL && vp->v_type == VREG &&
2205 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2206 				if (vp == p->p_exec)
2207 					(void) strcpy(pmp->pr_mapname, "a.out");
2208 				else
2209 					pr_object_name(pmp->pr_mapname,
2210 					    vp, &vattr);
2211 			}
2212 
2213 			/*
2214 			 * Get the SysV shared memory id, if any.
2215 			 */
2216 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2217 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2218 			    SHMID_NONE) {
2219 				if (pmp->pr_shmid == SHMID_FREE)
2220 					pmp->pr_shmid = -1;
2221 
2222 				pmp->pr_mflags |= MA_SHM;
2223 			} else {
2224 				pmp->pr_shmid = -1;
2225 			}
2226 
2227 			hat_getstat(as, saddr, len, hatid,
2228 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2229 			pmp = (prasmap_t *)next;
2230 		}
2231 		ASSERT(tmp == NULL);
2232 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2233 
2234 	AS_LOCK_EXIT(as);
2235 
2236 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2237 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2238 	kmem_free(buf, size);
2239 
2240 	return (error);
2241 }
2242 
2243 #ifdef _SYSCALL32_IMPL
2244 int
2245 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2246 {
2247 	struct as *as = p->p_as;
2248 	caddr_t buf;
2249 	size_t size;
2250 	prpageheader32_t *php;
2251 	prasmap32_t *pmp;
2252 	struct seg *seg;
2253 	int error;
2254 
2255 again:
2256 	AS_LOCK_ENTER(as, RW_WRITER);
2257 
2258 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2259 		AS_LOCK_EXIT(as);
2260 		return (0);
2261 	}
2262 	size = prpdsize32(as);
2263 	if (uiop->uio_resid < size) {
2264 		AS_LOCK_EXIT(as);
2265 		return (E2BIG);
2266 	}
2267 
2268 	buf = kmem_zalloc(size, KM_SLEEP);
2269 	php = (prpageheader32_t *)buf;
2270 	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2271 
2272 	hrt2ts32(gethrtime(), &php->pr_tstamp);
2273 	php->pr_nmap = 0;
2274 	php->pr_npage = 0;
2275 	do {
2276 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2277 		caddr_t saddr, naddr;
2278 		void *tmp = NULL;
2279 
2280 		if ((seg->s_flags & S_HOLE) != 0) {
2281 			continue;
2282 		}
2283 
2284 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2285 			struct vnode *vp;
2286 			struct vattr vattr;
2287 			size_t len;
2288 			size_t npage;
2289 			uint_t prot;
2290 			uintptr_t next;
2291 
2292 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2293 			if ((len = (size_t)(naddr - saddr)) == 0)
2294 				continue;
2295 			npage = len / PAGESIZE;
2296 			next = (uintptr_t)(pmp + 1) + round8(npage);
2297 			/*
2298 			 * It's possible that the address space can change
2299 			 * subtlely even though we're holding as->a_lock
2300 			 * due to the nondeterminism of page_exists() in
2301 			 * the presence of asychronously flushed pages or
2302 			 * mapped files whose sizes are changing.
2303 			 * page_exists() may be called indirectly from
2304 			 * pr_getprot() by a SEGOP_INCORE() routine.
2305 			 * If this happens we need to make sure we don't
2306 			 * overrun the buffer whose size we computed based
2307 			 * on the initial iteration through the segments.
2308 			 * Once we've detected an overflow, we need to clean
2309 			 * up the temporary memory allocated in pr_getprot()
2310 			 * and retry. If there's a pending signal, we return
2311 			 * EINTR so that this thread can be dislodged if
2312 			 * a latent bug causes us to spin indefinitely.
2313 			 */
2314 			if (next > (uintptr_t)buf + size) {
2315 				pr_getprot_done(&tmp);
2316 				AS_LOCK_EXIT(as);
2317 
2318 				kmem_free(buf, size);
2319 
2320 				if (ISSIG(curthread, JUSTLOOKING))
2321 					return (EINTR);
2322 
2323 				goto again;
2324 			}
2325 
2326 			php->pr_nmap++;
2327 			php->pr_npage += npage;
2328 			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2329 			pmp->pr_npage = (size32_t)npage;
2330 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2331 			pmp->pr_mflags = 0;
2332 			if (prot & PROT_READ)
2333 				pmp->pr_mflags |= MA_READ;
2334 			if (prot & PROT_WRITE)
2335 				pmp->pr_mflags |= MA_WRITE;
2336 			if (prot & PROT_EXEC)
2337 				pmp->pr_mflags |= MA_EXEC;
2338 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2339 				pmp->pr_mflags |= MA_SHARED;
2340 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2341 				pmp->pr_mflags |= MA_NORESERVE;
2342 			if (seg->s_ops == &segspt_shmops ||
2343 			    (seg->s_ops == &segvn_ops &&
2344 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2345 				pmp->pr_mflags |= MA_ANON;
2346 			if (seg->s_ops == &segspt_shmops)
2347 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2348 			pmp->pr_pagesize = PAGESIZE;
2349 			/*
2350 			 * Manufacture a filename for the "object" directory.
2351 			 */
2352 			vattr.va_mask = AT_FSID|AT_NODEID;
2353 			if (seg->s_ops == &segvn_ops &&
2354 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2355 			    vp != NULL && vp->v_type == VREG &&
2356 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2357 				if (vp == p->p_exec)
2358 					(void) strcpy(pmp->pr_mapname, "a.out");
2359 				else
2360 					pr_object_name(pmp->pr_mapname,
2361 					    vp, &vattr);
2362 			}
2363 
2364 			/*
2365 			 * Get the SysV shared memory id, if any.
2366 			 */
2367 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2368 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2369 			    SHMID_NONE) {
2370 				if (pmp->pr_shmid == SHMID_FREE)
2371 					pmp->pr_shmid = -1;
2372 
2373 				pmp->pr_mflags |= MA_SHM;
2374 			} else {
2375 				pmp->pr_shmid = -1;
2376 			}
2377 
2378 			hat_getstat(as, saddr, len, hatid,
2379 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2380 			pmp = (prasmap32_t *)next;
2381 		}
2382 		ASSERT(tmp == NULL);
2383 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2384 
2385 	AS_LOCK_EXIT(as);
2386 
2387 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2388 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2389 	kmem_free(buf, size);
2390 
2391 	return (error);
2392 }
2393 #endif	/* _SYSCALL32_IMPL */
2394 
2395 ushort_t
2396 prgetpctcpu(uint64_t pct)
2397 {
2398 	/*
2399 	 * The value returned will be relevant in the zone of the examiner,
2400 	 * which may not be the same as the zone which performed the procfs
2401 	 * mount.
2402 	 */
2403 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2404 
2405 	/*
2406 	 * Prorate over online cpus so we don't exceed 100%
2407 	 */
2408 	if (nonline > 1)
2409 		pct /= nonline;
2410 	pct >>= 16;		/* convert to 16-bit scaled integer */
2411 	if (pct > 0x8000)	/* might happen, due to rounding */
2412 		pct = 0x8000;
2413 	return ((ushort_t)pct);
2414 }
2415 
2416 /*
2417  * Return information used by ps(1).
2418  */
2419 void
2420 prgetpsinfo(proc_t *p, psinfo_t *psp)
2421 {
2422 	kthread_t *t;
2423 	struct cred *cred;
2424 	hrtime_t hrutime, hrstime;
2425 
2426 	ASSERT(MUTEX_HELD(&p->p_lock));
2427 
2428 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2429 		bzero(psp, sizeof (*psp));
2430 	else {
2431 		thread_unlock(t);
2432 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2433 	}
2434 
2435 	/*
2436 	 * only export SSYS and SMSACCT; everything else is off-limits to
2437 	 * userland apps.
2438 	 */
2439 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2440 	psp->pr_nlwp = p->p_lwpcnt;
2441 	psp->pr_nzomb = p->p_zombcnt;
2442 	mutex_enter(&p->p_crlock);
2443 	cred = p->p_cred;
2444 	psp->pr_uid = crgetruid(cred);
2445 	psp->pr_euid = crgetuid(cred);
2446 	psp->pr_gid = crgetrgid(cred);
2447 	psp->pr_egid = crgetgid(cred);
2448 	mutex_exit(&p->p_crlock);
2449 	psp->pr_pid = p->p_pid;
2450 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2451 	    (p->p_flag & SZONETOP)) {
2452 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2453 		/*
2454 		 * Inside local zones, fake zsched's pid as parent pids for
2455 		 * processes which reference processes outside of the zone.
2456 		 */
2457 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2458 	} else {
2459 		psp->pr_ppid = p->p_ppid;
2460 	}
2461 	psp->pr_pgid = p->p_pgrp;
2462 	psp->pr_sid = p->p_sessp->s_sid;
2463 	psp->pr_taskid = p->p_task->tk_tkid;
2464 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2465 	psp->pr_poolid = p->p_pool->pool_id;
2466 	psp->pr_zoneid = p->p_zone->zone_id;
2467 	if ((psp->pr_contract = PRCTID(p)) == 0)
2468 		psp->pr_contract = -1;
2469 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2470 	switch (p->p_model) {
2471 	case DATAMODEL_ILP32:
2472 		psp->pr_dmodel = PR_MODEL_ILP32;
2473 		break;
2474 	case DATAMODEL_LP64:
2475 		psp->pr_dmodel = PR_MODEL_LP64;
2476 		break;
2477 	}
2478 	hrutime = mstate_aggr_state(p, LMS_USER);
2479 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2480 	hrt2ts((hrutime + hrstime), &psp->pr_time);
2481 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2482 
2483 	if (t == NULL) {
2484 		int wcode = p->p_wcode;		/* must be atomic read */
2485 
2486 		if (wcode)
2487 			psp->pr_wstat = wstat(wcode, p->p_wdata);
2488 		psp->pr_ttydev = PRNODEV;
2489 		psp->pr_lwp.pr_state = SZOMB;
2490 		psp->pr_lwp.pr_sname = 'Z';
2491 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2492 		psp->pr_lwp.pr_bindpset = PS_NONE;
2493 	} else {
2494 		user_t *up = PTOU(p);
2495 		struct as *as;
2496 		dev_t d;
2497 		extern dev_t rwsconsdev, rconsdev, uconsdev;
2498 
2499 		d = cttydev(p);
2500 		/*
2501 		 * If the controlling terminal is the real
2502 		 * or workstation console device, map to what the
2503 		 * user thinks is the console device. Handle case when
2504 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2505 		 */
2506 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2507 			d = uconsdev;
2508 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2509 		psp->pr_start = up->u_start;
2510 		bcopy(up->u_comm, psp->pr_fname,
2511 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2512 		bcopy(up->u_psargs, psp->pr_psargs,
2513 		    MIN(PRARGSZ-1, PSARGSZ));
2514 		psp->pr_argc = up->u_argc;
2515 		psp->pr_argv = up->u_argv;
2516 		psp->pr_envp = up->u_envp;
2517 
2518 		/* get the chosen lwp's lwpsinfo */
2519 		prgetlwpsinfo(t, &psp->pr_lwp);
2520 
2521 		/* compute %cpu for the process */
2522 		if (p->p_lwpcnt == 1)
2523 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2524 		else {
2525 			uint64_t pct = 0;
2526 			hrtime_t cur_time = gethrtime_unscaled();
2527 
2528 			t = p->p_tlist;
2529 			do {
2530 				pct += cpu_update_pct(t, cur_time);
2531 			} while ((t = t->t_forw) != p->p_tlist);
2532 
2533 			psp->pr_pctcpu = prgetpctcpu(pct);
2534 		}
2535 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2536 			psp->pr_size = 0;
2537 			psp->pr_rssize = 0;
2538 		} else {
2539 			mutex_exit(&p->p_lock);
2540 			AS_LOCK_ENTER(as, RW_READER);
2541 			psp->pr_size = btopr(as->a_resvsize) *
2542 			    (PAGESIZE / 1024);
2543 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2544 			psp->pr_pctmem = rm_pctmemory(as);
2545 			AS_LOCK_EXIT(as);
2546 			mutex_enter(&p->p_lock);
2547 		}
2548 	}
2549 }
2550 
2551 static size_t
2552 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen)
2553 {
2554 	pr_misc_header_t *misc;
2555 	size_t len;
2556 
2557 	len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2558 
2559 	if (data != NULL) {
2560 		misc = pr_iol_newbuf(data, len);
2561 		misc->pr_misc_type = type;
2562 		misc->pr_misc_size = len;
2563 		misc++;
2564 		bcopy((char *)val, (char *)misc, vlen);
2565 	}
2566 
2567 	return (len);
2568 }
2569 
2570 /*
2571  * There's no elegant way to determine if a character device
2572  * supports TLI, so just check a hardcoded list of known TLI
2573  * devices.
2574  */
2575 
2576 static boolean_t
2577 pristli(vnode_t *vp)
2578 {
2579 	static const char *tlidevs[] = {
2580 	    "udp", "udp6", "tcp", "tcp6"
2581 	};
2582 	char *devname;
2583 	uint_t i;
2584 
2585 	ASSERT(vp != NULL);
2586 
2587 	if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0)
2588 		return (B_FALSE);
2589 
2590 	if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL)
2591 		return (B_FALSE);
2592 
2593 	for (i = 0; i < ARRAY_SIZE(tlidevs); i++) {
2594 		if (strcmp(devname, tlidevs[i]) == 0)
2595 			return (B_TRUE);
2596 	}
2597 
2598 	return (B_FALSE);
2599 }
2600 
2601 static size_t
2602 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
2603 {
2604 	char *pathname;
2605 	size_t pathlen;
2606 	size_t sz = 0;
2607 
2608 	/*
2609 	 * The global zone's path to a file in a non-global zone can exceed
2610 	 * MAXPATHLEN.
2611 	 */
2612 	pathlen = MAXPATHLEN * 2 + 1;
2613 	pathname = kmem_alloc(pathlen, KM_SLEEP);
2614 
2615 	if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
2616 		sz += prfdinfomisc(data, PR_PATHNAME,
2617 		    pathname, strlen(pathname) + 1);
2618 	}
2619 
2620 	kmem_free(pathname, pathlen);
2621 
2622 	return (sz);
2623 }
2624 
2625 static size_t
2626 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
2627 {
2628 	strcmd_t strcmd;
2629 	int32_t rval;
2630 	size_t sz = 0;
2631 
2632 	strcmd.sc_cmd = TI_GETMYNAME;
2633 	strcmd.sc_timeout = 1;
2634 	strcmd.sc_len = STRCMDBUFSIZE;
2635 
2636 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2637 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2638 		sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
2639 		    strcmd.sc_len);
2640 	}
2641 
2642 	strcmd.sc_cmd = TI_GETPEERNAME;
2643 	strcmd.sc_timeout = 1;
2644 	strcmd.sc_len = STRCMDBUFSIZE;
2645 
2646 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2647 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2648 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
2649 		    strcmd.sc_len);
2650 	}
2651 
2652 	return (sz);
2653 }
2654 
2655 static size_t
2656 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
2657 {
2658 	sonode_t *so;
2659 	socklen_t vlen;
2660 	size_t sz = 0;
2661 	uint_t i;
2662 
2663 	if (vp->v_stream != NULL) {
2664 		so = VTOSO(vp->v_stream->sd_vnode);
2665 
2666 		if (so->so_version == SOV_STREAM)
2667 			so = NULL;
2668 	} else {
2669 		so = VTOSO(vp);
2670 	}
2671 
2672 	if (so == NULL)
2673 		return (0);
2674 
2675 	DTRACE_PROBE1(sonode, sonode_t *, so);
2676 
2677 	/* prmisc - PR_SOCKETNAME */
2678 
2679 	struct sockaddr_storage buf;
2680 	struct sockaddr *name = (struct sockaddr *)&buf;
2681 
2682 	vlen = sizeof (buf);
2683 	if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
2684 		sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
2685 
2686 	/* prmisc - PR_PEERSOCKNAME */
2687 
2688 	vlen = sizeof (buf);
2689 	if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
2690 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
2691 
2692 	/* prmisc - PR_SOCKOPTS_BOOL_OPTS */
2693 
2694 	static struct boolopt {
2695 		int		level;
2696 		int		opt;
2697 		int		bopt;
2698 	} boolopts[] = {
2699 		{ SOL_SOCKET, SO_DEBUG,		PR_SO_DEBUG },
2700 		{ SOL_SOCKET, SO_REUSEADDR,	PR_SO_REUSEADDR },
2701 #ifdef SO_REUSEPORT
2702 		/* SmartOS and OmniOS have SO_REUSEPORT */
2703 		{ SOL_SOCKET, SO_REUSEPORT,	PR_SO_REUSEPORT },
2704 #endif
2705 		{ SOL_SOCKET, SO_KEEPALIVE,	PR_SO_KEEPALIVE },
2706 		{ SOL_SOCKET, SO_DONTROUTE,	PR_SO_DONTROUTE },
2707 		{ SOL_SOCKET, SO_BROADCAST,	PR_SO_BROADCAST },
2708 		{ SOL_SOCKET, SO_OOBINLINE,	PR_SO_OOBINLINE },
2709 		{ SOL_SOCKET, SO_DGRAM_ERRIND,	PR_SO_DGRAM_ERRIND },
2710 		{ SOL_SOCKET, SO_ALLZONES,	PR_SO_ALLZONES },
2711 		{ SOL_SOCKET, SO_MAC_EXEMPT,	PR_SO_MAC_EXEMPT },
2712 		{ SOL_SOCKET, SO_MAC_IMPLICIT,	PR_SO_MAC_IMPLICIT },
2713 		{ SOL_SOCKET, SO_EXCLBIND,	PR_SO_EXCLBIND },
2714 		{ SOL_SOCKET, SO_VRRP,		PR_SO_VRRP },
2715 		{ IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
2716 		    PR_UDP_NAT_T_ENDPOINT }
2717 	};
2718 	prsockopts_bool_opts_t opts;
2719 	int val;
2720 
2721 	if (data != NULL) {
2722 		opts.prsock_bool_opts = 0;
2723 
2724 		for (i = 0; i < ARRAY_SIZE(boolopts); i++) {
2725 			vlen = sizeof (val);
2726 			if (SOP_GETSOCKOPT(so, boolopts[i].level,
2727 			    boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
2728 			    val != 0) {
2729 				opts.prsock_bool_opts |= boolopts[i].bopt;
2730 			}
2731 		}
2732 	}
2733 
2734 	sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
2735 
2736 	/* prmisc - PR_SOCKOPT_LINGER */
2737 
2738 	struct linger l;
2739 
2740 	vlen = sizeof (l);
2741 	if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
2742 	    0, cred) == 0 && vlen > 0) {
2743 		sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
2744 	}
2745 
2746 	/* prmisc - PR_SOCKOPT_* int types */
2747 
2748 	static struct sopt {
2749 		int		level;
2750 		int		opt;
2751 		int		bopt;
2752 	} sopts[] = {
2753 		{ SOL_SOCKET, SO_TYPE,		PR_SOCKOPT_TYPE },
2754 		{ SOL_SOCKET, SO_SNDBUF,	PR_SOCKOPT_SNDBUF },
2755 		{ SOL_SOCKET, SO_RCVBUF,	PR_SOCKOPT_RCVBUF }
2756 	};
2757 
2758 	for (i = 0; i < ARRAY_SIZE(sopts); i++) {
2759 		vlen = sizeof (val);
2760 		if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
2761 		    &val, &vlen, 0, cred) == 0 && vlen > 0) {
2762 			sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
2763 		}
2764 	}
2765 
2766 	/* prmisc - PR_SOCKOPT_IP_NEXTHOP */
2767 
2768 	in_addr_t nexthop_val;
2769 
2770 	vlen = sizeof (nexthop_val);
2771 	if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
2772 	    &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
2773 		sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
2774 		    &nexthop_val, vlen);
2775 	}
2776 
2777 	/* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
2778 
2779 	struct sockaddr_in6 nexthop6_val;
2780 
2781 	vlen = sizeof (nexthop6_val);
2782 	if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
2783 	    &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
2784 		sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
2785 		    &nexthop6_val, vlen);
2786 	}
2787 
2788 	/* prmisc - PR_SOCKOPT_TCP_CONGESTION */
2789 
2790 	char cong[CC_ALGO_NAME_MAX];
2791 
2792 	vlen = sizeof (cong);
2793 	if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
2794 	    &cong, &vlen, 0, cred) == 0 && vlen > 0) {
2795 		sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
2796 	}
2797 
2798 	/* prmisc - PR_SOCKFILTERS_PRIV */
2799 
2800 	struct fil_info fi;
2801 
2802 	vlen = sizeof (fi);
2803 	if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2804 	    &fi, &vlen, 0, cred) == 0 && vlen != 0) {
2805 		pr_misc_header_t *misc;
2806 		size_t len;
2807 
2808 		/*
2809 		 * We limit the number of returned filters to 32.
2810 		 * This is the maximum number that pfiles will print
2811 		 * anyway.
2812 		 */
2813 		vlen = MIN(32, fi.fi_pos + 1);
2814 		vlen *= sizeof (fi);
2815 
2816 		len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2817 		sz += len;
2818 
2819 		if (data != NULL) {
2820 			/*
2821 			 * So that the filter list can be built incrementally,
2822 			 * prfdinfomisc() is not used here. Instead we
2823 			 * allocate a buffer directly on the copyout list using
2824 			 * pr_iol_newbuf()
2825 			 */
2826 			misc = pr_iol_newbuf(data, len);
2827 			misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
2828 			misc->pr_misc_size = len;
2829 			misc++;
2830 			len = vlen;
2831 			if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2832 			    misc, &vlen, 0, cred) == 0) {
2833 				/*
2834 				 * In case the number of filters has reduced
2835 				 * since the first call, explicitly zero out
2836 				 * any unpopulated space.
2837 				 */
2838 				if (vlen < len)
2839 					bzero(misc + vlen, len - vlen);
2840 			} else {
2841 				/* Something went wrong, zero out the result */
2842 				bzero(misc, vlen);
2843 			}
2844 		}
2845 	}
2846 
2847 	return (sz);
2848 }
2849 
/*
 * State shared between the callers of nm_walk_mounts() in this file and
 * the prfdinfo_nm_path() callback.
 */
typedef struct prfdinfo_nm_path_cbdata {
	proc_t		*nmp_p;		/* passed on to prfdinfopath() */
	u_offset_t	nmp_sz;		/* sum of prfdinfopath() results */
	list_t		*nmp_data;	/* output list; NULL when only sizing */
} prfdinfo_nm_path_cbdata_t;
2855 
2856 static int
2857 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
2858 {
2859 	prfdinfo_nm_path_cbdata_t *cb = arg;
2860 
2861 	cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);
2862 
2863 	return (0);
2864 }
2865 
2866 u_offset_t
2867 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
2868 {
2869 	u_offset_t sz;
2870 
2871 	/*
2872 	 * All fdinfo files will be at least this big -
2873 	 * sizeof fdinfo struct + zero length trailer
2874 	 */
2875 	sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);
2876 
2877 	/* Pathname */
2878 	switch (vp->v_type) {
2879 	case VDOOR: {
2880 		prfdinfo_nm_path_cbdata_t cb = {
2881 			.nmp_p		= p,
2882 			.nmp_data	= NULL,
2883 			.nmp_sz		= 0
2884 		};
2885 
2886 		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
2887 		sz += cb.nmp_sz;
2888 		break;
2889 	}
2890 	case VSOCK:
2891 		break;
2892 	default:
2893 		sz += prfdinfopath(p, vp, NULL, cred);
2894 	}
2895 
2896 	/* Socket options */
2897 	if (vp->v_type == VSOCK)
2898 		sz += prfdinfosockopt(vp, NULL, cred);
2899 
2900 	/* TLI/XTI sockets */
2901 	if (pristli(vp))
2902 		sz += prfdinfotlisockopt(vp, NULL, cred);
2903 
2904 	return (sz);
2905 }
2906 
2907 int
2908 prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
2909     cred_t *file_cred, list_t *data)
2910 {
2911 	vattr_t vattr;
2912 	int error;
2913 
2914 	/*
2915 	 * The buffer has been initialised to zero by pr_iol_newbuf().
2916 	 * Initialise defaults for any values that should not default to zero.
2917 	 */
2918 	fdinfo->pr_uid = (uid_t)-1;
2919 	fdinfo->pr_gid = (gid_t)-1;
2920 	fdinfo->pr_size = -1;
2921 	fdinfo->pr_locktype = F_UNLCK;
2922 	fdinfo->pr_lockpid = -1;
2923 	fdinfo->pr_locksysid = -1;
2924 	fdinfo->pr_peerpid = -1;
2925 
2926 	/* Offset */
2927 
2928 	/*
2929 	 * pr_offset has already been set from the underlying file_t.
2930 	 * Check if it is plausible and reset to -1 if not.
2931 	 */
2932 	if (fdinfo->pr_offset != -1 &&
2933 	    VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
2934 		fdinfo->pr_offset = -1;
2935 
2936 	/*
2937 	 * Attributes
2938 	 *
2939 	 * We have two cred_t structures available here.
2940 	 * 'cred' is the caller's credential, and 'file_cred' is the credential
2941 	 * for the file being inspected.
2942 	 *
2943 	 * When looking up the file attributes, file_cred is used in order
2944 	 * that the correct ownership is set for doors and FIFOs. Since the
2945 	 * caller has permission to read the fdinfo file in proc, this does
2946 	 * not expose any additional information.
2947 	 */
2948 	vattr.va_mask = AT_STAT;
2949 	if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) {
2950 		fdinfo->pr_major = getmajor(vattr.va_fsid);
2951 		fdinfo->pr_minor = getminor(vattr.va_fsid);
2952 		fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
2953 		fdinfo->pr_rminor = getminor(vattr.va_rdev);
2954 		fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
2955 		fdinfo->pr_size = (off64_t)vattr.va_size;
2956 		fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
2957 		fdinfo->pr_uid = vattr.va_uid;
2958 		fdinfo->pr_gid = vattr.va_gid;
2959 		if (vp->v_type == VSOCK)
2960 			fdinfo->pr_fileflags |= sock_getfasync(vp);
2961 	}
2962 
2963 	/* locks */
2964 
2965 	flock64_t bf;
2966 
2967 	bzero(&bf, sizeof (bf));
2968 	bf.l_type = F_WRLCK;
2969 
2970 	if (VOP_FRLOCK(vp, F_GETLK, &bf,
2971 	    (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL,
2972 	    cred, NULL) == 0 && bf.l_type != F_UNLCK) {
2973 		fdinfo->pr_locktype = bf.l_type;
2974 		fdinfo->pr_lockpid = bf.l_pid;
2975 		fdinfo->pr_locksysid = bf.l_sysid;
2976 	}
2977 
2978 	/* peer cred */
2979 
2980 	k_peercred_t kpc;
2981 
2982 	switch (vp->v_type) {
2983 	case VFIFO:
2984 	case VSOCK: {
2985 		int32_t rval;
2986 
2987 		error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc,
2988 		    FKIOCTL, cred, &rval, NULL);
2989 		break;
2990 	}
2991 	case VCHR: {
2992 		struct strioctl strioc;
2993 		int32_t rval;
2994 
2995 		if (vp->v_stream == NULL) {
2996 			error = ENOTSUP;
2997 			break;
2998 		}
2999 		strioc.ic_cmd = _I_GETPEERCRED;
3000 		strioc.ic_timout = INFTIM;
3001 		strioc.ic_len = (int)sizeof (k_peercred_t);
3002 		strioc.ic_dp = (char *)&kpc;
3003 
3004 		error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL,
3005 		    STR_NOSIG | K_TO_K, cred, &rval);
3006 		break;
3007 	}
3008 	default:
3009 		error = ENOTSUP;
3010 		break;
3011 	}
3012 
3013 	if (error == 0 && kpc.pc_cr != NULL) {
3014 		proc_t *peerp;
3015 
3016 		fdinfo->pr_peerpid = kpc.pc_cpid;
3017 
3018 		crfree(kpc.pc_cr);
3019 
3020 		mutex_enter(&pidlock);
3021 		if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) {
3022 			user_t *up;
3023 
3024 			mutex_enter(&peerp->p_lock);
3025 			mutex_exit(&pidlock);
3026 
3027 			up = PTOU(peerp);
3028 			bcopy(up->u_comm, fdinfo->pr_peername,
3029 			    MIN(sizeof (up->u_comm),
3030 			    sizeof (fdinfo->pr_peername) - 1));
3031 
3032 			mutex_exit(&peerp->p_lock);
3033 		} else {
3034 			mutex_exit(&pidlock);
3035 		}
3036 	}
3037 
3038 	/* pathname */
3039 
3040 	switch (vp->v_type) {
3041 	case VDOOR: {
3042 		prfdinfo_nm_path_cbdata_t cb = {
3043 			.nmp_p		= p,
3044 			.nmp_data	= data,
3045 			.nmp_sz		= 0
3046 		};
3047 
3048 		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
3049 		break;
3050 	}
3051 	case VSOCK:
3052 		/*
3053 		 * Don't attempt to determine the path for a socket as the
3054 		 * vnode has no associated v_path. It will cause a linear scan
3055 		 * of the dnlc table and result in no path being found.
3056 		 */
3057 		break;
3058 	default:
3059 		(void) prfdinfopath(p, vp, data, cred);
3060 	}
3061 
3062 	/* socket options */
3063 	if (vp->v_type == VSOCK)
3064 		(void) prfdinfosockopt(vp, data, cred);
3065 
3066 	/* TLI/XTI stream sockets */
3067 	if (pristli(vp))
3068 		(void) prfdinfotlisockopt(vp, data, cred);
3069 
3070 	/*
3071 	 * Add a terminating header with a zero size.
3072 	 */
3073 	pr_misc_header_t *misc;
3074 
3075 	misc = pr_iol_newbuf(data, sizeof (*misc));
3076 	misc->pr_misc_size = 0;
3077 	misc->pr_misc_type = (uint_t)-1;
3078 
3079 	return (0);
3080 }
3081 
3082 #ifdef _SYSCALL32_IMPL
3083 void
3084 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
3085 {
3086 	kthread_t *t;
3087 	struct cred *cred;
3088 	hrtime_t hrutime, hrstime;
3089 
3090 	ASSERT(MUTEX_HELD(&p->p_lock));
3091 
3092 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
3093 		bzero(psp, sizeof (*psp));
3094 	else {
3095 		thread_unlock(t);
3096 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
3097 	}
3098 
3099 	/*
3100 	 * only export SSYS and SMSACCT; everything else is off-limits to
3101 	 * userland apps.
3102 	 */
3103 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
3104 	psp->pr_nlwp = p->p_lwpcnt;
3105 	psp->pr_nzomb = p->p_zombcnt;
3106 	mutex_enter(&p->p_crlock);
3107 	cred = p->p_cred;
3108 	psp->pr_uid = crgetruid(cred);
3109 	psp->pr_euid = crgetuid(cred);
3110 	psp->pr_gid = crgetrgid(cred);
3111 	psp->pr_egid = crgetgid(cred);
3112 	mutex_exit(&p->p_crlock);
3113 	psp->pr_pid = p->p_pid;
3114 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
3115 	    (p->p_flag & SZONETOP)) {
3116 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
3117 		/*
3118 		 * Inside local zones, fake zsched's pid as parent pids for
3119 		 * processes which reference processes outside of the zone.
3120 		 */
3121 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
3122 	} else {
3123 		psp->pr_ppid = p->p_ppid;
3124 	}
3125 	psp->pr_pgid = p->p_pgrp;
3126 	psp->pr_sid = p->p_sessp->s_sid;
3127 	psp->pr_taskid = p->p_task->tk_tkid;
3128 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
3129 	psp->pr_poolid = p->p_pool->pool_id;
3130 	psp->pr_zoneid = p->p_zone->zone_id;
3131 	if ((psp->pr_contract = PRCTID(p)) == 0)
3132 		psp->pr_contract = -1;
3133 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
3134 	switch (p->p_model) {
3135 	case DATAMODEL_ILP32:
3136 		psp->pr_dmodel = PR_MODEL_ILP32;
3137 		break;
3138 	case DATAMODEL_LP64:
3139 		psp->pr_dmodel = PR_MODEL_LP64;
3140 		break;
3141 	}
3142 	hrutime = mstate_aggr_state(p, LMS_USER);
3143 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
3144 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
3145 	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
3146 
3147 	if (t == NULL) {
3148 		extern int wstat(int, int);	/* needs a header file */
3149 		int wcode = p->p_wcode;		/* must be atomic read */
3150 
3151 		if (wcode)
3152 			psp->pr_wstat = wstat(wcode, p->p_wdata);
3153 		psp->pr_ttydev = PRNODEV32;
3154 		psp->pr_lwp.pr_state = SZOMB;
3155 		psp->pr_lwp.pr_sname = 'Z';
3156 	} else {
3157 		user_t *up = PTOU(p);
3158 		struct as *as;
3159 		dev_t d;
3160 		extern dev_t rwsconsdev, rconsdev, uconsdev;
3161 
3162 		d = cttydev(p);
3163 		/*
3164 		 * If the controlling terminal is the real
3165 		 * or workstation console device, map to what the
3166 		 * user thinks is the console device. Handle case when
3167 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
3168 		 */
3169 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
3170 			d = uconsdev;
3171 		(void) cmpldev(&psp->pr_ttydev, d);
3172 		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
3173 		bcopy(up->u_comm, psp->pr_fname,
3174 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
3175 		bcopy(up->u_psargs, psp->pr_psargs,
3176 		    MIN(PRARGSZ-1, PSARGSZ));
3177 		psp->pr_argc = up->u_argc;
3178 		psp->pr_argv = (caddr32_t)up->u_argv;
3179 		psp->pr_envp = (caddr32_t)up->u_envp;
3180 
3181 		/* get the chosen lwp's lwpsinfo */
3182 		prgetlwpsinfo32(t, &psp->pr_lwp);
3183 
3184 		/* compute %cpu for the process */
3185 		if (p->p_lwpcnt == 1)
3186 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
3187 		else {
3188 			uint64_t pct = 0;
3189 			hrtime_t cur_time;
3190 
3191 			t = p->p_tlist;
3192 			cur_time = gethrtime_unscaled();
3193 			do {
3194 				pct += cpu_update_pct(t, cur_time);
3195 			} while ((t = t->t_forw) != p->p_tlist);
3196 
3197 			psp->pr_pctcpu = prgetpctcpu(pct);
3198 		}
3199 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
3200 			psp->pr_size = 0;
3201 			psp->pr_rssize = 0;
3202 		} else {
3203 			mutex_exit(&p->p_lock);
3204 			AS_LOCK_ENTER(as, RW_READER);
3205 			psp->pr_size = (size32_t)
3206 			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
3207 			psp->pr_rssize = (size32_t)
3208 			    (rm_asrss(as) * (PAGESIZE / 1024));
3209 			psp->pr_pctmem = rm_pctmemory(as);
3210 			AS_LOCK_EXIT(as);
3211 			mutex_enter(&p->p_lock);
3212 		}
3213 	}
3214 
3215 	/*
3216 	 * If we are looking at an LP64 process, zero out
3217 	 * the fields that cannot be represented in ILP32.
3218 	 */
3219 	if (p->p_model != DATAMODEL_ILP32) {
3220 		psp->pr_size = 0;
3221 		psp->pr_rssize = 0;
3222 		psp->pr_argv = 0;
3223 		psp->pr_envp = 0;
3224 	}
3225 }
3226 
3227 #endif	/* _SYSCALL32_IMPL */
3228 
3229 void
3230 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
3231 {
3232 	klwp_t *lwp = ttolwp(t);
3233 	sobj_ops_t *sobj;
3234 	char c, state;
3235 	uint64_t pct;
3236 	int retval, niceval;
3237 	hrtime_t hrutime, hrstime;
3238 
3239 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3240 
3241 	bzero(psp, sizeof (*psp));
3242 
3243 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
3244 	psp->pr_lwpid = t->t_tid;
3245 	psp->pr_addr = (uintptr_t)t;
3246 	psp->pr_wchan = (uintptr_t)t->t_wchan;
3247 
3248 	/* map the thread state enum into a process state enum */
3249 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3250 	switch (state) {
3251 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
3252 	case TS_RUN:		state = SRUN;		c = 'R';	break;
3253 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
3254 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
3255 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
3256 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
3257 	default:		state = 0;		c = '?';	break;
3258 	}
3259 	psp->pr_state = state;
3260 	psp->pr_sname = c;
3261 	if ((sobj = t->t_sobj_ops) != NULL)
3262 		psp->pr_stype = SOBJ_TYPE(sobj);
3263 	retval = CL_DONICE(t, NULL, 0, &niceval);
3264 	if (retval == 0) {
3265 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3266 		psp->pr_nice = niceval + NZERO;
3267 	}
3268 	psp->pr_syscall = t->t_sysnum;
3269 	psp->pr_pri = t->t_pri;
3270 	psp->pr_start.tv_sec = t->t_start;
3271 	psp->pr_start.tv_nsec = 0L;
3272 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3273 	scalehrtime(&hrutime);
3274 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3275 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
3276 	scalehrtime(&hrstime);
3277 	hrt2ts(hrutime + hrstime, &psp->pr_time);
3278 	/* compute %cpu for the lwp */
3279 	pct = cpu_update_pct(t, gethrtime_unscaled());
3280 	psp->pr_pctcpu = prgetpctcpu(pct);
3281 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
3282 	if (psp->pr_cpu > 99)
3283 		psp->pr_cpu = 99;
3284 
3285 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3286 	    sizeof (psp->pr_clname) - 1);
3287 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
3288 	psp->pr_onpro = t->t_cpu->cpu_id;
3289 	psp->pr_bindpro = t->t_bind_cpu;
3290 	psp->pr_bindpset = t->t_bind_pset;
3291 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3292 }
3293 
3294 #ifdef _SYSCALL32_IMPL
3295 void
3296 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
3297 {
3298 	klwp_t *lwp = ttolwp(t);
3299 	sobj_ops_t *sobj;
3300 	char c, state;
3301 	uint64_t pct;
3302 	int retval, niceval;
3303 	hrtime_t hrutime, hrstime;
3304 
3305 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3306 
3307 	bzero(psp, sizeof (*psp));
3308 
3309 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
3310 	psp->pr_lwpid = t->t_tid;
3311 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
3312 	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
3313 
3314 	/* map the thread state enum into a process state enum */
3315 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3316 	switch (state) {
3317 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
3318 	case TS_RUN:		state = SRUN;		c = 'R';	break;
3319 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
3320 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
3321 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
3322 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
3323 	default:		state = 0;		c = '?';	break;
3324 	}
3325 	psp->pr_state = state;
3326 	psp->pr_sname = c;
3327 	if ((sobj = t->t_sobj_ops) != NULL)
3328 		psp->pr_stype = SOBJ_TYPE(sobj);
3329 	retval = CL_DONICE(t, NULL, 0, &niceval);
3330 	if (retval == 0) {
3331 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3332 		psp->pr_nice = niceval + NZERO;
3333 	} else {
3334 		psp->pr_oldpri = 0;
3335 		psp->pr_nice = 0;
3336 	}
3337 	psp->pr_syscall = t->t_sysnum;
3338 	psp->pr_pri = t->t_pri;
3339 	psp->pr_start.tv_sec = (time32_t)t->t_start;
3340 	psp->pr_start.tv_nsec = 0L;
3341 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3342 	scalehrtime(&hrutime);
3343 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3344 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
3345 	scalehrtime(&hrstime);
3346 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
3347 	/* compute %cpu for the lwp */
3348 	pct = cpu_update_pct(t, gethrtime_unscaled());
3349 	psp->pr_pctcpu = prgetpctcpu(pct);
3350 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
3351 	if (psp->pr_cpu > 99)
3352 		psp->pr_cpu = 99;
3353 
3354 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3355 	    sizeof (psp->pr_clname) - 1);
3356 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
3357 	psp->pr_onpro = t->t_cpu->cpu_id;
3358 	psp->pr_bindpro = t->t_bind_cpu;
3359 	psp->pr_bindpset = t->t_bind_pset;
3360 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3361 }
3362 #endif	/* _SYSCALL32_IMPL */
3363 
3364 #ifdef _SYSCALL32_IMPL
3365 
/*
 * Field-copy helpers for converting a native structure into its ILP32
 * counterpart.  In every macro `s' is a pointer to the source structure,
 * `d' a pointer to the destination structure and `field' the name of a
 * member present in both.
 *
 * Each macro expands to a single statement that must be terminated with a
 * semicolon by the caller, so it is safe in unbraced if/else bodies, and
 * the pointer arguments are parenthesized so arbitrary pointer expressions
 * may be passed.
 */

/* Plain assignment of one member. */
#define	PR_COPY_FIELD(s, d, field)	((d)->field = (s)->field)

/* Assign the member only when the source describes an ILP32 process. */
#define	PR_COPY_FIELD_ILP32(s, d, field)			\
	do {							\
		if ((s)->pr_dmodel == PR_MODEL_ILP32)		\
			(d)->field = (s)->field;		\
	} while (0)

/* Convert a native timespec member into its 32-bit form. */
#define	PR_COPY_TIMESPEC(s, d, field)				\
	do {							\
		TIMESPEC_TO_TIMESPEC32(&(d)->field, &(s)->field);	\
	} while (0)

/* Copy an array member; the length is the size of the destination. */
#define	PR_COPY_BUF(s, d, field)				\
	bcopy((s)->field, (d)->field, sizeof ((d)->field))

/* Documents a member that is deliberately not converted. */
#define	PR_IGNORE_FIELD(s, d, field)	((void)0)
3380 
3381 void
3382 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
3383 {
3384 	bzero(dest, sizeof (*dest));
3385 
3386 	PR_COPY_FIELD(src, dest, pr_flag);
3387 	PR_COPY_FIELD(src, dest, pr_lwpid);
3388 	PR_IGNORE_FIELD(src, dest, pr_addr);
3389 	PR_IGNORE_FIELD(src, dest, pr_wchan);
3390 	PR_COPY_FIELD(src, dest, pr_stype);
3391 	PR_COPY_FIELD(src, dest, pr_state);
3392 	PR_COPY_FIELD(src, dest, pr_sname);
3393 	PR_COPY_FIELD(src, dest, pr_nice);
3394 	PR_COPY_FIELD(src, dest, pr_syscall);
3395 	PR_COPY_FIELD(src, dest, pr_oldpri);
3396 	PR_COPY_FIELD(src, dest, pr_cpu);
3397 	PR_COPY_FIELD(src, dest, pr_pri);
3398 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3399 	PR_COPY_TIMESPEC(src, dest, pr_start);
3400 	PR_COPY_BUF(src, dest, pr_clname);
3401 	PR_COPY_BUF(src, dest, pr_name);
3402 	PR_COPY_FIELD(src, dest, pr_onpro);
3403 	PR_COPY_FIELD(src, dest, pr_bindpro);
3404 	PR_COPY_FIELD(src, dest, pr_bindpset);
3405 	PR_COPY_FIELD(src, dest, pr_lgrp);
3406 }
3407 
3408 void
3409 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
3410 {
3411 	bzero(dest, sizeof (*dest));
3412 
3413 	PR_COPY_FIELD(src, dest, pr_flag);
3414 	PR_COPY_FIELD(src, dest, pr_nlwp);
3415 	PR_COPY_FIELD(src, dest, pr_pid);
3416 	PR_COPY_FIELD(src, dest, pr_ppid);
3417 	PR_COPY_FIELD(src, dest, pr_pgid);
3418 	PR_COPY_FIELD(src, dest, pr_sid);
3419 	PR_COPY_FIELD(src, dest, pr_uid);
3420 	PR_COPY_FIELD(src, dest, pr_euid);
3421 	PR_COPY_FIELD(src, dest, pr_gid);
3422 	PR_COPY_FIELD(src, dest, pr_egid);
3423 	PR_IGNORE_FIELD(src, dest, pr_addr);
3424 	PR_COPY_FIELD_ILP32(src, dest, pr_size);
3425 	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
3426 	PR_COPY_FIELD(src, dest, pr_ttydev);
3427 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3428 	PR_COPY_FIELD(src, dest, pr_pctmem);
3429 	PR_COPY_TIMESPEC(src, dest, pr_start);
3430 	PR_COPY_TIMESPEC(src, dest, pr_time);
3431 	PR_COPY_TIMESPEC(src, dest, pr_ctime);
3432 	PR_COPY_BUF(src, dest, pr_fname);
3433 	PR_COPY_BUF(src, dest, pr_psargs);
3434 	PR_COPY_FIELD(src, dest, pr_wstat);
3435 	PR_COPY_FIELD(src, dest, pr_argc);
3436 	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
3437 	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
3438 	PR_COPY_FIELD(src, dest, pr_dmodel);
3439 	PR_COPY_FIELD(src, dest, pr_taskid);
3440 	PR_COPY_FIELD(src, dest, pr_projid);
3441 	PR_COPY_FIELD(src, dest, pr_nzomb);
3442 	PR_COPY_FIELD(src, dest, pr_poolid);
3443 	PR_COPY_FIELD(src, dest, pr_contract);
3444 	PR_COPY_FIELD(src, dest, pr_poolid);
3445 	PR_COPY_FIELD(src, dest, pr_poolid);
3446 
3447 	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
3448 }
3449 
3450 #undef	PR_COPY_FIELD
3451 #undef	PR_COPY_FIELD_ILP32
3452 #undef	PR_COPY_TIMESPEC
3453 #undef	PR_COPY_BUF
3454 #undef	PR_IGNORE_FIELD
3455 
3456 #endif	/* _SYSCALL32_IMPL */
3457 
3458 /*
3459  * This used to get called when microstate accounting was disabled but
3460  * microstate information was requested.  Since Microstate accounting is on
3461  * regardless of the proc flags, this simply makes it appear to procfs that
3462  * microstate accounting is on.  This is relatively meaningless since you
3463  * can't turn it off, but this is here for the sake of appearances.
3464  */
3465 
3466 /*ARGSUSED*/
3467 void
3468 estimate_msacct(kthread_t *t, hrtime_t curtime)
3469 {
3470 	proc_t *p;
3471 
3472 	if (t == NULL)
3473 		return;
3474 
3475 	p = ttoproc(t);
3476 	ASSERT(MUTEX_HELD(&p->p_lock));
3477 
3478 	/*
3479 	 * A system process (p0) could be referenced if the thread is
3480 	 * in the process of exiting.  Don't turn on microstate accounting
3481 	 * in that case.
3482 	 */
3483 	if (p->p_flag & SSYS)
3484 		return;
3485 
3486 	/*
3487 	 * Loop through all the LWPs (kernel threads) in the process.
3488 	 */
3489 	t = p->p_tlist;
3490 	do {
3491 		t->t_proc_flag |= TP_MSACCT;
3492 	} while ((t = t->t_forw) != p->p_tlist);
3493 
3494 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
3495 }
3496 
3497 /*
3498  * It's not really possible to disable microstate accounting anymore.
3499  * However, this routine simply turns off the ms accounting flags in a process
3500  * This way procfs can still pretend to turn microstate accounting on and
3501  * off for a process, but it actually doesn't do anything.  This is
3502  * a neutered form of preemptive idiot-proofing.
3503  */
3504 void
3505 disable_msacct(proc_t *p)
3506 {
3507 	kthread_t *t;
3508 
3509 	ASSERT(MUTEX_HELD(&p->p_lock));
3510 
3511 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
3512 	/*
3513 	 * Loop through all the LWPs (kernel threads) in the process.
3514 	 */
3515 	if ((t = p->p_tlist) != NULL) {
3516 		do {
3517 			/* clear per-thread flag */
3518 			t->t_proc_flag &= ~TP_MSACCT;
3519 		} while ((t = t->t_forw) != p->p_tlist);
3520 	}
3521 }
3522 
3523 /*
3524  * Return resource usage information.
3525  */
3526 void
3527 prgetusage(kthread_t *t, prhusage_t *pup)
3528 {
3529 	klwp_t *lwp = ttolwp(t);
3530 	hrtime_t *mstimep;
3531 	struct mstate *ms = &lwp->lwp_mstate;
3532 	int state;
3533 	int i;
3534 	hrtime_t curtime;
3535 	hrtime_t waitrq;
3536 	hrtime_t tmp1;
3537 
3538 	curtime = gethrtime_unscaled();
3539 
3540 	pup->pr_lwpid	= t->t_tid;
3541 	pup->pr_count	= 1;
3542 	pup->pr_create	= ms->ms_start;
3543 	pup->pr_term    = ms->ms_term;
3544 	scalehrtime(&pup->pr_create);
3545 	scalehrtime(&pup->pr_term);
3546 	if (ms->ms_term == 0) {
3547 		pup->pr_rtime = curtime - ms->ms_start;
3548 		scalehrtime(&pup->pr_rtime);
3549 	} else {
3550 		pup->pr_rtime = ms->ms_term - ms->ms_start;
3551 		scalehrtime(&pup->pr_rtime);
3552 	}
3553 
3554 
3555 	pup->pr_utime    = ms->ms_acct[LMS_USER];
3556 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
3557 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
3558 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
3559 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
3560 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
3561 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
3562 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
3563 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
3564 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
3565 
3566 	prscaleusage(pup);
3567 
3568 	/*
3569 	 * Adjust for time waiting in the dispatcher queue.
3570 	 */
3571 	waitrq = t->t_waitrq;	/* hopefully atomic */
3572 	if (waitrq != 0) {
3573 		if (waitrq > curtime) {
3574 			curtime = gethrtime_unscaled();
3575 		}
3576 		tmp1 = curtime - waitrq;
3577 		scalehrtime(&tmp1);
3578 		pup->pr_wtime += tmp1;
3579 		curtime = waitrq;
3580 	}
3581 
3582 	/*
3583 	 * Adjust for time spent in current microstate.
3584 	 */
3585 	if (ms->ms_state_start > curtime) {
3586 		curtime = gethrtime_unscaled();
3587 	}
3588 
3589 	i = 0;
3590 	do {
3591 		switch (state = t->t_mstate) {
3592 		case LMS_SLEEP:
3593 			/*
3594 			 * Update the timer for the current sleep state.
3595 			 */
3596 			switch (state = ms->ms_prev) {
3597 			case LMS_TFAULT:
3598 			case LMS_DFAULT:
3599 			case LMS_KFAULT:
3600 			case LMS_USER_LOCK:
3601 				break;
3602 			default:
3603 				state = LMS_SLEEP;
3604 				break;
3605 			}
3606 			break;
3607 		case LMS_TFAULT:
3608 		case LMS_DFAULT:
3609 		case LMS_KFAULT:
3610 		case LMS_USER_LOCK:
3611 			state = LMS_SYSTEM;
3612 			break;
3613 		}
3614 		switch (state) {
3615 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3616 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3617 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3618 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3619 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3620 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3621 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3622 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3623 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3624 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3625 		default:		panic("prgetusage: unknown microstate");
3626 		}
3627 		tmp1 = curtime - ms->ms_state_start;
3628 		if (tmp1 < 0) {
3629 			curtime = gethrtime_unscaled();
3630 			i++;
3631 			continue;
3632 		}
3633 		scalehrtime(&tmp1);
3634 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
3635 
3636 	*mstimep += tmp1;
3637 
3638 	/* update pup timestamp */
3639 	pup->pr_tstamp = curtime;
3640 	scalehrtime(&pup->pr_tstamp);
3641 
3642 	/*
3643 	 * Resource usage counters.
3644 	 */
3645 	pup->pr_minf  = lwp->lwp_ru.minflt;
3646 	pup->pr_majf  = lwp->lwp_ru.majflt;
3647 	pup->pr_nswap = lwp->lwp_ru.nswap;
3648 	pup->pr_inblk = lwp->lwp_ru.inblock;
3649 	pup->pr_oublk = lwp->lwp_ru.oublock;
3650 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
3651 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
3652 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
3653 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
3654 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
3655 	pup->pr_sysc  = lwp->lwp_ru.sysc;
3656 	pup->pr_ioch  = lwp->lwp_ru.ioch;
3657 }
3658 
3659 /*
3660  * Convert ms_acct stats from unscaled high-res time to nanoseconds
3661  */
3662 void
3663 prscaleusage(prhusage_t *usg)
3664 {
3665 	scalehrtime(&usg->pr_utime);
3666 	scalehrtime(&usg->pr_stime);
3667 	scalehrtime(&usg->pr_ttime);
3668 	scalehrtime(&usg->pr_tftime);
3669 	scalehrtime(&usg->pr_dftime);
3670 	scalehrtime(&usg->pr_kftime);
3671 	scalehrtime(&usg->pr_ltime);
3672 	scalehrtime(&usg->pr_slptime);
3673 	scalehrtime(&usg->pr_wtime);
3674 	scalehrtime(&usg->pr_stoptime);
3675 }
3676 
3677 
3678 /*
3679  * Sum resource usage information.
3680  */
3681 void
3682 praddusage(kthread_t *t, prhusage_t *pup)
3683 {
3684 	klwp_t *lwp = ttolwp(t);
3685 	hrtime_t *mstimep;
3686 	struct mstate *ms = &lwp->lwp_mstate;
3687 	int state;
3688 	int i;
3689 	hrtime_t curtime;
3690 	hrtime_t waitrq;
3691 	hrtime_t tmp;
3692 	prhusage_t conv;
3693 
3694 	curtime = gethrtime_unscaled();
3695 
3696 	if (ms->ms_term == 0) {
3697 		tmp = curtime - ms->ms_start;
3698 		scalehrtime(&tmp);
3699 		pup->pr_rtime += tmp;
3700 	} else {
3701 		tmp = ms->ms_term - ms->ms_start;
3702 		scalehrtime(&tmp);
3703 		pup->pr_rtime += tmp;
3704 	}
3705 
3706 	conv.pr_utime = ms->ms_acct[LMS_USER];
3707 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
3708 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
3709 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
3710 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
3711 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
3712 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3713 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
3714 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3715 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
3716 
3717 	prscaleusage(&conv);
3718 
3719 	pup->pr_utime	+= conv.pr_utime;
3720 	pup->pr_stime	+= conv.pr_stime;
3721 	pup->pr_ttime	+= conv.pr_ttime;
3722 	pup->pr_tftime	+= conv.pr_tftime;
3723 	pup->pr_dftime	+= conv.pr_dftime;
3724 	pup->pr_kftime	+= conv.pr_kftime;
3725 	pup->pr_ltime	+= conv.pr_ltime;
3726 	pup->pr_slptime	+= conv.pr_slptime;
3727 	pup->pr_wtime	+= conv.pr_wtime;
3728 	pup->pr_stoptime += conv.pr_stoptime;
3729 
3730 	/*
3731 	 * Adjust for time waiting in the dispatcher queue.
3732 	 */
3733 	waitrq = t->t_waitrq;	/* hopefully atomic */
3734 	if (waitrq != 0) {
3735 		if (waitrq > curtime) {
3736 			curtime = gethrtime_unscaled();
3737 		}
3738 		tmp = curtime - waitrq;
3739 		scalehrtime(&tmp);
3740 		pup->pr_wtime += tmp;
3741 		curtime = waitrq;
3742 	}
3743 
3744 	/*
3745 	 * Adjust for time spent in current microstate.
3746 	 */
3747 	if (ms->ms_state_start > curtime) {
3748 		curtime = gethrtime_unscaled();
3749 	}
3750 
3751 	i = 0;
3752 	do {
3753 		switch (state = t->t_mstate) {
3754 		case LMS_SLEEP:
3755 			/*
3756 			 * Update the timer for the current sleep state.
3757 			 */
3758 			switch (state = ms->ms_prev) {
3759 			case LMS_TFAULT:
3760 			case LMS_DFAULT:
3761 			case LMS_KFAULT:
3762 			case LMS_USER_LOCK:
3763 				break;
3764 			default:
3765 				state = LMS_SLEEP;
3766 				break;
3767 			}
3768 			break;
3769 		case LMS_TFAULT:
3770 		case LMS_DFAULT:
3771 		case LMS_KFAULT:
3772 		case LMS_USER_LOCK:
3773 			state = LMS_SYSTEM;
3774 			break;
3775 		}
3776 		switch (state) {
3777 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3778 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3779 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3780 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3781 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3782 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3783 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3784 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3785 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3786 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3787 		default:		panic("praddusage: unknown microstate");
3788 		}
3789 		tmp = curtime - ms->ms_state_start;
3790 		if (tmp < 0) {
3791 			curtime = gethrtime_unscaled();
3792 			i++;
3793 			continue;
3794 		}
3795 		scalehrtime(&tmp);
3796 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
3797 
3798 	*mstimep += tmp;
3799 
3800 	/* update pup timestamp */
3801 	pup->pr_tstamp = curtime;
3802 	scalehrtime(&pup->pr_tstamp);
3803 
3804 	/*
3805 	 * Resource usage counters.
3806 	 */
3807 	pup->pr_minf  += lwp->lwp_ru.minflt;
3808 	pup->pr_majf  += lwp->lwp_ru.majflt;
3809 	pup->pr_nswap += lwp->lwp_ru.nswap;
3810 	pup->pr_inblk += lwp->lwp_ru.inblock;
3811 	pup->pr_oublk += lwp->lwp_ru.oublock;
3812 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
3813 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
3814 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
3815 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
3816 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
3817 	pup->pr_sysc  += lwp->lwp_ru.sysc;
3818 	pup->pr_ioch  += lwp->lwp_ru.ioch;
3819 }
3820 
3821 /*
3822  * Convert a prhusage_t to a prusage_t.
3823  * This means convert each hrtime_t to a timestruc_t
3824  * and copy the count fields uint64_t => ulong_t.
3825  */
3826 void
3827 prcvtusage(prhusage_t *pup, prusage_t *upup)
3828 {
3829 	uint64_t *ullp;
3830 	ulong_t *ulp;
3831 	int i;
3832 
3833 	upup->pr_lwpid = pup->pr_lwpid;
3834 	upup->pr_count = pup->pr_count;
3835 
3836 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3837 	hrt2ts(pup->pr_create,	&upup->pr_create);
3838 	hrt2ts(pup->pr_term,	&upup->pr_term);
3839 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3840 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3841 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3842 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3843 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3844 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3845 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3846 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3847 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3848 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3849 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3850 	bzero(upup->filltime, sizeof (upup->filltime));
3851 
3852 	ullp = &pup->pr_minf;
3853 	ulp = &upup->pr_minf;
3854 	for (i = 0; i < 22; i++)
3855 		*ulp++ = (ulong_t)*ullp++;
3856 }
3857 
3858 #ifdef _SYSCALL32_IMPL
3859 void
3860 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3861 {
3862 	uint64_t *ullp;
3863 	uint32_t *ulp;
3864 	int i;
3865 
3866 	upup->pr_lwpid = pup->pr_lwpid;
3867 	upup->pr_count = pup->pr_count;
3868 
3869 	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3870 	hrt2ts32(pup->pr_create,	&upup->pr_create);
3871 	hrt2ts32(pup->pr_term,		&upup->pr_term);
3872 	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3873 	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3874 	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3875 	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3876 	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3877 	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3878 	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3879 	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3880 	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3881 	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3882 	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3883 	bzero(upup->filltime, sizeof (upup->filltime));
3884 
3885 	ullp = &pup->pr_minf;
3886 	ulp = &upup->pr_minf;
3887 	for (i = 0; i < 22; i++)
3888 		*ulp++ = (uint32_t)*ullp++;
3889 }
3890 #endif	/* _SYSCALL32_IMPL */
3891 
/*
 * Determine whether a set is empty.
 *
 * The set is the array of n 32-bit words starting at sp.  Returns 1 if
 * every word is zero (including when n is 0), and 0 as soon as a non-zero
 * word is found.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t word;

	for (word = 0; word < n; word++) {
		if (sp[word] != 0)
			return (0);
	}

	return (1);
}
3903 
3904 /*
3905  * Utility routine for establishing a watched area in the process.
3906  * Keep the list of watched areas sorted by virtual address.
3907  */
3908 int
3909 set_watched_area(proc_t *p, struct watched_area *pwa)
3910 {
3911 	caddr_t vaddr = pwa->wa_vaddr;
3912 	caddr_t eaddr = pwa->wa_eaddr;
3913 	ulong_t flags = pwa->wa_flags;
3914 	struct watched_area *target;
3915 	avl_index_t where;
3916 	int error = 0;
3917 
3918 	/* we must not be holding p->p_lock, but the process must be locked */
3919 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3920 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3921 
3922 	/*
3923 	 * If this is our first watchpoint, enable watchpoints for the process.
3924 	 */
3925 	if (!pr_watch_active(p)) {
3926 		kthread_t *t;
3927 
3928 		mutex_enter(&p->p_lock);
3929 		if ((t = p->p_tlist) != NULL) {
3930 			do {
3931 				watch_enable(t);
3932 			} while ((t = t->t_forw) != p->p_tlist);
3933 		}
3934 		mutex_exit(&p->p_lock);
3935 	}
3936 
3937 	target = pr_find_watched_area(p, pwa, &where);
3938 	if (target != NULL) {
3939 		/*
3940 		 * We discovered an existing, overlapping watched area.
3941 		 * Allow it only if it is an exact match.
3942 		 */
3943 		if (target->wa_vaddr != vaddr ||
3944 		    target->wa_eaddr != eaddr)
3945 			error = EINVAL;
3946 		else if (target->wa_flags != flags) {
3947 			error = set_watched_page(p, vaddr, eaddr,
3948 			    flags, target->wa_flags);
3949 			target->wa_flags = flags;
3950 		}
3951 		kmem_free(pwa, sizeof (struct watched_area));
3952 	} else {
3953 		avl_insert(&p->p_warea, pwa, where);
3954 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3955 	}
3956 
3957 	return (error);
3958 }
3959 
3960 /*
3961  * Utility routine for clearing a watched area in the process.
3962  * Must be an exact match of the virtual address.
3963  * size and flags don't matter.
3964  */
3965 int
3966 clear_watched_area(proc_t *p, struct watched_area *pwa)
3967 {
3968 	struct watched_area *found;
3969 
3970 	/* we must not be holding p->p_lock, but the process must be locked */
3971 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3972 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3973 
3974 
3975 	if (!pr_watch_active(p)) {
3976 		kmem_free(pwa, sizeof (struct watched_area));
3977 		return (0);
3978 	}
3979 
3980 	/*
3981 	 * Look for a matching address in the watched areas.  If a match is
3982 	 * found, clear the old watched area and adjust the watched page(s).  It
3983 	 * is not an error if there is no match.
3984 	 */
3985 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3986 	    found->wa_vaddr == pwa->wa_vaddr) {
3987 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3988 		    found->wa_flags);
3989 		avl_remove(&p->p_warea, found);
3990 		kmem_free(found, sizeof (struct watched_area));
3991 	}
3992 
3993 	kmem_free(pwa, sizeof (struct watched_area));
3994 
3995 	/*
3996 	 * If we removed the last watched area from the process, disable
3997 	 * watchpoints.
3998 	 */
3999 	if (!pr_watch_active(p)) {
4000 		kthread_t *t;
4001 
4002 		mutex_enter(&p->p_lock);
4003 		if ((t = p->p_tlist) != NULL) {
4004 			do {
4005 				watch_disable(t);
4006 			} while ((t = t->t_forw) != p->p_tlist);
4007 		}
4008 		mutex_exit(&p->p_lock);
4009 	}
4010 
4011 	return (0);
4012 }
4013 
4014 /*
4015  * Frees all the watched_area structures
4016  */
4017 void
4018 pr_free_watchpoints(proc_t *p)
4019 {
4020 	struct watched_area *delp;
4021 	void *cookie;
4022 
4023 	cookie = NULL;
4024 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
4025 		kmem_free(delp, sizeof (struct watched_area));
4026 
4027 	avl_destroy(&p->p_warea);
4028 }
4029 
4030 /*
4031  * This one is called by the traced process to unwatch all the
4032  * pages while deallocating the list of watched_page structs.
4033  */
4034 void
4035 pr_free_watched_pages(proc_t *p)
4036 {
4037 	struct as *as = p->p_as;
4038 	struct watched_page *pwp;
4039 	uint_t prot;
4040 	int    retrycnt, err;
4041 	void *cookie;
4042 
4043 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
4044 		return;
4045 
4046 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
4047 	AS_LOCK_ENTER(as, RW_WRITER);
4048 
4049 	pwp = avl_first(&as->a_wpage);
4050 
4051 	cookie = NULL;
4052 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
4053 		retrycnt = 0;
4054 		if ((prot = pwp->wp_oprot) != 0) {
4055 			caddr_t addr = pwp->wp_vaddr;
4056 			struct seg *seg;
4057 		retry:
4058 
4059 			if ((pwp->wp_prot != prot ||
4060 			    (pwp->wp_flags & WP_NOWATCH)) &&
4061 			    (seg = as_segat(as, addr)) != NULL) {
4062 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
4063 				if (err == IE_RETRY) {
4064 					ASSERT(retrycnt == 0);
4065 					retrycnt++;
4066 					goto retry;
4067 				}
4068 			}
4069 		}
4070 		kmem_free(pwp, sizeof (struct watched_page));
4071 	}
4072 
4073 	avl_destroy(&as->a_wpage);
4074 	p->p_wprot = NULL;
4075 
4076 	AS_LOCK_EXIT(as);
4077 }
4078 
4079 /*
4080  * Insert a watched area into the list of watched pages.
4081  * If oflags is zero then we are adding a new watched area.
4082  * Otherwise we are changing the flags of an existing watched area.
4083  */
4084 static int
4085 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
4086     ulong_t flags, ulong_t oflags)
4087 {
4088 	struct as *as = p->p_as;
4089 	avl_tree_t *pwp_tree;
4090 	struct watched_page *pwp, *newpwp;
4091 	struct watched_page tpw;
4092 	avl_index_t where;
4093 	struct seg *seg;
4094 	uint_t prot;
4095 	caddr_t addr;
4096 
4097 	/*
4098 	 * We need to pre-allocate a list of structures before we grab the
4099 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
4100 	 * held.
4101 	 */
4102 	newpwp = NULL;
4103 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4104 	    addr < eaddr; addr += PAGESIZE) {
4105 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
4106 		pwp->wp_list = newpwp;
4107 		newpwp = pwp;
4108 	}
4109 
4110 	AS_LOCK_ENTER(as, RW_WRITER);
4111 
4112 	/*
4113 	 * Search for an existing watched page to contain the watched area.
4114 	 * If none is found, grab a new one from the available list
4115 	 * and insert it in the active list, keeping the list sorted
4116 	 * by user-level virtual address.
4117 	 */
4118 	if (p->p_flag & SVFWAIT)
4119 		pwp_tree = &p->p_wpage;
4120 	else
4121 		pwp_tree = &as->a_wpage;
4122 
4123 again:
4124 	if (avl_numnodes(pwp_tree) > prnwatch) {
4125 		AS_LOCK_EXIT(as);
4126 		while (newpwp != NULL) {
4127 			pwp = newpwp->wp_list;
4128 			kmem_free(newpwp, sizeof (struct watched_page));
4129 			newpwp = pwp;
4130 		}
4131 		return (E2BIG);
4132 	}
4133 
4134 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4135 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
4136 		pwp = newpwp;
4137 		newpwp = newpwp->wp_list;
4138 		pwp->wp_list = NULL;
4139 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
4140 		    (uintptr_t)PAGEMASK);
4141 		avl_insert(pwp_tree, pwp, where);
4142 	}
4143 
4144 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
4145 
4146 	if (oflags & WA_READ)
4147 		pwp->wp_read--;
4148 	if (oflags & WA_WRITE)
4149 		pwp->wp_write--;
4150 	if (oflags & WA_EXEC)
4151 		pwp->wp_exec--;
4152 
4153 	ASSERT(pwp->wp_read >= 0);
4154 	ASSERT(pwp->wp_write >= 0);
4155 	ASSERT(pwp->wp_exec >= 0);
4156 
4157 	if (flags & WA_READ)
4158 		pwp->wp_read++;
4159 	if (flags & WA_WRITE)
4160 		pwp->wp_write++;
4161 	if (flags & WA_EXEC)
4162 		pwp->wp_exec++;
4163 
4164 	if (!(p->p_flag & SVFWAIT)) {
4165 		vaddr = pwp->wp_vaddr;
4166 		if (pwp->wp_oprot == 0 &&
4167 		    (seg = as_segat(as, vaddr)) != NULL) {
4168 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
4169 			pwp->wp_oprot = (uchar_t)prot;
4170 			pwp->wp_prot = (uchar_t)prot;
4171 		}
4172 		if (pwp->wp_oprot != 0) {
4173 			prot = pwp->wp_oprot;
4174 			if (pwp->wp_read)
4175 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4176 			if (pwp->wp_write)
4177 				prot &= ~PROT_WRITE;
4178 			if (pwp->wp_exec)
4179 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4180 			if (!(pwp->wp_flags & WP_NOWATCH) &&
4181 			    pwp->wp_prot != prot &&
4182 			    (pwp->wp_flags & WP_SETPROT) == 0) {
4183 				pwp->wp_flags |= WP_SETPROT;
4184 				pwp->wp_list = p->p_wprot;
4185 				p->p_wprot = pwp;
4186 			}
4187 			pwp->wp_prot = (uchar_t)prot;
4188 		}
4189 	}
4190 
4191 	/*
4192 	 * If the watched area extends into the next page then do
4193 	 * it over again with the virtual address of the next page.
4194 	 */
4195 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
4196 		goto again;
4197 
4198 	AS_LOCK_EXIT(as);
4199 
4200 	/*
4201 	 * Free any pages we may have over-allocated
4202 	 */
4203 	while (newpwp != NULL) {
4204 		pwp = newpwp->wp_list;
4205 		kmem_free(newpwp, sizeof (struct watched_page));
4206 		newpwp = pwp;
4207 	}
4208 
4209 	return (0);
4210 }
4211 
4212 /*
4213  * Remove a watched area from the list of watched pages.
4214  * A watched area may extend over more than one page.
4215  */
4216 static void
4217 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
4218 {
4219 	struct as *as = p->p_as;
4220 	struct watched_page *pwp;
4221 	struct watched_page tpw;
4222 	avl_tree_t *tree;
4223 	avl_index_t where;
4224 
4225 	AS_LOCK_ENTER(as, RW_WRITER);
4226 
4227 	if (p->p_flag & SVFWAIT)
4228 		tree = &p->p_wpage;
4229 	else
4230 		tree = &as->a_wpage;
4231 
4232 	tpw.wp_vaddr = vaddr =
4233 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4234 	pwp = avl_find(tree, &tpw, &where);
4235 	if (pwp == NULL)
4236 		pwp = avl_nearest(tree, where, AVL_AFTER);
4237 
4238 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
4239 		ASSERT(vaddr <=  pwp->wp_vaddr);
4240 
4241 		if (flags & WA_READ)
4242 			pwp->wp_read--;
4243 		if (flags & WA_WRITE)
4244 			pwp->wp_write--;
4245 		if (flags & WA_EXEC)
4246 			pwp->wp_exec--;
4247 
4248 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
4249 			/*
4250 			 * Reset the hat layer's protections on this page.
4251 			 */
4252 			if (pwp->wp_oprot != 0) {
4253 				uint_t prot = pwp->wp_oprot;
4254 
4255 				if (pwp->wp_read)
4256 					prot &=
4257 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4258 				if (pwp->wp_write)
4259 					prot &= ~PROT_WRITE;
4260 				if (pwp->wp_exec)
4261 					prot &=
4262 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4263 				if (!(pwp->wp_flags & WP_NOWATCH) &&
4264 				    pwp->wp_prot != prot &&
4265 				    (pwp->wp_flags & WP_SETPROT) == 0) {
4266 					pwp->wp_flags |= WP_SETPROT;
4267 					pwp->wp_list = p->p_wprot;
4268 					p->p_wprot = pwp;
4269 				}
4270 				pwp->wp_prot = (uchar_t)prot;
4271 			}
4272 		} else {
4273 			/*
4274 			 * No watched areas remain in this page.
4275 			 * Reset everything to normal.
4276 			 */
4277 			if (pwp->wp_oprot != 0) {
4278 				pwp->wp_prot = pwp->wp_oprot;
4279 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
4280 					pwp->wp_flags |= WP_SETPROT;
4281 					pwp->wp_list = p->p_wprot;
4282 					p->p_wprot = pwp;
4283 				}
4284 			}
4285 		}
4286 
4287 		pwp = AVL_NEXT(tree, pwp);
4288 	}
4289 
4290 	AS_LOCK_EXIT(as);
4291 }
4292 
4293 /*
4294  * Return the original protections for the specified page.
4295  */
4296 static void
4297 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
4298 {
4299 	struct watched_page *pwp;
4300 	struct watched_page tpw;
4301 
4302 	ASSERT(AS_LOCK_HELD(as));
4303 
4304 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
4305 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
4306 		*prot = pwp->wp_oprot;
4307 }
4308 
4309 static prpagev_t *
4310 pr_pagev_create(struct seg *seg, int check_noreserve)
4311 {
4312 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
4313 	size_t total_pages = seg_pages(seg);
4314 
4315 	/*
4316 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
4317 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
4318 	 * to about a megabyte of kernel heap by default.
4319 	 */
4320 	pagev->pg_npages = MIN(total_pages, pagev_lim);
4321 	pagev->pg_pnbase = 0;
4322 
4323 	pagev->pg_protv =
4324 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
4325 
4326 	if (check_noreserve)
4327 		pagev->pg_incore =
4328 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
4329 	else
4330 		pagev->pg_incore = NULL;
4331 
4332 	return (pagev);
4333 }
4334 
4335 static void
4336 pr_pagev_destroy(prpagev_t *pagev)
4337 {
4338 	if (pagev->pg_incore != NULL)
4339 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
4340 
4341 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
4342 	kmem_free(pagev, sizeof (prpagev_t));
4343 }
4344 
4345 static caddr_t
4346 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
4347 {
4348 	ulong_t lastpg = seg_page(seg, eaddr - 1);
4349 	ulong_t pn, pnlim;
4350 	caddr_t saddr;
4351 	size_t len;
4352 
4353 	ASSERT(addr >= seg->s_base && addr <= eaddr);
4354 
4355 	if (addr == eaddr)
4356 		return (eaddr);
4357 
4358 refill:
4359 	ASSERT(addr < eaddr);
4360 	pagev->pg_pnbase = seg_page(seg, addr);
4361 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
4362 	saddr = addr;
4363 
4364 	if (lastpg < pnlim)
4365 		len = (size_t)(eaddr - addr);
4366 	else
4367 		len = pagev->pg_npages * PAGESIZE;
4368 
4369 	if (pagev->pg_incore != NULL) {
4370 		/*
4371 		 * INCORE cleverly has different semantics than GETPROT:
4372 		 * it returns info on pages up to but NOT including addr + len.
4373 		 */
4374 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
4375 		pn = pagev->pg_pnbase;
4376 
4377 		do {
4378 			/*
4379 			 * Guilty knowledge here:  We know that segvn_incore
4380 			 * returns more than just the low-order bit that
4381 			 * indicates the page is actually in memory.  If any
4382 			 * bits are set, then the page has backing store.
4383 			 */
4384 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
4385 				goto out;
4386 
4387 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
4388 
4389 		/*
4390 		 * If we examined all the pages in the vector but we're not
4391 		 * at the end of the segment, take another lap.
4392 		 */
4393 		if (addr < eaddr)
4394 			goto refill;
4395 	}
4396 
4397 	/*
4398 	 * Need to take len - 1 because addr + len is the address of the
4399 	 * first byte of the page just past the end of what we want.
4400 	 */
4401 out:
4402 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
4403 	return (addr);
4404 }
4405 
4406 static caddr_t
4407 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
4408     caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
4409 {
4410 	/*
4411 	 * Our starting address is either the specified address, or the base
4412 	 * address from the start of the pagev.  If the latter is greater,
4413 	 * this means a previous call to pr_pagev_fill has already scanned
4414 	 * further than the end of the previous mapping.
4415 	 */
4416 	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
4417 	caddr_t addr = MAX(*saddrp, base);
4418 	ulong_t pn = seg_page(seg, addr);
4419 	uint_t prot, nprot;
4420 
4421 	/*
4422 	 * If we're dealing with noreserve pages, then advance addr to
4423 	 * the address of the next page which has backing store.
4424 	 */
4425 	if (pagev->pg_incore != NULL) {
4426 		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
4427 			if ((addr += PAGESIZE) == eaddr) {
4428 				*saddrp = addr;
4429 				prot = 0;
4430 				goto out;
4431 			}
4432 			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
4433 				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
4434 				if (addr == eaddr) {
4435 					*saddrp = addr;
4436 					prot = 0;
4437 					goto out;
4438 				}
4439 				pn = seg_page(seg, addr);
4440 			}
4441 		}
4442 	}
4443 
4444 	/*
4445 	 * Get the protections on the page corresponding to addr.
4446 	 */
4447 	pn = seg_page(seg, addr);
4448 	ASSERT(pn >= pagev->pg_pnbase);
4449 	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
4450 
4451 	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
4452 	getwatchprot(seg->s_as, addr, &prot);
4453 	*saddrp = addr;
4454 
4455 	/*
4456 	 * Now loop until we find a backed page with different protections
4457 	 * or we reach the end of this segment.
4458 	 */
4459 	while ((addr += PAGESIZE) < eaddr) {
4460 		/*
4461 		 * If pn has advanced to the page number following what we
4462 		 * have information on, refill the page vector and reset
4463 		 * addr and pn.  If pr_pagev_fill does not return the
4464 		 * address of the next page, we have a discontiguity and
4465 		 * thus have reached the end of the current mapping.
4466 		 */
4467 		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
4468 			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
4469 			if (naddr != addr)
4470 				goto out;
4471 			pn = seg_page(seg, addr);
4472 		}
4473 
4474 		/*
4475 		 * The previous page's protections are in prot, and it has
4476 		 * backing.  If this page is MAP_NORESERVE and has no backing,
4477 		 * then end this mapping and return the previous protections.
4478 		 */
4479 		if (pagev->pg_incore != NULL &&
4480 		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
4481 			break;
4482 
4483 		/*
4484 		 * Otherwise end the mapping if this page's protections (nprot)
4485 		 * are different than those in the previous page (prot).
4486 		 */
4487 		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
4488 		getwatchprot(seg->s_as, addr, &nprot);
4489 
4490 		if (nprot != prot)
4491 			break;
4492 	}
4493 
4494 out:
4495 	*protp = prot;
4496 	return (addr);
4497 }
4498 
4499 size_t
4500 pr_getsegsize(struct seg *seg, int reserved)
4501 {
4502 	size_t size = seg->s_size;
4503 
4504 	/*
4505 	 * If we're interested in the reserved space, return the size of the
4506 	 * segment itself.  Everything else in this function is a special case
4507 	 * to determine the actual underlying size of various segment types.
4508 	 */
4509 	if (reserved)
4510 		return (size);
4511 
4512 	/*
4513 	 * If this is a segvn mapping of a regular file, return the smaller
4514 	 * of the segment size and the remaining size of the file beyond
4515 	 * the file offset corresponding to seg->s_base.
4516 	 */
4517 	if (seg->s_ops == &segvn_ops) {
4518 		vattr_t vattr;
4519 		vnode_t *vp;
4520 
4521 		vattr.va_mask = AT_SIZE;
4522 
4523 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4524 		    vp != NULL && vp->v_type == VREG &&
4525 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4526 
4527 			u_offset_t fsize = vattr.va_size;
4528 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
4529 
4530 			if (fsize < offset)
4531 				fsize = 0;
4532 			else
4533 				fsize -= offset;
4534 
4535 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
4536 
4537 			if (fsize < (u_offset_t)size)
4538 				size = (size_t)fsize;
4539 		}
4540 
4541 		return (size);
4542 	}
4543 
4544 	/*
4545 	 * If this is an ISM shared segment, don't include pages that are
4546 	 * beyond the real size of the spt segment that backs it.
4547 	 */
4548 	if (seg->s_ops == &segspt_shmops)
4549 		return (MIN(spt_realsize(seg), size));
4550 
4551 	/*
4552 	 * If this is segment is a mapping from /dev/null, then this is a
4553 	 * reservation of virtual address space and has no actual size.
4554 	 * Such segments are backed by segdev and have type set to neither
4555 	 * MAP_SHARED nor MAP_PRIVATE.
4556 	 */
4557 	if (seg->s_ops == &segdev_ops &&
4558 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
4559 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
4560 		return (0);
4561 
4562 	/*
4563 	 * If this segment doesn't match one of the special types we handle,
4564 	 * just return the size of the segment itself.
4565 	 */
4566 	return (size);
4567 }
4568 
4569 uint_t
4570 pr_getprot(struct seg *seg, int reserved, void **tmp,
4571     caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
4572 {
4573 	struct as *as = seg->s_as;
4574 
4575 	caddr_t saddr = *saddrp;
4576 	caddr_t naddr;
4577 
4578 	int check_noreserve;
4579 	uint_t prot;
4580 
4581 	union {
4582 		struct segvn_data *svd;
4583 		struct segdev_data *sdp;
4584 		void *data;
4585 	} s;
4586 
4587 	s.data = seg->s_data;
4588 
4589 	ASSERT(AS_WRITE_HELD(as));
4590 	ASSERT(saddr >= seg->s_base && saddr < eaddr);
4591 	ASSERT(eaddr <= seg->s_base + seg->s_size);
4592 
4593 	/*
4594 	 * Don't include MAP_NORESERVE pages in the address range
4595 	 * unless their mappings have actually materialized.
4596 	 * We cheat by knowing that segvn is the only segment
4597 	 * driver that supports MAP_NORESERVE.
4598 	 */
4599 	check_noreserve =
4600 	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
4601 	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
4602 	    (s.svd->flags & MAP_NORESERVE));
4603 
4604 	/*
4605 	 * Examine every page only as a last resort.  We use guilty knowledge
4606 	 * of segvn and segdev to avoid this: if there are no per-page
4607 	 * protections present in the segment and we don't care about
4608 	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
4609 	 */
4610 	if (!check_noreserve && saddr == seg->s_base &&
4611 	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
4612 		prot = s.svd->prot;
4613 		getwatchprot(as, saddr, &prot);
4614 		naddr = eaddr;
4615 
4616 	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
4617 	    s.sdp != NULL && s.sdp->pageprot == 0) {
4618 		prot = s.sdp->prot;
4619 		getwatchprot(as, saddr, &prot);
4620 		naddr = eaddr;
4621 
4622 	} else {
4623 		prpagev_t *pagev;
4624 
4625 		/*
4626 		 * If addr is sitting at the start of the segment, then
4627 		 * create a page vector to store protection and incore
4628 		 * information for pages in the segment, and fill it.
4629 		 * Otherwise, we expect *tmp to address the prpagev_t
4630 		 * allocated by a previous call to this function.
4631 		 */
4632 		if (saddr == seg->s_base) {
4633 			pagev = pr_pagev_create(seg, check_noreserve);
4634 			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
4635 
4636 			ASSERT(*tmp == NULL);
4637 			*tmp = pagev;
4638 
4639 			ASSERT(saddr <= eaddr);
4640 			*saddrp = saddr;
4641 
4642 			if (saddr == eaddr) {
4643 				naddr = saddr;
4644 				prot = 0;
4645 				goto out;
4646 			}
4647 
4648 		} else {
4649 			ASSERT(*tmp != NULL);
4650 			pagev = (prpagev_t *)*tmp;
4651 		}
4652 
4653 		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
4654 		ASSERT(naddr <= eaddr);
4655 	}
4656 
4657 out:
4658 	if (naddr == eaddr)
4659 		pr_getprot_done(tmp);
4660 	*naddrp = naddr;
4661 	return (prot);
4662 }
4663 
4664 void
4665 pr_getprot_done(void **tmp)
4666 {
4667 	if (*tmp != NULL) {
4668 		pr_pagev_destroy((prpagev_t *)*tmp);
4669 		*tmp = NULL;
4670 	}
4671 }
4672 
4673 /*
4674  * Return true iff the vnode is a /proc file from the object directory.
4675  */
4676 int
4677 pr_isobject(vnode_t *vp)
4678 {
4679 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
4680 }
4681 
4682 /*
4683  * Return true iff the vnode is a /proc file opened by the process itself.
4684  */
4685 int
4686 pr_isself(vnode_t *vp)
4687 {
4688 	/*
4689 	 * XXX: To retain binary compatibility with the old
4690 	 * ioctl()-based version of /proc, we exempt self-opens
4691 	 * of /proc/<pid> from being marked close-on-exec.
4692 	 */
4693 	return (vn_matchops(vp, prvnodeops) &&
4694 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
4695 	    VTOP(vp)->pr_type != PR_PIDDIR);
4696 }
4697 
4698 static ssize_t
4699 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
4700 {
4701 	ssize_t pagesize, hatsize;
4702 
4703 	ASSERT(AS_WRITE_HELD(seg->s_as));
4704 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
4705 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
4706 	ASSERT(saddr < eaddr);
4707 
4708 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
4709 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
4710 	ASSERT(pagesize != 0);
4711 
4712 	if (pagesize == -1)
4713 		pagesize = PAGESIZE;
4714 
4715 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
4716 
4717 	while (saddr < eaddr) {
4718 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
4719 			break;
4720 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
4721 		saddr += pagesize;
4722 	}
4723 
4724 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
4725 	return (hatsize);
4726 }
4727 
4728 /*
4729  * Return an array of structures with extended memory map information.
4730  * We allocate here; the caller must deallocate.
4731  */
4732 int
4733 prgetxmap(proc_t *p, list_t *iolhead)
4734 {
4735 	struct as *as = p->p_as;
4736 	prxmap_t *mp;
4737 	struct seg *seg;
4738 	struct seg *brkseg, *stkseg;
4739 	struct vnode *vp;
4740 	struct vattr vattr;
4741 	uint_t prot;
4742 
4743 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4744 
4745 	/*
4746 	 * Request an initial buffer size that doesn't waste memory
4747 	 * if the address space has only a small number of segments.
4748 	 */
4749 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4750 
4751 	if ((seg = AS_SEGFIRST(as)) == NULL)
4752 		return (0);
4753 
4754 	brkseg = break_seg(p);
4755 	stkseg = as_segat(as, prgetstackbase(p));
4756 
4757 	do {
4758 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4759 		caddr_t saddr, naddr, baddr;
4760 		void *tmp = NULL;
4761 		ssize_t psz;
4762 		char *parr;
4763 		uint64_t npages;
4764 		uint64_t pagenum;
4765 
4766 		if ((seg->s_flags & S_HOLE) != 0) {
4767 			continue;
4768 		}
4769 		/*
4770 		 * Segment loop part one: iterate from the base of the segment
4771 		 * to its end, pausing at each address boundary (baddr) between
4772 		 * ranges that have different virtual memory protections.
4773 		 */
4774 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4775 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4776 			ASSERT(baddr >= saddr && baddr <= eaddr);
4777 
4778 			/*
4779 			 * Segment loop part two: iterate from the current
4780 			 * position to the end of the protection boundary,
4781 			 * pausing at each address boundary (naddr) between
4782 			 * ranges that have different underlying page sizes.
4783 			 */
4784 			for (; saddr < baddr; saddr = naddr) {
4785 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4786 				ASSERT(naddr >= saddr && naddr <= baddr);
4787 
4788 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4789 
4790 				mp->pr_vaddr = (uintptr_t)saddr;
4791 				mp->pr_size = naddr - saddr;
4792 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4793 				mp->pr_mflags = 0;
4794 				if (prot & PROT_READ)
4795 					mp->pr_mflags |= MA_READ;
4796 				if (prot & PROT_WRITE)
4797 					mp->pr_mflags |= MA_WRITE;
4798 				if (prot & PROT_EXEC)
4799 					mp->pr_mflags |= MA_EXEC;
4800 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4801 					mp->pr_mflags |= MA_SHARED;
4802 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4803 					mp->pr_mflags |= MA_NORESERVE;
4804 				if (seg->s_ops == &segspt_shmops ||
4805 				    (seg->s_ops == &segvn_ops &&
4806 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4807 				    vp == NULL)))
4808 					mp->pr_mflags |= MA_ANON;
4809 				if (seg == brkseg)
4810 					mp->pr_mflags |= MA_BREAK;
4811 				else if (seg == stkseg)
4812 					mp->pr_mflags |= MA_STACK;
4813 				if (seg->s_ops == &segspt_shmops)
4814 					mp->pr_mflags |= MA_ISM | MA_SHM;
4815 
4816 				mp->pr_pagesize = PAGESIZE;
4817 				if (psz == -1) {
4818 					mp->pr_hatpagesize = 0;
4819 				} else {
4820 					mp->pr_hatpagesize = psz;
4821 				}
4822 
4823 				/*
4824 				 * Manufacture a filename for the "object" dir.
4825 				 */
4826 				mp->pr_dev = PRNODEV;
4827 				vattr.va_mask = AT_FSID|AT_NODEID;
4828 				if (seg->s_ops == &segvn_ops &&
4829 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4830 				    vp != NULL && vp->v_type == VREG &&
4831 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4832 				    NULL) == 0) {
4833 					mp->pr_dev = vattr.va_fsid;
4834 					mp->pr_ino = vattr.va_nodeid;
4835 					if (vp == p->p_exec)
4836 						(void) strcpy(mp->pr_mapname,
4837 						    "a.out");
4838 					else
4839 						pr_object_name(mp->pr_mapname,
4840 						    vp, &vattr);
4841 				}
4842 
4843 				/*
4844 				 * Get the SysV shared memory id, if any.
4845 				 */
4846 				if ((mp->pr_mflags & MA_SHARED) &&
4847 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4848 				    seg->s_base)) != SHMID_NONE) {
4849 					if (mp->pr_shmid == SHMID_FREE)
4850 						mp->pr_shmid = -1;
4851 
4852 					mp->pr_mflags |= MA_SHM;
4853 				} else {
4854 					mp->pr_shmid = -1;
4855 				}
4856 
4857 				npages = ((uintptr_t)(naddr - saddr)) >>
4858 				    PAGESHIFT;
4859 				parr = kmem_zalloc(npages, KM_SLEEP);
4860 
4861 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4862 
4863 				for (pagenum = 0; pagenum < npages; pagenum++) {
4864 					if (parr[pagenum] & SEG_PAGE_INCORE)
4865 						mp->pr_rss++;
4866 					if (parr[pagenum] & SEG_PAGE_ANON)
4867 						mp->pr_anon++;
4868 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4869 						mp->pr_locked++;
4870 				}
4871 				kmem_free(parr, npages);
4872 			}
4873 		}
4874 		ASSERT(tmp == NULL);
4875 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4876 
4877 	return (0);
4878 }
4879 
4880 /*
4881  * Return the process's credentials.  We don't need a 32-bit equivalent of
4882  * this function because prcred_t and prcred32_t are actually the same.
4883  */
4884 void
4885 prgetcred(proc_t *p, prcred_t *pcrp)
4886 {
4887 	mutex_enter(&p->p_crlock);
4888 	cred2prcred(p->p_cred, pcrp);
4889 	mutex_exit(&p->p_crlock);
4890 }
4891 
4892 void
4893 prgetsecflags(proc_t *p, prsecflags_t *psfp)
4894 {
4895 	ASSERT(psfp != NULL);
4896 
4897 	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
4898 	psfp->pr_lower = p->p_secflags.psf_lower;
4899 	psfp->pr_upper = p->p_secflags.psf_upper;
4900 	psfp->pr_effective = p->p_secflags.psf_effective;
4901 	psfp->pr_inherit = p->p_secflags.psf_inherit;
4902 }
4903 
/*
 * Compute the actual size of the prpriv_t structure by asking
 * priv_prgetprivsize() with a NULL argument.
 */
size_t
prgetprivsize(void)
{
	size_t privsize = priv_prgetprivsize(NULL);

	return (privsize);
}
4913 
4914 /*
4915  * Return the process's privileges.  We don't need a 32-bit equivalent of
4916  * this function because prpriv_t and prpriv32_t are actually the same.
4917  */
4918 void
4919 prgetpriv(proc_t *p, prpriv_t *pprp)
4920 {
4921 	mutex_enter(&p->p_crlock);
4922 	cred2prpriv(p->p_cred, pprp);
4923 	mutex_exit(&p->p_crlock);
4924 }
4925 
4926 #ifdef _SYSCALL32_IMPL
4927 /*
4928  * Return an array of structures with HAT memory map information.
4929  * We allocate here; the caller must deallocate.
4930  */
4931 int
4932 prgetxmap32(proc_t *p, list_t *iolhead)
4933 {
4934 	struct as *as = p->p_as;
4935 	prxmap32_t *mp;
4936 	struct seg *seg;
4937 	struct seg *brkseg, *stkseg;
4938 	struct vnode *vp;
4939 	struct vattr vattr;
4940 	uint_t prot;
4941 
4942 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4943 
4944 	/*
4945 	 * Request an initial buffer size that doesn't waste memory
4946 	 * if the address space has only a small number of segments.
4947 	 */
4948 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4949 
4950 	if ((seg = AS_SEGFIRST(as)) == NULL)
4951 		return (0);
4952 
4953 	brkseg = break_seg(p);
4954 	stkseg = as_segat(as, prgetstackbase(p));
4955 
4956 	do {
4957 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4958 		caddr_t saddr, naddr, baddr;
4959 		void *tmp = NULL;
4960 		ssize_t psz;
4961 		char *parr;
4962 		uint64_t npages;
4963 		uint64_t pagenum;
4964 
4965 		if ((seg->s_flags & S_HOLE) != 0) {
4966 			continue;
4967 		}
4968 
4969 		/*
4970 		 * Segment loop part one: iterate from the base of the segment
4971 		 * to its end, pausing at each address boundary (baddr) between
4972 		 * ranges that have different virtual memory protections.
4973 		 */
4974 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4975 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4976 			ASSERT(baddr >= saddr && baddr <= eaddr);
4977 
4978 			/*
4979 			 * Segment loop part two: iterate from the current
4980 			 * position to the end of the protection boundary,
4981 			 * pausing at each address boundary (naddr) between
4982 			 * ranges that have different underlying page sizes.
4983 			 */
4984 			for (; saddr < baddr; saddr = naddr) {
4985 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4986 				ASSERT(naddr >= saddr && naddr <= baddr);
4987 
4988 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4989 
4990 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4991 				mp->pr_size = (size32_t)(naddr - saddr);
4992 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4993 				mp->pr_mflags = 0;
4994 				if (prot & PROT_READ)
4995 					mp->pr_mflags |= MA_READ;
4996 				if (prot & PROT_WRITE)
4997 					mp->pr_mflags |= MA_WRITE;
4998 				if (prot & PROT_EXEC)
4999 					mp->pr_mflags |= MA_EXEC;
5000 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
5001 					mp->pr_mflags |= MA_SHARED;
5002 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
5003 					mp->pr_mflags |= MA_NORESERVE;
5004 				if (seg->s_ops == &segspt_shmops ||
5005 				    (seg->s_ops == &segvn_ops &&
5006 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
5007 				    vp == NULL)))
5008 					mp->pr_mflags |= MA_ANON;
5009 				if (seg == brkseg)
5010 					mp->pr_mflags |= MA_BREAK;
5011 				else if (seg == stkseg)
5012 					mp->pr_mflags |= MA_STACK;
5013 				if (seg->s_ops == &segspt_shmops)
5014 					mp->pr_mflags |= MA_ISM | MA_SHM;
5015 
5016 				mp->pr_pagesize = PAGESIZE;
5017 				if (psz == -1) {
5018 					mp->pr_hatpagesize = 0;
5019 				} else {
5020 					mp->pr_hatpagesize = psz;
5021 				}
5022 
5023 				/*
5024 				 * Manufacture a filename for the "object" dir.
5025 				 */
5026 				mp->pr_dev = PRNODEV32;
5027 				vattr.va_mask = AT_FSID|AT_NODEID;
5028 				if (seg->s_ops == &segvn_ops &&
5029 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
5030 				    vp != NULL && vp->v_type == VREG &&
5031 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
5032 				    NULL) == 0) {
5033 					(void) cmpldev(&mp->pr_dev,
5034 					    vattr.va_fsid);
5035 					mp->pr_ino = vattr.va_nodeid;
5036 					if (vp == p->p_exec)
5037 						(void) strcpy(mp->pr_mapname,
5038 						    "a.out");
5039 					else
5040 						pr_object_name(mp->pr_mapname,
5041 						    vp, &vattr);
5042 				}
5043 
5044 				/*
5045 				 * Get the SysV shared memory id, if any.
5046 				 */
5047 				if ((mp->pr_mflags & MA_SHARED) &&
5048 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
5049 				    seg->s_base)) != SHMID_NONE) {
5050 					if (mp->pr_shmid == SHMID_FREE)
5051 						mp->pr_shmid = -1;
5052 
5053 					mp->pr_mflags |= MA_SHM;
5054 				} else {
5055 					mp->pr_shmid = -1;
5056 				}
5057 
5058 				npages = ((uintptr_t)(naddr - saddr)) >>
5059 				    PAGESHIFT;
5060 				parr = kmem_zalloc(npages, KM_SLEEP);
5061 
5062 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
5063 
5064 				for (pagenum = 0; pagenum < npages; pagenum++) {
5065 					if (parr[pagenum] & SEG_PAGE_INCORE)
5066 						mp->pr_rss++;
5067 					if (parr[pagenum] & SEG_PAGE_ANON)
5068 						mp->pr_anon++;
5069 					if (parr[pagenum] & SEG_PAGE_LOCKED)
5070 						mp->pr_locked++;
5071 				}
5072 				kmem_free(parr, npages);
5073 			}
5074 		}
5075 		ASSERT(tmp == NULL);
5076 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
5077 
5078 	return (0);
5079 }
5080 #endif	/* _SYSCALL32_IMPL */
5081