xref: /illumos-gate/usr/src/uts/common/fs/proc/prvnops.c (revision b3783300013fa93b98278c901b855062f538f7e2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2018, Joyent, Inc.
25  * Copyright (c) 2017 by Delphix. All rights reserved.
26  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
27  * Copyright 2022 MNX Cloud, Inc.
28  * Copyright 2023 Oxide Computer Company
29  */
30 
31 /*	Copyright (c) 1984,	 1986, 1987, 1988, 1989 AT&T	*/
32 /*	  All Rights Reserved	*/
33 
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/time.h>
37 #include <sys/cred.h>
38 #include <sys/policy.h>
39 #include <sys/debug.h>
40 #include <sys/dirent.h>
41 #include <sys/errno.h>
42 #include <sys/file.h>
43 #include <sys/inline.h>
44 #include <sys/kmem.h>
45 #include <sys/pathname.h>
46 #include <sys/proc.h>
47 #include <sys/brand.h>
48 #include <sys/signal.h>
49 #include <sys/stat.h>
50 #include <sys/sysmacros.h>
51 #include <sys/systm.h>
52 #include <sys/zone.h>
53 #include <sys/uio.h>
54 #include <sys/var.h>
55 #include <sys/mode.h>
56 #include <sys/poll.h>
57 #include <sys/user.h>
58 #include <sys/vfs.h>
59 #include <sys/vfs_opreg.h>
60 #include <sys/gfs.h>
61 #include <sys/vnode.h>
62 #include <sys/fault.h>
63 #include <sys/syscall.h>
64 #include <sys/procfs.h>
65 #include <sys/atomic.h>
66 #include <sys/cmn_err.h>
67 #include <sys/contract_impl.h>
68 #include <sys/ctfs.h>
69 #include <sys/avl.h>
70 #include <sys/ctype.h>
71 #include <fs/fs_subr.h>
72 #include <vm/rm.h>
73 #include <vm/as.h>
74 #include <vm/seg.h>
75 #include <vm/seg_vn.h>
76 #include <vm/hat.h>
77 #include <fs/proc/prdata.h>
78 #if defined(__sparc)
79 #include <sys/regset.h>
80 #endif
81 #if defined(__x86)
82 #include <sys/sysi86.h>
83 #endif
84 
85 /*
86  * Created by prinit.
87  */
88 vnodeops_t *prvnodeops;
89 
90 /*
91  * Directory characteristics (patterned after the s5 file system).
92  */
93 #define	PRROOTINO	2
94 
95 #define	PRDIRSIZE	14
96 struct prdirect {
97 	ushort_t	d_ino;
98 	char		d_name[PRDIRSIZE];
99 };
100 
101 #define	PRSDSIZE	(sizeof (struct prdirect))
102 
103 /*
104  * Directory characteristics.
105  */
106 typedef struct prdirent {
107 	ino64_t		d_ino;		/* "inode number" of entry */
108 	off64_t		d_off;		/* offset of disk directory entry */
109 	unsigned short	d_reclen;	/* length of this record */
110 	char		d_name[14];	/* name of file */
111 } prdirent_t;
112 
113 /*
114  * Contents of a /proc/<pid> directory.
115  * Reuse d_ino field for the /proc file type.
116  */
117 static prdirent_t piddir[] = {
118 	{ PR_PIDDIR,	 1 * sizeof (prdirent_t), sizeof (prdirent_t),
119 		"." },
120 	{ PR_PROCDIR,	 2 * sizeof (prdirent_t), sizeof (prdirent_t),
121 		".." },
122 	{ PR_AS,	 3 * sizeof (prdirent_t), sizeof (prdirent_t),
123 		"as" },
124 	{ PR_CTL,	 4 * sizeof (prdirent_t), sizeof (prdirent_t),
125 		"ctl" },
126 	{ PR_STATUS,	 5 * sizeof (prdirent_t), sizeof (prdirent_t),
127 		"status" },
128 	{ PR_LSTATUS,	 6 * sizeof (prdirent_t), sizeof (prdirent_t),
129 		"lstatus" },
130 	{ PR_PSINFO,	 7 * sizeof (prdirent_t), sizeof (prdirent_t),
131 		"psinfo" },
132 	{ PR_LPSINFO,	 8 * sizeof (prdirent_t), sizeof (prdirent_t),
133 		"lpsinfo" },
134 	{ PR_MAP,	 9 * sizeof (prdirent_t), sizeof (prdirent_t),
135 		"map" },
136 	{ PR_RMAP,	10 * sizeof (prdirent_t), sizeof (prdirent_t),
137 		"rmap" },
138 	{ PR_XMAP,	11 * sizeof (prdirent_t), sizeof (prdirent_t),
139 		"xmap" },
140 	{ PR_CRED,	12 * sizeof (prdirent_t), sizeof (prdirent_t),
141 		"cred" },
142 	{ PR_SIGACT,	13 * sizeof (prdirent_t), sizeof (prdirent_t),
143 		"sigact" },
144 	{ PR_AUXV,	14 * sizeof (prdirent_t), sizeof (prdirent_t),
145 		"auxv" },
146 	{ PR_USAGE,	15 * sizeof (prdirent_t), sizeof (prdirent_t),
147 		"usage" },
148 	{ PR_LUSAGE,	16 * sizeof (prdirent_t), sizeof (prdirent_t),
149 		"lusage" },
150 	{ PR_PAGEDATA,	17 * sizeof (prdirent_t), sizeof (prdirent_t),
151 		"pagedata" },
152 	{ PR_WATCH,	18 * sizeof (prdirent_t), sizeof (prdirent_t),
153 		"watch" },
154 	{ PR_CURDIR,	19 * sizeof (prdirent_t), sizeof (prdirent_t),
155 		"cwd" },
156 	{ PR_ROOTDIR,	20 * sizeof (prdirent_t), sizeof (prdirent_t),
157 		"root" },
158 	{ PR_FDDIR,	21 * sizeof (prdirent_t), sizeof (prdirent_t),
159 		"fd" },
160 	{ PR_FDINFODIR,	22 * sizeof (prdirent_t), sizeof (prdirent_t),
161 		"fdinfo" },
162 	{ PR_OBJECTDIR,	23 * sizeof (prdirent_t), sizeof (prdirent_t),
163 		"object" },
164 	{ PR_LWPDIR,	24 * sizeof (prdirent_t), sizeof (prdirent_t),
165 		"lwp" },
166 	{ PR_PRIV,	25 * sizeof (prdirent_t), sizeof (prdirent_t),
167 		"priv" },
168 	{ PR_PATHDIR,	26 * sizeof (prdirent_t), sizeof (prdirent_t),
169 		"path" },
170 	{ PR_CTDIR,	27 * sizeof (prdirent_t), sizeof (prdirent_t),
171 		"contracts" },
172 	{ PR_SECFLAGS,	28 * sizeof (prdirent_t), sizeof (prdirent_t),
173 		"secflags" },
174 #if defined(__x86)
175 	{ PR_LDT,	29 * sizeof (prdirent_t), sizeof (prdirent_t),
176 		"ldt" },
177 #endif
178 };
179 
180 #define	NPIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]) - 2)
181 
182 /*
183  * Contents of a /proc/<pid>/lwp/<lwpid> directory.
184  */
185 static prdirent_t lwpiddir[] = {
186 	{ PR_LWPIDDIR,	 1 * sizeof (prdirent_t), sizeof (prdirent_t),
187 		"." },
188 	{ PR_LWPDIR,	 2 * sizeof (prdirent_t), sizeof (prdirent_t),
189 		".." },
190 	{ PR_LWPCTL,	 3 * sizeof (prdirent_t), sizeof (prdirent_t),
191 		"lwpctl" },
192 	{ PR_LWPNAME,	 4 * sizeof (prdirent_t), sizeof (prdirent_t),
193 		"lwpname" },
194 	{ PR_LWPSTATUS,	 5 * sizeof (prdirent_t), sizeof (prdirent_t),
195 		"lwpstatus" },
196 	{ PR_LWPSINFO,	 6 * sizeof (prdirent_t), sizeof (prdirent_t),
197 		"lwpsinfo" },
198 	{ PR_LWPUSAGE,	 7 * sizeof (prdirent_t), sizeof (prdirent_t),
199 		"lwpusage" },
200 	{ PR_XREGS,	 8 * sizeof (prdirent_t), sizeof (prdirent_t),
201 		"xregs" },
202 	{ PR_TMPLDIR,	 9 * sizeof (prdirent_t), sizeof (prdirent_t),
203 		"templates" },
204 	{ PR_SPYMASTER,	 10 * sizeof (prdirent_t), sizeof (prdirent_t),
205 		"spymaster" },
206 #if defined(__sparc)
207 	{ PR_GWINDOWS,	11 * sizeof (prdirent_t), sizeof (prdirent_t),
208 		"gwindows" },
209 	{ PR_ASRS,	12 * sizeof (prdirent_t), sizeof (prdirent_t),
210 		"asrs" },
211 #endif
212 };
213 
214 #define	NLWPIDDIRFILES	(sizeof (lwpiddir) / sizeof (lwpiddir[0]) - 2)
215 
/*
 * Distance between consecutive entries in the array files
 * (lstatus, lpsinfo, lusage).
 * The span is deliberately larger than the structure itself so that
 * programs cannot get away with using the structure size by mistake.
 * _ILP32 structures are aligned at 8 bytes, _LP64 structures at 16 bytes.
 */
#if !defined(_LP64)
#define	LSPAN(type)	(round8(sizeof (type)) + 8)
#else
#define	LSPAN(type)	(round16(sizeof (type)) + 16)
#define	LSPAN32(type)	(round8(sizeof (type)) + 8)
#endif
228 
229 static void rebuild_objdir(struct as *);
230 static void prfreecommon(prcommon_t *);
231 static int praccess(vnode_t *, int, int, cred_t *, caller_context_t *);
232 
233 static int
234 propen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
235 {
236 	vnode_t *vp = *vpp;
237 	prnode_t *pnp = VTOP(vp);
238 	prcommon_t *pcp = pnp->pr_pcommon;
239 	prnodetype_t type = pnp->pr_type;
240 	vnode_t *rvp;
241 	vtype_t vtype;
242 	proc_t *p;
243 	int error = 0;
244 	prnode_t *npnp = NULL;
245 
246 	/*
247 	 * Nothing to do for the /proc directory itself.
248 	 */
249 	if (type == PR_PROCDIR)
250 		return (0);
251 
252 	/*
253 	 * If we are opening an underlying mapped object, reject opens
254 	 * for writing regardless of the objects's access modes.
255 	 * If we are opening a file in the /proc/pid/fd directory,
256 	 * reject the open for any but a regular file or directory.
257 	 * Just do it if we are opening the current or root directory.
258 	 */
259 	switch (type) {
260 	case PR_OBJECT:
261 	case PR_FD:
262 	case PR_CURDIR:
263 	case PR_ROOTDIR:
264 		rvp = pnp->pr_realvp;
265 		vtype = rvp->v_type;
266 		if ((type == PR_OBJECT && (flag & FWRITE)) ||
267 		    (type == PR_FD && vtype != VREG && vtype != VDIR))
268 			error = EACCES;
269 		else {
270 			/*
271 			 * Need to hold rvp since VOP_OPEN() may release it.
272 			 */
273 			VN_HOLD(rvp);
274 			error = VOP_OPEN(&rvp, flag, cr, ct);
275 			if (error) {
276 				VN_RELE(rvp);
277 			} else {
278 				*vpp = rvp;
279 				VN_RELE(vp);
280 			}
281 		}
282 		return (error);
283 	default:
284 		break;
285 	}
286 
287 	/*
288 	 * If we are opening the pagedata file, allocate a prnode now
289 	 * to avoid calling kmem_alloc() while holding p->p_lock.
290 	 */
291 	if (type == PR_PAGEDATA || type == PR_OPAGEDATA)
292 		npnp = prgetnode(vp, type);
293 
294 	/*
295 	 * If the process exists, lock it now.
296 	 * Otherwise we have a race condition with prclose().
297 	 */
298 	p = pr_p_lock(pnp);
299 	mutex_exit(&pr_pidlock);
300 	if (p == NULL) {
301 		if (npnp != NULL)
302 			prfreenode(npnp);
303 		return (ENOENT);
304 	}
305 	ASSERT(p == pcp->prc_proc);
306 	ASSERT(p->p_proc_flag & P_PR_LOCK);
307 
308 	/*
309 	 * Maintain a count of opens for write.  Allow exactly one
310 	 * O_WRITE|O_EXCL request and fail subsequent ones.
311 	 * Don't fail opens of old (bletch!) /proc lwp files.
312 	 * Special case for open by the process itself:
313 	 * Always allow the open by self and discount this
314 	 * open for other opens for writing.
315 	 */
316 	if (flag & FWRITE) {
317 		if (p == curproc) {
318 			pcp->prc_selfopens++;
319 			pnp->pr_flags |= PR_ISSELF;
320 		} else if (type == PR_LWPIDFILE) {
321 			/* EMPTY */;
322 		} else if (flag & FEXCL) {
323 			if (pcp->prc_writers > pcp->prc_selfopens) {
324 				error = EBUSY;
325 				goto out;
326 			}
327 			/* semantic for old /proc interface */
328 			if (type == PR_PIDDIR)
329 				pcp->prc_flags |= PRC_EXCL;
330 		} else if (pcp->prc_flags & PRC_EXCL) {
331 			ASSERT(pcp->prc_writers > pcp->prc_selfopens);
332 			error = secpolicy_proc_excl_open(cr);
333 			if (error)
334 				goto out;
335 		}
336 		pcp->prc_writers++;
337 		/*
338 		 * The vnode may have become invalid between the
339 		 * VOP_LOOKUP() of the /proc vnode and the VOP_OPEN().
340 		 * If so, do now what prinvalidate() should have done.
341 		 */
342 		if ((pnp->pr_flags & PR_INVAL) ||
343 		    (type == PR_PIDDIR &&
344 		    (VTOP(pnp->pr_pidfile)->pr_flags & PR_INVAL))) {
345 			if (p != curproc)
346 				pcp->prc_selfopens++;
347 			ASSERT(pcp->prc_selfopens <= pcp->prc_writers);
348 			if (pcp->prc_selfopens == pcp->prc_writers)
349 				pcp->prc_flags &= ~PRC_EXCL;
350 		}
351 	}
352 
353 	/*
354 	 * If this is a large file open, indicate that in our flags -- some
355 	 * procfs structures are not off_t-neutral (e.g., priovec_t), and
356 	 * the open will need to be differentiated where 32-bit processes
357 	 * pass these structures across the user/kernel boundary.
358 	 */
359 	if (flag & FOFFMAX)
360 		pnp->pr_flags |= PR_OFFMAX;
361 
362 	/*
363 	 * Do file-specific things.
364 	 */
365 	switch (type) {
366 	default:
367 		break;
368 	case PR_PAGEDATA:
369 	case PR_OPAGEDATA:
370 		/*
371 		 * Enable data collection for page data file;
372 		 * get unique id from the hat layer.
373 		 */
374 		{
375 			int id;
376 
377 			/*
378 			 * Drop p->p_lock to call hat_startstat()
379 			 */
380 			mutex_exit(&p->p_lock);
381 			if ((p->p_flag & SSYS) || p->p_as == &kas ||
382 			    (id = hat_startstat(p->p_as)) == -1) {
383 				mutex_enter(&p->p_lock);
384 				error = ENOMEM;
385 			} else if (pnp->pr_hatid == 0) {
386 				mutex_enter(&p->p_lock);
387 				pnp->pr_hatid = (uint_t)id;
388 			} else {
389 				mutex_enter(&p->p_lock);
390 				/*
391 				 * Use our newly allocated prnode.
392 				 */
393 				npnp->pr_hatid = (uint_t)id;
394 				/*
395 				 * prgetnode() initialized most of the prnode.
396 				 * Duplicate the remainder.
397 				 */
398 				npnp->pr_ino = pnp->pr_ino;
399 				npnp->pr_common = pnp->pr_common;
400 				npnp->pr_pcommon = pnp->pr_pcommon;
401 				npnp->pr_parent = pnp->pr_parent;
402 				VN_HOLD(npnp->pr_parent);
403 				npnp->pr_index = pnp->pr_index;
404 
405 				npnp->pr_next = p->p_plist;
406 				p->p_plist = PTOV(npnp);
407 
408 				VN_RELE(PTOV(pnp));
409 				pnp = npnp;
410 				npnp = NULL;
411 				*vpp = PTOV(pnp);
412 			}
413 		}
414 		break;
415 	}
416 
417 out:
418 	prunlock(pnp);
419 
420 	if (npnp != NULL)
421 		prfreenode(npnp);
422 	return (error);
423 }
424 
425 /* ARGSUSED */
426 static int
427 prclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
428     caller_context_t *ct)
429 {
430 	prnode_t *pnp = VTOP(vp);
431 	prcommon_t *pcp = pnp->pr_pcommon;
432 	prnodetype_t type = pnp->pr_type;
433 	proc_t *p;
434 	kthread_t *t;
435 	user_t *up;
436 
437 	/*
438 	 * Nothing to do for the /proc directory itself.
439 	 */
440 	if (type == PR_PROCDIR)
441 		return (0);
442 
443 	ASSERT(type != PR_OBJECT && type != PR_FD &&
444 	    type != PR_CURDIR && type != PR_ROOTDIR);
445 
446 	/*
447 	 * If the process exists, lock it now.
448 	 * Otherwise we have a race condition with propen().
449 	 * Hold pr_pidlock across the reference to prc_selfopens,
450 	 * and prc_writers in case there is no process anymore,
451 	 * to cover the case of concurrent calls to prclose()
452 	 * after the process has been reaped by freeproc().
453 	 */
454 	p = pr_p_lock(pnp);
455 
456 	/*
457 	 * There is nothing more to do until the last close of
458 	 * the file table entry except to clear the pr_owner
459 	 * field of the prnode and notify any waiters
460 	 * (their file descriptor may have just been closed).
461 	 */
462 	if (count > 1) {
463 		mutex_exit(&pr_pidlock);
464 		if (pnp->pr_owner == curproc && !fisopen(vp))
465 			pnp->pr_owner = NULL;
466 		if (p != NULL) {
467 			prnotify(vp);
468 			prunlock(pnp);
469 		}
470 		return (0);
471 	}
472 
473 	/*
474 	 * Decrement the count of self-opens for writing.
475 	 * Decrement the total count of opens for writing.
476 	 * Cancel exclusive opens when only self-opens remain.
477 	 */
478 	if (flag & FWRITE) {
479 		/*
480 		 * prc_selfopens also contains the count of
481 		 * invalid writers.  See prinvalidate().
482 		 */
483 		if ((pnp->pr_flags & (PR_ISSELF|PR_INVAL)) ||
484 		    (type == PR_PIDDIR &&
485 		    (VTOP(pnp->pr_pidfile)->pr_flags & PR_INVAL))) {
486 			ASSERT(pcp->prc_selfopens != 0);
487 			--pcp->prc_selfopens;
488 		}
489 		ASSERT(pcp->prc_writers != 0);
490 		if (--pcp->prc_writers == pcp->prc_selfopens)
491 			pcp->prc_flags &= ~PRC_EXCL;
492 	}
493 	ASSERT(pcp->prc_writers >= pcp->prc_selfopens);
494 	mutex_exit(&pr_pidlock);
495 	if (pnp->pr_owner == curproc && !fisopen(vp))
496 		pnp->pr_owner = NULL;
497 
498 	/*
499 	 * If there is no process, there is nothing more to do.
500 	 */
501 	if (p == NULL)
502 		return (0);
503 
504 	ASSERT(p == pcp->prc_proc);
505 	prnotify(vp);	/* notify waiters */
506 
507 	/*
508 	 * Do file-specific things.
509 	 */
510 	switch (type) {
511 	default:
512 		break;
513 	case PR_PAGEDATA:
514 	case PR_OPAGEDATA:
515 		/*
516 		 * This is a page data file.
517 		 * Free the hat level statistics.
518 		 * Drop p->p_lock before calling hat_freestat().
519 		 */
520 		mutex_exit(&p->p_lock);
521 		if (p->p_as != &kas && pnp->pr_hatid != 0)
522 			hat_freestat(p->p_as, pnp->pr_hatid);
523 		mutex_enter(&p->p_lock);
524 		pnp->pr_hatid = 0;
525 		break;
526 	}
527 
528 	/*
529 	 * On last close of all writable file descriptors,
530 	 * perform run-on-last-close and/or kill-on-last-close logic.
531 	 * Can't do this is the /proc agent lwp still exists.
532 	 */
533 	if (pcp->prc_writers == 0 &&
534 	    p->p_agenttp == NULL &&
535 	    !(pcp->prc_flags & PRC_DESTROY) &&
536 	    p->p_stat != SZOMB &&
537 	    (p->p_proc_flag & (P_PR_RUNLCL|P_PR_KILLCL))) {
538 		int killproc;
539 
540 		/*
541 		 * Cancel any watchpoints currently in effect.
542 		 * The process might disappear during this operation.
543 		 */
544 		if (pr_cancel_watch(pnp) == NULL)
545 			return (0);
546 		/*
547 		 * If any tracing flags are set, clear them.
548 		 */
549 		if (p->p_proc_flag & P_PR_TRACE) {
550 			up = PTOU(p);
551 			premptyset(&up->u_entrymask);
552 			premptyset(&up->u_exitmask);
553 			up->u_systrap = 0;
554 		}
555 		premptyset(&p->p_sigmask);
556 		premptyset(&p->p_fltmask);
557 		killproc = (p->p_proc_flag & P_PR_KILLCL);
558 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
559 		/*
560 		 * Cancel any outstanding single-step requests.
561 		 */
562 		if ((t = p->p_tlist) != NULL) {
563 			/*
564 			 * Drop p_lock because prnostep() touches the stack.
565 			 * The loop is safe because the process is P_PR_LOCK'd.
566 			 */
567 			mutex_exit(&p->p_lock);
568 			do {
569 				prnostep(ttolwp(t));
570 			} while ((t = t->t_forw) != p->p_tlist);
571 			mutex_enter(&p->p_lock);
572 		}
573 		/*
574 		 * Set runnable all lwps stopped by /proc.
575 		 */
576 		if (killproc)
577 			sigtoproc(p, NULL, SIGKILL);
578 		else
579 			allsetrun(p);
580 	}
581 
582 	prunlock(pnp);
583 	return (0);
584 }
585 
586 /*
587  * Array of read functions, indexed by /proc file type.
588  */
589 static int pr_read_inval(), pr_read_as(), pr_read_status(),
590 	pr_read_lstatus(), pr_read_psinfo(), pr_read_lpsinfo(),
591 	pr_read_map(), pr_read_rmap(), pr_read_xmap(),
592 	pr_read_cred(), pr_read_sigact(), pr_read_auxv(),
593 #if defined(__x86)
594 	pr_read_ldt(),
595 #endif
596 	pr_read_usage(), pr_read_lusage(), pr_read_pagedata(),
597 	pr_read_watch(), pr_read_lwpstatus(), pr_read_lwpsinfo(),
598 	pr_read_lwpusage(), pr_read_lwpname(),
599 	pr_read_xregs(), pr_read_priv(),
600 	pr_read_spymaster(), pr_read_secflags(),
601 #if defined(__sparc)
602 	pr_read_gwindows(), pr_read_asrs(),
603 #endif
604 	pr_read_piddir(), pr_read_pidfile(), pr_read_opagedata(),
605 	pr_read_fdinfo();
606 
607 static int (*pr_read_function[PR_NFILES])() = {
608 	pr_read_inval,		/* /proc				*/
609 	pr_read_inval,		/* /proc/self				*/
610 	pr_read_piddir,		/* /proc/<pid> (old /proc read())	*/
611 	pr_read_as,		/* /proc/<pid>/as			*/
612 	pr_read_inval,		/* /proc/<pid>/ctl			*/
613 	pr_read_status,		/* /proc/<pid>/status			*/
614 	pr_read_lstatus,	/* /proc/<pid>/lstatus			*/
615 	pr_read_psinfo,		/* /proc/<pid>/psinfo			*/
616 	pr_read_lpsinfo,	/* /proc/<pid>/lpsinfo			*/
617 	pr_read_map,		/* /proc/<pid>/map			*/
618 	pr_read_rmap,		/* /proc/<pid>/rmap			*/
619 	pr_read_xmap,		/* /proc/<pid>/xmap			*/
620 	pr_read_cred,		/* /proc/<pid>/cred			*/
621 	pr_read_sigact,		/* /proc/<pid>/sigact			*/
622 	pr_read_auxv,		/* /proc/<pid>/auxv			*/
623 #if defined(__x86)
624 	pr_read_ldt,		/* /proc/<pid>/ldt			*/
625 #endif
626 	pr_read_usage,		/* /proc/<pid>/usage			*/
627 	pr_read_lusage,		/* /proc/<pid>/lusage			*/
628 	pr_read_pagedata,	/* /proc/<pid>/pagedata			*/
629 	pr_read_watch,		/* /proc/<pid>/watch			*/
630 	pr_read_inval,		/* /proc/<pid>/cwd			*/
631 	pr_read_inval,		/* /proc/<pid>/root			*/
632 	pr_read_inval,		/* /proc/<pid>/fd			*/
633 	pr_read_inval,		/* /proc/<pid>/fd/nn			*/
634 	pr_read_inval,		/* /proc/<pid>/fdinfo			*/
635 	pr_read_fdinfo,		/* /proc/<pid>/fdinfo/nn		*/
636 	pr_read_inval,		/* /proc/<pid>/object			*/
637 	pr_read_inval,		/* /proc/<pid>/object/xxx		*/
638 	pr_read_inval,		/* /proc/<pid>/lwp			*/
639 	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>		*/
640 	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/lwpctl	*/
641 	pr_read_lwpname,	/* /proc/<pid>/lwp/<lwpid>/lwpname	*/
642 	pr_read_lwpstatus,	/* /proc/<pid>/lwp/<lwpid>/lwpstatus	*/
643 	pr_read_lwpsinfo,	/* /proc/<pid>/lwp/<lwpid>/lwpsinfo	*/
644 	pr_read_lwpusage,	/* /proc/<pid>/lwp/<lwpid>/lwpusage	*/
645 	pr_read_xregs,		/* /proc/<pid>/lwp/<lwpid>/xregs	*/
646 	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/templates	*/
647 	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/templates/<id> */
648 	pr_read_spymaster,	/* /proc/<pid>/lwp/<lwpid>/spymaster	*/
649 #if defined(__sparc)
650 	pr_read_gwindows,	/* /proc/<pid>/lwp/<lwpid>/gwindows	*/
651 	pr_read_asrs,		/* /proc/<pid>/lwp/<lwpid>/asrs		*/
652 #endif
653 	pr_read_priv,		/* /proc/<pid>/priv			*/
654 	pr_read_inval,		/* /proc/<pid>/path			*/
655 	pr_read_inval,		/* /proc/<pid>/path/xxx			*/
656 	pr_read_inval,		/* /proc/<pid>/contracts		*/
657 	pr_read_inval,		/* /proc/<pid>/contracts/<ctid>		*/
658 	pr_read_secflags,	/* /proc/<pid>/secflags			*/
659 	pr_read_pidfile,	/* old process file			*/
660 	pr_read_pidfile,	/* old lwp file				*/
661 	pr_read_opagedata,	/* old pagedata file			*/
662 };
663 
664 /* ARGSUSED */
665 static int
666 pr_read_inval(prnode_t *pnp, uio_t *uiop, cred_t *cr)
667 {
668 	/*
669 	 * No read() on any /proc directory, use getdents(2) instead.
670 	 * Cannot read a control file either.
671 	 * An underlying mapped object file cannot get here.
672 	 */
673 	return (EINVAL);
674 }
675 
676 static int
677 pr_uioread(void *base, long count, uio_t *uiop)
678 {
679 	int error = 0;
680 
681 	ASSERT(count >= 0);
682 	count -= uiop->uio_offset;
683 	if (count > 0 && uiop->uio_offset >= 0) {
684 		error = uiomove((char *)base + uiop->uio_offset,
685 		    count, UIO_READ, uiop);
686 	}
687 
688 	return (error);
689 }
690 
691 static int
692 pr_read_as(prnode_t *pnp, uio_t *uiop)
693 {
694 	int error;
695 
696 	ASSERT(pnp->pr_type == PR_AS);
697 
698 	if ((error = prlock(pnp, ZNO)) == 0) {
699 		proc_t *p = pnp->pr_common->prc_proc;
700 		struct as *as = p->p_as;
701 
702 		/*
703 		 * /proc I/O cannot be done to a system process.
704 		 * A 32-bit process cannot read a 64-bit process.
705 		 */
706 		if ((p->p_flag & SSYS) || as == &kas) {
707 			error = 0;
708 #ifdef _SYSCALL32_IMPL
709 		} else if (curproc->p_model == DATAMODEL_ILP32 &&
710 		    PROCESS_NOT_32BIT(p)) {
711 			error = EOVERFLOW;
712 #endif
713 		} else {
714 			/*
715 			 * We don't hold p_lock over an i/o operation because
716 			 * that could lead to deadlock with the clock thread.
717 			 */
718 			mutex_exit(&p->p_lock);
719 			error = prusrio(p, UIO_READ, uiop, 0);
720 			mutex_enter(&p->p_lock);
721 		}
722 		prunlock(pnp);
723 	}
724 
725 	return (error);
726 }
727 
728 static int
729 pr_read_status(prnode_t *pnp, uio_t *uiop, cred_t *cr)
730 {
731 	pstatus_t *sp;
732 	int error;
733 
734 	ASSERT(pnp->pr_type == PR_STATUS);
735 
736 	/*
737 	 * We kmem_alloc() the pstatus structure because
738 	 * it is so big it might blow the kernel stack.
739 	 */
740 	sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
741 	if ((error = prlock(pnp, ZNO)) == 0) {
742 		prgetstatus(pnp->pr_common->prc_proc, sp, VTOZONE(PTOV(pnp)));
743 		prunlock(pnp);
744 		error = pr_uioread(sp, sizeof (*sp), uiop);
745 	}
746 	kmem_free(sp, sizeof (*sp));
747 	return (error);
748 }
749 
750 static int
751 pr_read_lstatus(prnode_t *pnp, uio_t *uiop, cred_t *cr)
752 {
753 	proc_t *p;
754 	kthread_t *t;
755 	lwpdir_t *ldp;
756 	size_t size;
757 	prheader_t *php;
758 	lwpstatus_t *sp;
759 	int error;
760 	int nlwp;
761 	int i;
762 
763 	ASSERT(pnp->pr_type == PR_LSTATUS);
764 
765 	if ((error = prlock(pnp, ZNO)) != 0)
766 		return (error);
767 	p = pnp->pr_common->prc_proc;
768 	nlwp = p->p_lwpcnt;
769 	size = sizeof (prheader_t) + nlwp * LSPAN(lwpstatus_t);
770 
771 	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
772 	mutex_exit(&p->p_lock);
773 	php = kmem_zalloc(size, KM_SLEEP);
774 	mutex_enter(&p->p_lock);
775 	/* p->p_lwpcnt can't change while process is locked */
776 	ASSERT(nlwp == p->p_lwpcnt);
777 
778 	php->pr_nent = nlwp;
779 	php->pr_entsize = LSPAN(lwpstatus_t);
780 
781 	sp = (lwpstatus_t *)(php + 1);
782 	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
783 		if (ldp->ld_entry == NULL ||
784 		    (t = ldp->ld_entry->le_thread) == NULL)
785 			continue;
786 		prgetlwpstatus(t, sp, VTOZONE(PTOV(pnp)));
787 		sp = (lwpstatus_t *)((caddr_t)sp + LSPAN(lwpstatus_t));
788 	}
789 	prunlock(pnp);
790 
791 	error = pr_uioread(php, size, uiop);
792 	kmem_free(php, size);
793 	return (error);
794 }
795 
796 static int
797 pr_read_psinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
798 {
799 	psinfo_t psinfo;
800 	proc_t *p;
801 	int error = 0;
802 
803 	ASSERT(pnp->pr_type == PR_PSINFO);
804 
805 	/*
806 	 * We don't want the full treatment of prlock(pnp) here.
807 	 * This file is world-readable and never goes invalid.
808 	 * It doesn't matter if we are in the middle of an exec().
809 	 */
810 	p = pr_p_lock(pnp);
811 	mutex_exit(&pr_pidlock);
812 	if (p == NULL)
813 		error = ENOENT;
814 	else {
815 		ASSERT(p == pnp->pr_common->prc_proc);
816 		prgetpsinfo(p, &psinfo);
817 		prunlock(pnp);
818 		error = pr_uioread(&psinfo, sizeof (psinfo), uiop);
819 	}
820 	return (error);
821 }
822 
823 static int
824 pr_read_fdinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
825 {
826 	prfdinfo_t *fdinfo;
827 	list_t data;
828 	proc_t *p;
829 	uint_t fd;
830 	file_t *fp;
831 	short ufp_flag;
832 	int error = 0;
833 
834 	ASSERT(pnp->pr_type == PR_FDINFO);
835 
836 	/*
837 	 * This is a guess at the size of the structure that needs to
838 	 * be returned. It's a balance between not allocating too much more
839 	 * space than is required and not requiring too many subsequent
840 	 * reallocations. Allocate it before acquiring the process lock.
841 	 */
842 	pr_iol_initlist(&data, sizeof (prfdinfo_t) + MAXPATHLEN + 2, 1);
843 
844 	if ((error = prlock(pnp, ZNO)) != 0) {
845 		pr_iol_freelist(&data);
846 		return (error);
847 	}
848 
849 	p = pnp->pr_common->prc_proc;
850 
851 	if ((p->p_flag & SSYS) || p->p_as == &kas) {
852 		prunlock(pnp);
853 		pr_iol_freelist(&data);
854 		return (0);
855 	}
856 
857 	fd = pnp->pr_index;
858 
859 	/* Fetch and lock the file_t for this descriptor */
860 	fp = pr_getf(p, fd, &ufp_flag);
861 
862 	if (fp == NULL) {
863 		error = ENOENT;
864 		prunlock(pnp);
865 		goto out;
866 	}
867 
868 	/*
869 	 * For fdinfo, we don't want to include the placeholder pr_misc at the
870 	 * end of the struct. We'll terminate the data with an empty pr_misc
871 	 * header before returning.
872 	 */
873 
874 	fdinfo = pr_iol_newbuf(&data, offsetof(prfdinfo_t, pr_misc));
875 	fdinfo->pr_fd = fd;
876 	fdinfo->pr_fdflags = ufp_flag;
877 	fdinfo->pr_fileflags = fp->f_flag2 << 16 | fp->f_flag;
878 	if ((fdinfo->pr_fileflags & (FSEARCH | FEXEC)) == 0)
879 		fdinfo->pr_fileflags += FOPEN;
880 	fdinfo->pr_offset = fp->f_offset;
881 	/*
882 	 * Information from the vnode (rather than the file_t) is retrieved
883 	 * later, in prgetfdinfo() - for example sock_getfasync()
884 	 */
885 
886 	prunlock(pnp);
887 
888 	error = prgetfdinfo(p, fp->f_vnode, fdinfo, cr, fp->f_cred, &data);
889 
890 	pr_releasef(fp);
891 
892 out:
893 	if (error == 0)
894 		error = pr_iol_uiomove_and_free(&data, uiop, error);
895 	else
896 		pr_iol_freelist(&data);
897 
898 	return (error);
899 }
900 
901 static int
902 pr_read_lpsinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
903 {
904 	proc_t *p;
905 	kthread_t *t;
906 	lwpdir_t *ldp;
907 	lwpent_t *lep;
908 	size_t size;
909 	prheader_t *php;
910 	lwpsinfo_t *sp;
911 	int error;
912 	int nlwp;
913 	int i;
914 
915 	ASSERT(pnp->pr_type == PR_LPSINFO);
916 
917 	/*
918 	 * We don't want the full treatment of prlock(pnp) here.
919 	 * This file is world-readable and never goes invalid.
920 	 * It doesn't matter if we are in the middle of an exec().
921 	 */
922 	p = pr_p_lock(pnp);
923 	mutex_exit(&pr_pidlock);
924 	if (p == NULL)
925 		return (ENOENT);
926 	ASSERT(p == pnp->pr_common->prc_proc);
927 	if ((nlwp = p->p_lwpcnt + p->p_zombcnt) == 0) {
928 		prunlock(pnp);
929 		return (ENOENT);
930 	}
931 	size = sizeof (prheader_t) + nlwp * LSPAN(lwpsinfo_t);
932 
933 	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
934 	mutex_exit(&p->p_lock);
935 	php = kmem_zalloc(size, KM_SLEEP);
936 	mutex_enter(&p->p_lock);
937 	/* p->p_lwpcnt can't change while process is locked */
938 	ASSERT(nlwp == p->p_lwpcnt + p->p_zombcnt);
939 
940 	php->pr_nent = nlwp;
941 	php->pr_entsize = LSPAN(lwpsinfo_t);
942 
943 	sp = (lwpsinfo_t *)(php + 1);
944 	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
945 		if ((lep = ldp->ld_entry) == NULL)
946 			continue;
947 		if ((t = lep->le_thread) != NULL)
948 			prgetlwpsinfo(t, sp);
949 		else {
950 			bzero(sp, sizeof (*sp));
951 			sp->pr_lwpid = lep->le_lwpid;
952 			sp->pr_state = SZOMB;
953 			sp->pr_sname = 'Z';
954 			sp->pr_start.tv_sec = lep->le_start;
955 			sp->pr_bindpro = PBIND_NONE;
956 			sp->pr_bindpset = PS_NONE;
957 		}
958 		sp = (lwpsinfo_t *)((caddr_t)sp + LSPAN(lwpsinfo_t));
959 	}
960 	prunlock(pnp);
961 
962 	error = pr_uioread(php, size, uiop);
963 	kmem_free(php, size);
964 	return (error);
965 }
966 
967 static int
968 pr_read_map_common(prnode_t *pnp, uio_t *uiop, prnodetype_t type)
969 {
970 	proc_t *p;
971 	struct as *as;
972 	list_t iolhead;
973 	int error;
974 
975 readmap_common:
976 	if ((error = prlock(pnp, ZNO)) != 0)
977 		return (error);
978 
979 	p = pnp->pr_common->prc_proc;
980 	as = p->p_as;
981 
982 	if ((p->p_flag & SSYS) || as == &kas) {
983 		prunlock(pnp);
984 		return (0);
985 	}
986 
987 	if (!AS_LOCK_TRYENTER(as, RW_WRITER)) {
988 		prunlock(pnp);
989 		delay(1);
990 		goto readmap_common;
991 	}
992 	mutex_exit(&p->p_lock);
993 
994 	switch (type) {
995 	case PR_XMAP:
996 		error = prgetxmap(p, &iolhead);
997 		break;
998 	case PR_RMAP:
999 		error = prgetmap(p, 1, &iolhead);
1000 		break;
1001 	case PR_MAP:
1002 		error = prgetmap(p, 0, &iolhead);
1003 		break;
1004 	}
1005 
1006 	AS_LOCK_EXIT(as);
1007 	mutex_enter(&p->p_lock);
1008 	prunlock(pnp);
1009 
1010 	error = pr_iol_uiomove_and_free(&iolhead, uiop, error);
1011 
1012 	return (error);
1013 }
1014 
1015 static int
1016 pr_read_map(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1017 {
1018 	ASSERT(pnp->pr_type == PR_MAP);
1019 	return (pr_read_map_common(pnp, uiop, pnp->pr_type));
1020 }
1021 
1022 static int
1023 pr_read_rmap(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1024 {
1025 	ASSERT(pnp->pr_type == PR_RMAP);
1026 	return (pr_read_map_common(pnp, uiop, pnp->pr_type));
1027 }
1028 
1029 static int
1030 pr_read_xmap(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1031 {
1032 	ASSERT(pnp->pr_type == PR_XMAP);
1033 	return (pr_read_map_common(pnp, uiop, pnp->pr_type));
1034 }
1035 
1036 static int
1037 pr_read_cred(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1038 {
1039 	proc_t *p;
1040 	prcred_t *pcrp;
1041 	int error;
1042 	size_t count;
1043 
1044 	ASSERT(pnp->pr_type == PR_CRED);
1045 
1046 	/*
1047 	 * We kmem_alloc() the prcred_t structure because
1048 	 * the number of supplementary groups is variable.
1049 	 */
1050 	pcrp =
1051 	    kmem_zalloc(sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1),
1052 	    KM_SLEEP);
1053 
1054 	if ((error = prlock(pnp, ZNO)) != 0)
1055 		goto out;
1056 	p = pnp->pr_common->prc_proc;
1057 	ASSERT(p != NULL);
1058 
1059 	prgetcred(p, pcrp);
1060 	prunlock(pnp);
1061 
1062 	count = sizeof (prcred_t);
1063 	if (pcrp->pr_ngroups > 1)
1064 		count += sizeof (gid_t) * (pcrp->pr_ngroups - 1);
1065 	error = pr_uioread(pcrp, count, uiop);
1066 out:
1067 	kmem_free(pcrp, sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1));
1068 	return (error);
1069 }
1070 
1071 static int
1072 pr_read_priv(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1073 {
1074 	proc_t *p;
1075 	size_t psize = prgetprivsize();
1076 	prpriv_t *ppriv = kmem_zalloc(psize, KM_SLEEP);
1077 	int error;
1078 
1079 	ASSERT(pnp->pr_type == PR_PRIV);
1080 
1081 	if ((error = prlock(pnp, ZNO)) != 0)
1082 		goto out;
1083 	p = pnp->pr_common->prc_proc;
1084 	ASSERT(p != NULL);
1085 
1086 	prgetpriv(p, ppriv);
1087 	prunlock(pnp);
1088 
1089 	error = pr_uioread(ppriv, psize, uiop);
1090 out:
1091 	kmem_free(ppriv, psize);
1092 	return (error);
1093 }
1094 
1095 static int
1096 pr_read_sigact(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1097 {
1098 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1099 	proc_t *p;
1100 	struct sigaction *sap;
1101 	int sig;
1102 	int error;
1103 	user_t *up;
1104 
1105 	ASSERT(pnp->pr_type == PR_SIGACT);
1106 
1107 	/*
1108 	 * We kmem_alloc() the sigaction array because
1109 	 * it is so big it might blow the kernel stack.
1110 	 */
1111 	sap = kmem_alloc((nsig-1) * sizeof (struct sigaction), KM_SLEEP);
1112 
1113 	if ((error = prlock(pnp, ZNO)) != 0)
1114 		goto out;
1115 	p = pnp->pr_common->prc_proc;
1116 	ASSERT(p != NULL);
1117 
1118 	if (uiop->uio_offset >= (nsig-1)*sizeof (struct sigaction)) {
1119 		prunlock(pnp);
1120 		goto out;
1121 	}
1122 
1123 	up = PTOU(p);
1124 	for (sig = 1; sig < nsig; sig++)
1125 		prgetaction(p, up, sig, &sap[sig-1]);
1126 	prunlock(pnp);
1127 
1128 	error = pr_uioread(sap, (nsig - 1) * sizeof (struct sigaction), uiop);
1129 out:
1130 	kmem_free(sap, (nsig-1) * sizeof (struct sigaction));
1131 	return (error);
1132 }
1133 
1134 static int
1135 pr_read_auxv(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1136 {
1137 	auxv_t auxv[__KERN_NAUXV_IMPL];
1138 	proc_t *p;
1139 	user_t *up;
1140 	int error;
1141 
1142 	ASSERT(pnp->pr_type == PR_AUXV);
1143 
1144 	if ((error = prlock(pnp, ZNO)) != 0)
1145 		return (error);
1146 
1147 	if (uiop->uio_offset >= sizeof (auxv)) {
1148 		prunlock(pnp);
1149 		return (0);
1150 	}
1151 
1152 	p = pnp->pr_common->prc_proc;
1153 	up = PTOU(p);
1154 	bcopy(up->u_auxv, auxv, sizeof (auxv));
1155 	prunlock(pnp);
1156 
1157 	return (pr_uioread(auxv, sizeof (auxv), uiop));
1158 }
1159 
1160 #if defined(__x86)
/*
 * XX64
 *	This is almost certainly broken for the amd64 kernel, because
 *	we have two kinds of LDT structures to export -- one for compatibility
 *	mode, and one for long mode, sigh.
 *
 *	For now let's just have a ldt of size 0 for 64-bit processes.
 *
 * Read handler for a PR_LDT node.  The data offered to the reader is an
 * array of prnldt(p) struct ssd entries filled in by prgetldt().
 *
 * Returns the prlock() error if the process cannot be locked, 0 with
 * nothing transferred when the file offset is at or past the end of the
 * array (which includes the case of an empty array), and otherwise the
 * result of pr_uioread().
 */
static int
pr_read_ldt(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
	proc_t *p;
	struct ssd *ssd;
	size_t size;
	int error;

	ASSERT(pnp->pr_type == PR_LDT);

	if ((error = prlock(pnp, ZNO)) != 0)
		return (error);
	p = pnp->pr_common->prc_proc;

	/*
	 * Trade p_lock for p_ldtlock.  The size computation, the KM_SLEEP
	 * allocation and the prgetldt() copy below all run with p_ldtlock
	 * held and p_lock dropped; p_lock is re-taken before every
	 * prunlock() call.
	 */
	mutex_exit(&p->p_lock);
	mutex_enter(&p->p_ldtlock);
	size = prnldt(p) * sizeof (struct ssd);
	if (uiop->uio_offset >= size) {
		mutex_exit(&p->p_ldtlock);
		mutex_enter(&p->p_lock);
		prunlock(pnp);
		return (0);
	}

	ssd = kmem_alloc(size, KM_SLEEP);
	prgetldt(p, ssd);
	mutex_exit(&p->p_ldtlock);
	mutex_enter(&p->p_lock);
	prunlock(pnp);

	/* copy out from the private snapshot, with no locks held */
	error = pr_uioread(ssd, size, uiop);
	kmem_free(ssd, size);
	return (error);
}
1203 #endif	/* __x86 */
1204 
/*
 * Read handler for a PR_USAGE node.
 *
 * Builds a single process-wide usage record: the totals stored in the
 * proc_t (p_defunct, p_acct[], p_ru, ...) are loaded into a prhusage_t,
 * praddusage() is then called for every lwp on p_tlist that is not
 * marked TP_LWPEXIT, and the result is passed through prcvtusage() to
 * produce the prusage_t that is copied out to the reader.
 *
 * Returns ENOENT if pr_p_lock() finds no process, 0 with nothing
 * transferred when the file offset is at or past sizeof (prusage_t),
 * and otherwise the result of pr_uioread().
 */
static int
pr_read_usage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
	prhusage_t *pup;
	prusage_t *upup;
	proc_t *p;
	kthread_t *t;
	int error;

	ASSERT(pnp->pr_type == PR_USAGE);

	/* allocate now, before locking the process */
	pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
	upup = kmem_alloc(sizeof (*upup), KM_SLEEP);

	/*
	 * We don't want the full treatment of prlock(pnp) here.
	 * This file is world-readable and never goes invalid.
	 * It doesn't matter if we are in the middle of an exec().
	 */
	p = pr_p_lock(pnp);
	/* pr_pidlock is released whether or not a process was found */
	mutex_exit(&pr_pidlock);
	if (p == NULL) {
		error = ENOENT;
		goto out;
	}
	ASSERT(p == pnp->pr_common->prc_proc);

	if (uiop->uio_offset >= sizeof (prusage_t)) {
		prunlock(pnp);
		error = 0;
		goto out;
	}

	pup->pr_tstamp = gethrtime();

	/* start from the totals accumulated in the proc_t itself */
	pup->pr_count  = p->p_defunct;
	pup->pr_create = p->p_mstart;
	pup->pr_term   = p->p_mterm;

	pup->pr_rtime    = p->p_mlreal;
	pup->pr_utime    = p->p_acct[LMS_USER];
	pup->pr_stime    = p->p_acct[LMS_SYSTEM];
	pup->pr_ttime    = p->p_acct[LMS_TRAP];
	pup->pr_tftime   = p->p_acct[LMS_TFAULT];
	pup->pr_dftime   = p->p_acct[LMS_DFAULT];
	pup->pr_kftime   = p->p_acct[LMS_KFAULT];
	pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
	pup->pr_slptime  = p->p_acct[LMS_SLEEP];
	pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
	pup->pr_stoptime = p->p_acct[LMS_STOPPED];

	pup->pr_minf  = p->p_ru.minflt;
	pup->pr_majf  = p->p_ru.majflt;
	pup->pr_nswap = p->p_ru.nswap;
	pup->pr_inblk = p->p_ru.inblock;
	pup->pr_oublk = p->p_ru.oublock;
	pup->pr_msnd  = p->p_ru.msgsnd;
	pup->pr_mrcv  = p->p_ru.msgrcv;
	pup->pr_sigs  = p->p_ru.nsignals;
	pup->pr_vctx  = p->p_ru.nvcsw;
	pup->pr_ictx  = p->p_ru.nivcsw;
	pup->pr_sysc  = p->p_ru.sysc;
	pup->pr_ioch  = p->p_ru.ioch;

	/*
	 * Add the usage information for each active lwp.
	 * The walk is skipped entirely when the node's pr_pcommon has
	 * PRC_DESTROY set.  p_tlist is circular, so the loop ends when
	 * t_forw leads back to the head; the "continue" for an exiting
	 * lwp still evaluates that loop condition.
	 */
	if ((t = p->p_tlist) != NULL &&
	    !(pnp->pr_pcommon->prc_flags & PRC_DESTROY)) {
		do {
			if (t->t_proc_flag & TP_LWPEXIT)
				continue;
			pup->pr_count++;
			praddusage(t, pup);
		} while ((t = t->t_forw) != p->p_tlist);
	}

	prunlock(pnp);

	/* convert and copy out after the process has been unlocked */
	prcvtusage(pup, upup);

	error = pr_uioread(upup, sizeof (prusage_t), uiop);
out:
	kmem_free(pup, sizeof (*pup));
	kmem_free(upup, sizeof (*upup));
	return (error);
}
1293 
/*
 * Read handler for a PR_LUSAGE node.
 *
 * The data offered to the reader is a prheader_t followed by
 * (p_lwpcnt + 1) prusage_t records spaced LSPAN(prusage_t) bytes apart:
 * the first record is built from the totals stored in the proc_t, and
 * one further record is produced for each lwp directory entry that has
 * a thread.
 *
 * Returns ENOENT if pr_p_lock() finds no process or the process has no
 * lwps, 0 with nothing transferred when the file offset is at or past
 * the end of the data, and otherwise the result of pr_uioread().
 */
static int
pr_read_lusage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
	int nlwp;
	prhusage_t *pup;
	prheader_t *php;
	prusage_t *upup;
	size_t size;
	hrtime_t curtime;
	proc_t *p;
	kthread_t *t;
	lwpdir_t *ldp;
	int error;
	int i;

	ASSERT(pnp->pr_type == PR_LUSAGE);

	/*
	 * We don't want the full treatment of prlock(pnp) here.
	 * This file is world-readable and never goes invalid.
	 * It doesn't matter if we are in the middle of an exec().
	 */
	p = pr_p_lock(pnp);
	/* pr_pidlock is released whether or not a process was found */
	mutex_exit(&pr_pidlock);
	if (p == NULL)
		return (ENOENT);
	ASSERT(p == pnp->pr_common->prc_proc);
	if ((nlwp = p->p_lwpcnt) == 0) {
		prunlock(pnp);
		return (ENOENT);
	}

	/* header plus one summary record plus one record per lwp */
	size = sizeof (prheader_t) + (nlwp + 1) * LSPAN(prusage_t);
	if (uiop->uio_offset >= size) {
		prunlock(pnp);
		return (0);
	}

	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
	mutex_exit(&p->p_lock);
	pup = kmem_zalloc(size + sizeof (prhusage_t), KM_SLEEP);
	mutex_enter(&p->p_lock);
	/* p->p_lwpcnt can't change while process is locked */
	ASSERT(nlwp == p->p_lwpcnt);

	/*
	 * Buffer layout: one scratch prhusage_t (pup) at the front, then
	 * the prheader_t (php), then the prusage_t records (upup).  Only
	 * the part starting at php is copied out below.
	 */
	php = (prheader_t *)(pup + 1);
	upup = (prusage_t *)(php + 1);

	php->pr_nent = nlwp + 1;
	php->pr_entsize = LSPAN(prusage_t);

	curtime = gethrtime();

	/*
	 * First the summation over defunct lwps.
	 */
	pup->pr_count  = p->p_defunct;
	pup->pr_tstamp = curtime;
	pup->pr_create = p->p_mstart;
	pup->pr_term   = p->p_mterm;

	pup->pr_rtime    = p->p_mlreal;
	pup->pr_utime    = p->p_acct[LMS_USER];
	pup->pr_stime    = p->p_acct[LMS_SYSTEM];
	pup->pr_ttime    = p->p_acct[LMS_TRAP];
	pup->pr_tftime   = p->p_acct[LMS_TFAULT];
	pup->pr_dftime   = p->p_acct[LMS_DFAULT];
	pup->pr_kftime   = p->p_acct[LMS_KFAULT];
	pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
	pup->pr_slptime  = p->p_acct[LMS_SLEEP];
	pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
	pup->pr_stoptime = p->p_acct[LMS_STOPPED];

	pup->pr_minf  = p->p_ru.minflt;
	pup->pr_majf  = p->p_ru.majflt;
	pup->pr_nswap = p->p_ru.nswap;
	pup->pr_inblk = p->p_ru.inblock;
	pup->pr_oublk = p->p_ru.oublock;
	pup->pr_msnd  = p->p_ru.msgsnd;
	pup->pr_mrcv  = p->p_ru.msgrcv;
	pup->pr_sigs  = p->p_ru.nsignals;
	pup->pr_vctx  = p->p_ru.nvcsw;
	pup->pr_ictx  = p->p_ru.nivcsw;
	pup->pr_sysc  = p->p_ru.sysc;
	pup->pr_ioch  = p->p_ru.ioch;

	/* the summary becomes the first record in the output */
	prcvtusage(pup, upup);

	/*
	 * Fill one prusage struct for each active lwp.
	 * The scratch prhusage_t is reused for every lwp, and upup is
	 * advanced by LSPAN(prusage_t) before each record is written.
	 * nlwp is counted back down to zero as a consistency check.
	 */
	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
		if (ldp->ld_entry == NULL ||
		    (t = ldp->ld_entry->le_thread) == NULL)
			continue;
		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
		ASSERT(nlwp > 0);
		--nlwp;
		upup = (prusage_t *)((caddr_t)upup + LSPAN(prusage_t));
		prgetusage(t, pup);
		prcvtusage(pup, upup);
	}
	ASSERT(nlwp == 0);

	prunlock(pnp);

	/* copy out from php; free from pup, the start of the allocation */
	error = pr_uioread(php, size, uiop);
	kmem_free(pup, size + sizeof (prhusage_t));
	return (error);
}
1404 
1405 static int
1406 pr_read_pagedata(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1407 {
1408 	proc_t *p;
1409 	int error;
1410 
1411 	ASSERT(pnp->pr_type == PR_PAGEDATA);
1412 
1413 	if ((error = prlock(pnp, ZNO)) != 0)
1414 		return (error);
1415 
1416 	p = pnp->pr_common->prc_proc;
1417 	if ((p->p_flag & SSYS) || p->p_as == &kas) {
1418 		prunlock(pnp);
1419 		return (0);
1420 	}
1421 
1422 	mutex_exit(&p->p_lock);
1423 	error = prpdread(p, pnp->pr_hatid, uiop);
1424 	mutex_enter(&p->p_lock);
1425 
1426 	prunlock(pnp);
1427 	return (error);
1428 }
1429 
1430 static int
1431 pr_read_opagedata(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1432 {
1433 	proc_t *p;
1434 	struct as *as;
1435 	int error;
1436 
1437 	ASSERT(pnp->pr_type == PR_OPAGEDATA);
1438 
1439 	if ((error = prlock(pnp, ZNO)) != 0)
1440 		return (error);
1441 
1442 	p = pnp->pr_common->prc_proc;
1443 	as = p->p_as;
1444 	if ((p->p_flag & SSYS) || as == &kas) {
1445 		prunlock(pnp);
1446 		return (0);
1447 	}
1448 
1449 	mutex_exit(&p->p_lock);
1450 	error = oprpdread(as, pnp->pr_hatid, uiop);
1451 	mutex_enter(&p->p_lock);
1452 
1453 	prunlock(pnp);
1454 	return (error);
1455 }
1456 
1457 static int
1458 pr_read_watch(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1459 {
1460 	proc_t *p;
1461 	int error;
1462 	prwatch_t *Bpwp;
1463 	size_t size;
1464 	prwatch_t *pwp;
1465 	int nwarea;
1466 	struct watched_area *pwarea;
1467 
1468 	ASSERT(pnp->pr_type == PR_WATCH);
1469 
1470 	if ((error = prlock(pnp, ZNO)) != 0)
1471 		return (error);
1472 
1473 	p = pnp->pr_common->prc_proc;
1474 	nwarea = avl_numnodes(&p->p_warea);
1475 	size = nwarea * sizeof (prwatch_t);
1476 	if (uiop->uio_offset >= size) {
1477 		prunlock(pnp);
1478 		return (0);
1479 	}
1480 
1481 	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
1482 	mutex_exit(&p->p_lock);
1483 	Bpwp = pwp = kmem_zalloc(size, KM_SLEEP);
1484 	mutex_enter(&p->p_lock);
1485 	/* p->p_nwarea can't change while process is locked */
1486 	ASSERT(nwarea == avl_numnodes(&p->p_warea));
1487 
1488 	/* gather the watched areas */
1489 	for (pwarea = avl_first(&p->p_warea); pwarea != NULL;
1490 	    pwarea = AVL_NEXT(&p->p_warea, pwarea), pwp++) {
1491 		pwp->pr_vaddr = (uintptr_t)pwarea->wa_vaddr;
1492 		pwp->pr_size = pwarea->wa_eaddr - pwarea->wa_vaddr;
1493 		pwp->pr_wflags = (int)pwarea->wa_flags;
1494 	}
1495 
1496 	prunlock(pnp);
1497 
1498 	error = pr_uioread(Bpwp, size, uiop);
1499 	kmem_free(Bpwp, size);
1500 	return (error);
1501 }
1502 
1503 static int
1504 pr_read_lwpstatus(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1505 {
1506 	lwpstatus_t *sp;
1507 	int error;
1508 
1509 	ASSERT(pnp->pr_type == PR_LWPSTATUS);
1510 
1511 	/*
1512 	 * We kmem_alloc() the lwpstatus structure because
1513 	 * it is so big it might blow the kernel stack.
1514 	 */
1515 	sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
1516 
1517 	if ((error = prlock(pnp, ZNO)) != 0)
1518 		goto out;
1519 
1520 	if (uiop->uio_offset >= sizeof (*sp)) {
1521 		prunlock(pnp);
1522 		goto out;
1523 	}
1524 
1525 	prgetlwpstatus(pnp->pr_common->prc_thread, sp, VTOZONE(PTOV(pnp)));
1526 	prunlock(pnp);
1527 
1528 	error = pr_uioread(sp, sizeof (*sp), uiop);
1529 out:
1530 	kmem_free(sp, sizeof (*sp));
1531 	return (error);
1532 }
1533 
1534 static int
1535 pr_read_lwpsinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1536 {
1537 	lwpsinfo_t lwpsinfo;
1538 	proc_t *p;
1539 	kthread_t *t;
1540 	lwpent_t *lep;
1541 
1542 	ASSERT(pnp->pr_type == PR_LWPSINFO);
1543 
1544 	/*
1545 	 * We don't want the full treatment of prlock(pnp) here.
1546 	 * This file is world-readable and never goes invalid.
1547 	 * It doesn't matter if we are in the middle of an exec().
1548 	 */
1549 	p = pr_p_lock(pnp);
1550 	mutex_exit(&pr_pidlock);
1551 	if (p == NULL)
1552 		return (ENOENT);
1553 	ASSERT(p == pnp->pr_common->prc_proc);
1554 	if (pnp->pr_common->prc_tslot == -1) {
1555 		prunlock(pnp);
1556 		return (ENOENT);
1557 	}
1558 
1559 	if (uiop->uio_offset >= sizeof (lwpsinfo)) {
1560 		prunlock(pnp);
1561 		return (0);
1562 	}
1563 
1564 	if ((t = pnp->pr_common->prc_thread) != NULL)
1565 		prgetlwpsinfo(t, &lwpsinfo);
1566 	else {
1567 		lep = p->p_lwpdir[pnp->pr_common->prc_tslot].ld_entry;
1568 		bzero(&lwpsinfo, sizeof (lwpsinfo));
1569 		lwpsinfo.pr_lwpid = lep->le_lwpid;
1570 		lwpsinfo.pr_state = SZOMB;
1571 		lwpsinfo.pr_sname = 'Z';
1572 		lwpsinfo.pr_start.tv_sec = lep->le_start;
1573 		lwpsinfo.pr_bindpro = PBIND_NONE;
1574 		lwpsinfo.pr_bindpset = PS_NONE;
1575 	}
1576 	prunlock(pnp);
1577 
1578 	return (pr_uioread(&lwpsinfo, sizeof (lwpsinfo), uiop));
1579 }
1580 
1581 static int
1582 pr_read_lwpusage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1583 {
1584 	prhusage_t *pup;
1585 	prusage_t *upup;
1586 	proc_t *p;
1587 	int error;
1588 
1589 	ASSERT(pnp->pr_type == PR_LWPUSAGE);
1590 
1591 	/* allocate now, before locking the process */
1592 	pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
1593 	upup = kmem_alloc(sizeof (*upup), KM_SLEEP);
1594 
1595 	/*
1596 	 * We don't want the full treatment of prlock(pnp) here.
1597 	 * This file is world-readable and never goes invalid.
1598 	 * It doesn't matter if we are in the middle of an exec().
1599 	 */
1600 	p = pr_p_lock(pnp);
1601 	mutex_exit(&pr_pidlock);
1602 	if (p == NULL) {
1603 		error = ENOENT;
1604 		goto out;
1605 	}
1606 	ASSERT(p == pnp->pr_common->prc_proc);
1607 	if (pnp->pr_common->prc_thread == NULL) {
1608 		prunlock(pnp);
1609 		error = ENOENT;
1610 		goto out;
1611 	}
1612 	if (uiop->uio_offset >= sizeof (prusage_t)) {
1613 		prunlock(pnp);
1614 		error = 0;
1615 		goto out;
1616 	}
1617 
1618 	pup->pr_tstamp = gethrtime();
1619 	prgetusage(pnp->pr_common->prc_thread, pup);
1620 
1621 	prunlock(pnp);
1622 
1623 	prcvtusage(pup, upup);
1624 
1625 	error = pr_uioread(upup, sizeof (prusage_t), uiop);
1626 out:
1627 	kmem_free(pup, sizeof (*pup));
1628 	kmem_free(upup, sizeof (*upup));
1629 	return (error);
1630 }
1631 
1632 static int
1633 pr_read_lwpname(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1634 {
1635 	char lwpname[THREAD_NAME_MAX];
1636 	kthread_t *t;
1637 	int error;
1638 
1639 	ASSERT(pnp->pr_type == PR_LWPNAME);
1640 
1641 	if (uiop->uio_offset >= THREAD_NAME_MAX)
1642 		return (0);
1643 
1644 	if ((error = prlock(pnp, ZNO)) != 0)
1645 		return (error);
1646 
1647 	bzero(lwpname, sizeof (lwpname));
1648 
1649 	t = pnp->pr_common->prc_thread;
1650 
1651 	if (t->t_name != NULL)
1652 		(void) strlcpy(lwpname, t->t_name, sizeof (lwpname));
1653 
1654 	prunlock(pnp);
1655 
1656 	return (pr_uioread(lwpname, sizeof (lwpname), uiop));
1657 }
1658 
/*
 * Read handler for a PR_XREGS node.
 *
 * The data offered to the reader is a buffer of prgetprxregsize(p)
 * bytes filled in by prgetprxregs() for the node's lwp.
 *
 * Returns the prlock() error if the process cannot be locked, 0 with
 * nothing transferred when the size is zero (including when prhasx(p)
 * is false) or the file offset is at or past the size, EBUSY when the
 * target thread is neither the current thread nor stopped, and
 * otherwise the result of pr_uioread().
 */
/* ARGSUSED */
static int
pr_read_xregs(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
	proc_t *p;
	kthread_t *t;
	int error;
	void *xreg;
	size_t size;

	ASSERT(pnp->pr_type == PR_XREGS);

	if ((error = prlock(pnp, ZNO)) != 0)
		return (error);

	p = pnp->pr_common->prc_proc;
	t = pnp->pr_common->prc_thread;

	/*
	 * While we would prefer to do the allocation without holding the
	 * process under a prlock(), we can only determine this size while
	 * holding the process as the hold guarantees us:
	 *
	 *  o That the process in question actually exists.
	 *  o That the process in question cannot change the set of FPU features
	 *    it has enabled.
	 *
	 * We will drop p_lock across the allocation call itself. This should be
	 * safe as the enabled feature set should not change while the process
	 * is locked (e.g. enabling extended FPU state like AMX on x86 should
	 * require the process to be locked).
	 */
	size = prhasx(p) ? prgetprxregsize(p) : 0;
	if (size == 0) {
		prunlock(pnp);
		return (0);
	}

	/*
	 * To read the extended register set we require that the thread be
	 * stopped as this state is only valid in the kernel when it is. An
	 * exception is made if the target thread and the current thread are
	 * one and the same. We won't stop you from doing something... weird.
	 */
	thread_lock(t);
	if (t != curthread && !ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
		thread_unlock(t);
		prunlock(pnp);
		return (EBUSY);
	}
	thread_unlock(t);

	mutex_exit(&p->p_lock);
	xreg = kmem_zalloc(size, KM_SLEEP);
	mutex_enter(&p->p_lock);
	ASSERT3U(size, ==, prgetprxregsize(p));

	/* error is still 0 from prlock() on this path */
	if (uiop->uio_offset >= size) {
		prunlock(pnp);
		goto out;
	}

	/* drop p->p_lock while (possibly) touching the stack */
	mutex_exit(&p->p_lock);
	prgetprxregs(ttolwp(t), xreg);
	mutex_enter(&p->p_lock);
	prunlock(pnp);

	error = pr_uioread(xreg, size, uiop);
out:
	kmem_free(xreg, size);
	return (error);
}
1732 
1733 static int
1734 pr_read_spymaster(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1735 {
1736 	psinfo_t psinfo;
1737 	int error;
1738 	klwp_t *lwp;
1739 
1740 	ASSERT(pnp->pr_type == PR_SPYMASTER);
1741 
1742 	if ((error = prlock(pnp, ZNO)) != 0)
1743 		return (error);
1744 
1745 	if (pnp->pr_common->prc_thread == NULL) {
1746 		prunlock(pnp);
1747 		return (0);
1748 	}
1749 
1750 	lwp = pnp->pr_common->prc_thread->t_lwp;
1751 
1752 	if (lwp->lwp_spymaster == NULL) {
1753 		prunlock(pnp);
1754 		return (0);
1755 	}
1756 
1757 	bcopy(lwp->lwp_spymaster, &psinfo, sizeof (psinfo_t));
1758 	prunlock(pnp);
1759 
1760 	return (pr_uioread(&psinfo, sizeof (psinfo), uiop));
1761 }
1762 
1763 static int
1764 pr_read_secflags(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1765 {
1766 	prsecflags_t ret;
1767 	int error;
1768 	proc_t *p;
1769 
1770 	ASSERT(pnp->pr_type == PR_SECFLAGS);
1771 
1772 	if ((error = prlock(pnp, ZNO)) != 0)
1773 		return (error);
1774 
1775 	p = pnp->pr_common->prc_proc;
1776 	prgetsecflags(p, &ret);
1777 	prunlock(pnp);
1778 
1779 	return (pr_uioread(&ret, sizeof (ret), uiop));
1780 }
1781 
1782 #if defined(__sparc)
1783 
/*
 * Read handler for a PR_GWINDOWS node.
 *
 * A zeroed gwindows_t is filled in by prgetwindows() for the node's
 * lwp.  The amount offered to the reader depends on prnwindows(): it is
 * zero when that returns 0, and otherwise sizeof (gwindows_t) reduced
 * by one struct rwindow for every window short of SPARC_MAXREGWINDOW.
 *
 * Returns the prlock() error if the process cannot be locked, 0 with
 * nothing transferred when the file offset is at or past that size,
 * and otherwise the result of pr_uioread().
 */
static int
pr_read_gwindows(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
	proc_t *p;
	kthread_t *t;
	gwindows_t *gwp;
	int error;
	size_t size;

	ASSERT(pnp->pr_type == PR_GWINDOWS);

	/* allocate the full-size structure before locking the process */
	gwp = kmem_zalloc(sizeof (gwindows_t), KM_SLEEP);

	if ((error = prlock(pnp, ZNO)) != 0)
		goto out;

	p = pnp->pr_common->prc_proc;
	t = pnp->pr_common->prc_thread;

	/*
	 * Drop p->p_lock while touching the stack.
	 * The P_PR_LOCK flag prevents the lwp from
	 * disappearing while we do this.
	 */
	mutex_exit(&p->p_lock);
	if ((size = prnwindows(ttolwp(t))) != 0)
		size = sizeof (gwindows_t) -
		    (SPARC_MAXREGWINDOW - size) * sizeof (struct rwindow);
	/* error is still 0 from prlock() if we leave here */
	if (uiop->uio_offset >= size) {
		mutex_enter(&p->p_lock);
		prunlock(pnp);
		goto out;
	}
	prgetwindows(ttolwp(t), gwp);
	mutex_enter(&p->p_lock);
	prunlock(pnp);

	/* only the trimmed size is offered; the free uses the full size */
	error = pr_uioread(gwp, size, uiop);
out:
	kmem_free(gwp, sizeof (gwindows_t));
	return (error);
}
1826 
1827 /* ARGSUSED */
1828 static int
1829 pr_read_asrs(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1830 {
1831 	int error;
1832 
1833 	ASSERT(pnp->pr_type == PR_ASRS);
1834 
1835 	/* the asrs file exists only for sparc v9 _LP64 processes */
1836 	if ((error = prlock(pnp, ZNO)) == 0) {
1837 		proc_t *p = pnp->pr_common->prc_proc;
1838 		kthread_t *t = pnp->pr_common->prc_thread;
1839 		asrset_t asrset;
1840 
1841 		if (p->p_model != DATAMODEL_LP64 ||
1842 		    uiop->uio_offset >= sizeof (asrset_t)) {
1843 			prunlock(pnp);
1844 			return (0);
1845 		}
1846 
1847 		/*
1848 		 * Drop p->p_lock while touching the stack.
1849 		 * The P_PR_LOCK flag prevents the lwp from
1850 		 * disappearing while we do this.
1851 		 */
1852 		mutex_exit(&p->p_lock);
1853 		prgetasregs(ttolwp(t), asrset);
1854 		mutex_enter(&p->p_lock);
1855 		prunlock(pnp);
1856 
1857 		error = pr_uioread(&asrset[0], sizeof (asrset_t), uiop);
1858 	}
1859 
1860 	return (error);
1861 }
1862 
1863 #endif	/* __sparc */
1864 
1865 static int
1866 pr_read_piddir(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1867 {
1868 	ASSERT(pnp->pr_type == PR_PIDDIR);
1869 	ASSERT(pnp->pr_pidfile != NULL);
1870 
1871 	/* use the underlying PR_PIDFILE to read the process */
1872 	pnp = VTOP(pnp->pr_pidfile);
1873 	ASSERT(pnp->pr_type == PR_PIDFILE);
1874 
1875 	return (pr_read_pidfile(pnp, uiop));
1876 }
1877 
1878 static int
1879 pr_read_pidfile(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1880 {
1881 	int error;
1882 
1883 	ASSERT(pnp->pr_type == PR_PIDFILE || pnp->pr_type == PR_LWPIDFILE);
1884 
1885 	if ((error = prlock(pnp, ZNO)) == 0) {
1886 		proc_t *p = pnp->pr_common->prc_proc;
1887 		struct as *as = p->p_as;
1888 
1889 		if ((p->p_flag & SSYS) || as == &kas) {
1890 			/*
1891 			 * /proc I/O cannot be done to a system process.
1892 			 */
1893 			error = EIO;	/* old /proc semantics */
1894 		} else {
1895 			/*
1896 			 * We drop p_lock because we don't want to hold
1897 			 * it over an I/O operation because that could
1898 			 * lead to deadlock with the clock thread.
1899 			 * The process will not disappear and its address
1900 			 * space will not change because it is marked P_PR_LOCK.
1901 			 */
1902 			mutex_exit(&p->p_lock);
1903 			error = prusrio(p, UIO_READ, uiop, 1);
1904 			mutex_enter(&p->p_lock);
1905 		}
1906 		prunlock(pnp);
1907 	}
1908 
1909 	return (error);
1910 }
1911 
1912 #ifdef _SYSCALL32_IMPL
1913 
1914 /*
1915  * Array of ILP32 read functions, indexed by /proc file type.
1916  */
/*
 * Forward declarations of the ILP32-specific read handlers named in the
 * pr_read_function_32[] table below.  They are declared without
 * parameter lists, so the compiler does not check the arguments of
 * calls made through these declarations.  pr_read_gwindows_32() is
 * declared only on sparc.
 */
static int pr_read_status_32(),
	pr_read_lstatus_32(), pr_read_psinfo_32(), pr_read_lpsinfo_32(),
	pr_read_map_32(), pr_read_rmap_32(), pr_read_xmap_32(),
	pr_read_sigact_32(), pr_read_auxv_32(),
	pr_read_usage_32(), pr_read_lusage_32(), pr_read_pagedata_32(),
	pr_read_watch_32(), pr_read_lwpstatus_32(), pr_read_lwpsinfo_32(),
	pr_read_lwpusage_32(), pr_read_spymaster_32(),
#if defined(__sparc)
	pr_read_gwindows_32(),
#endif
	pr_read_opagedata_32();
1928 
/*
 * ILP32 read dispatch table with PR_NFILES slots.  Each initializer
 * is annotated with the /proc file it serves.  Files that have an ILP32
 * variant use the handler with the _32 suffix; the remaining readable
 * files (for example cred, priv, secflags, lwpname, xregs) share the
 * native handler.
 *
 * NOTE(review): the initializers are positional, so their order --
 * including the entries that are conditional on __x86 and __sparc --
 * presumably has to track the /proc file type enumeration exactly;
 * confirm against its definition before adding or moving an entry.
 */
static int (*pr_read_function_32[PR_NFILES])() = {
	pr_read_inval,		/* /proc				*/
	pr_read_inval,		/* /proc/self				*/
	pr_read_piddir,		/* /proc/<pid> (old /proc read())	*/
	pr_read_as,		/* /proc/<pid>/as			*/
	pr_read_inval,		/* /proc/<pid>/ctl			*/
	pr_read_status_32,	/* /proc/<pid>/status			*/
	pr_read_lstatus_32,	/* /proc/<pid>/lstatus			*/
	pr_read_psinfo_32,	/* /proc/<pid>/psinfo			*/
	pr_read_lpsinfo_32,	/* /proc/<pid>/lpsinfo			*/
	pr_read_map_32,		/* /proc/<pid>/map			*/
	pr_read_rmap_32,	/* /proc/<pid>/rmap			*/
	pr_read_xmap_32,	/* /proc/<pid>/xmap			*/
	pr_read_cred,		/* /proc/<pid>/cred			*/
	pr_read_sigact_32,	/* /proc/<pid>/sigact			*/
	pr_read_auxv_32,	/* /proc/<pid>/auxv			*/
#if defined(__x86)
	pr_read_ldt,		/* /proc/<pid>/ldt			*/
#endif
	pr_read_usage_32,	/* /proc/<pid>/usage			*/
	pr_read_lusage_32,	/* /proc/<pid>/lusage			*/
	pr_read_pagedata_32,	/* /proc/<pid>/pagedata			*/
	pr_read_watch_32,	/* /proc/<pid>/watch			*/
	pr_read_inval,		/* /proc/<pid>/cwd			*/
	pr_read_inval,		/* /proc/<pid>/root			*/
	pr_read_inval,		/* /proc/<pid>/fd			*/
	pr_read_inval,		/* /proc/<pid>/fd/nn			*/
	pr_read_inval,		/* /proc/<pid>/fdinfo			*/
	pr_read_fdinfo,		/* /proc/<pid>/fdinfo/nn		*/
	pr_read_inval,		/* /proc/<pid>/object			*/
	pr_read_inval,		/* /proc/<pid>/object/xxx		*/
	pr_read_inval,		/* /proc/<pid>/lwp			*/
	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>		*/
	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/lwpctl	*/
	pr_read_lwpname,	/* /proc/<pid>/lwp/<lwpid>/lwpname	*/
	pr_read_lwpstatus_32,	/* /proc/<pid>/lwp/<lwpid>/lwpstatus	*/
	pr_read_lwpsinfo_32,	/* /proc/<pid>/lwp/<lwpid>/lwpsinfo	*/
	pr_read_lwpusage_32,	/* /proc/<pid>/lwp/<lwpid>/lwpusage	*/
	pr_read_xregs,		/* /proc/<pid>/lwp/<lwpid>/xregs	*/
	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/templates	*/
	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/templates/<id> */
	pr_read_spymaster_32,	/* /proc/<pid>/lwp/<lwpid>/spymaster	*/
#if defined(__sparc)
	pr_read_gwindows_32,	/* /proc/<pid>/lwp/<lwpid>/gwindows	*/
	pr_read_asrs,		/* /proc/<pid>/lwp/<lwpid>/asrs		*/
#endif
	pr_read_priv,		/* /proc/<pid>/priv			*/
	pr_read_inval,		/* /proc/<pid>/path			*/
	pr_read_inval,		/* /proc/<pid>/path/xxx			*/
	pr_read_inval,		/* /proc/<pid>/contracts		*/
	pr_read_inval,		/* /proc/<pid>/contracts/<ctid>		*/
	pr_read_secflags,	/* /proc/<pid>/secflags			*/
	pr_read_pidfile,	/* old process file			*/
	pr_read_pidfile,	/* old lwp file				*/
	pr_read_opagedata_32,	/* old pagedata file			*/
};
1985 
1986 static int
1987 pr_read_status_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
1988 {
1989 	pstatus32_t *sp;
1990 	proc_t *p;
1991 	int error;
1992 
1993 	ASSERT(pnp->pr_type == PR_STATUS);
1994 
1995 	/*
1996 	 * We kmem_alloc() the pstatus structure because
1997 	 * it is so big it might blow the kernel stack.
1998 	 */
1999 	sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
2000 	if ((error = prlock(pnp, ZNO)) == 0) {
2001 		/*
2002 		 * A 32-bit process cannot get the status of a 64-bit process.
2003 		 * The fields for the 64-bit quantities are not large enough.
2004 		 */
2005 		p = pnp->pr_common->prc_proc;
2006 		if (PROCESS_NOT_32BIT(p)) {
2007 			prunlock(pnp);
2008 			error = EOVERFLOW;
2009 		} else {
2010 			prgetstatus32(pnp->pr_common->prc_proc, sp,
2011 			    VTOZONE(PTOV(pnp)));
2012 			prunlock(pnp);
2013 			error = pr_uioread(sp, sizeof (*sp), uiop);
2014 		}
2015 	}
2016 	kmem_free((caddr_t)sp, sizeof (*sp));
2017 	return (error);
2018 }
2019 
2020 static int
2021 pr_read_lstatus_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2022 {
2023 	proc_t *p;
2024 	kthread_t *t;
2025 	lwpdir_t *ldp;
2026 	size_t size;
2027 	prheader32_t *php;
2028 	lwpstatus32_t *sp;
2029 	int error;
2030 	int nlwp;
2031 	int i;
2032 
2033 	ASSERT(pnp->pr_type == PR_LSTATUS);
2034 
2035 	if ((error = prlock(pnp, ZNO)) != 0)
2036 		return (error);
2037 	p = pnp->pr_common->prc_proc;
2038 	/*
2039 	 * A 32-bit process cannot get the status of a 64-bit process.
2040 	 * The fields for the 64-bit quantities are not large enough.
2041 	 */
2042 	if (PROCESS_NOT_32BIT(p)) {
2043 		prunlock(pnp);
2044 		return (EOVERFLOW);
2045 	}
2046 	nlwp = p->p_lwpcnt;
2047 	size = sizeof (prheader32_t) + nlwp * LSPAN32(lwpstatus32_t);
2048 
2049 	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
2050 	mutex_exit(&p->p_lock);
2051 	php = kmem_zalloc(size, KM_SLEEP);
2052 	mutex_enter(&p->p_lock);
2053 	/* p->p_lwpcnt can't change while process is locked */
2054 	ASSERT(nlwp == p->p_lwpcnt);
2055 
2056 	php->pr_nent = nlwp;
2057 	php->pr_entsize = LSPAN32(lwpstatus32_t);
2058 
2059 	sp = (lwpstatus32_t *)(php + 1);
2060 	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
2061 		if (ldp->ld_entry == NULL ||
2062 		    (t = ldp->ld_entry->le_thread) == NULL)
2063 			continue;
2064 		prgetlwpstatus32(t, sp, VTOZONE(PTOV(pnp)));
2065 		sp = (lwpstatus32_t *)((caddr_t)sp + LSPAN32(lwpstatus32_t));
2066 	}
2067 	prunlock(pnp);
2068 
2069 	error = pr_uioread(php, size, uiop);
2070 	kmem_free(php, size);
2071 	return (error);
2072 }
2073 
2074 static int
2075 pr_read_psinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2076 {
2077 	psinfo32_t psinfo;
2078 	proc_t *p;
2079 	int error = 0;
2080 
2081 	ASSERT(pnp->pr_type == PR_PSINFO);
2082 
2083 	/*
2084 	 * We don't want the full treatment of prlock(pnp) here.
2085 	 * This file is world-readable and never goes invalid.
2086 	 * It doesn't matter if we are in the middle of an exec().
2087 	 */
2088 	p = pr_p_lock(pnp);
2089 	mutex_exit(&pr_pidlock);
2090 	if (p == NULL)
2091 		error = ENOENT;
2092 	else {
2093 		ASSERT(p == pnp->pr_common->prc_proc);
2094 		prgetpsinfo32(p, &psinfo);
2095 		prunlock(pnp);
2096 		error = pr_uioread(&psinfo, sizeof (psinfo), uiop);
2097 	}
2098 	return (error);
2099 }
2100 
2101 static int
2102 pr_read_lpsinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2103 {
2104 	proc_t *p;
2105 	kthread_t *t;
2106 	lwpdir_t *ldp;
2107 	lwpent_t *lep;
2108 	size_t size;
2109 	prheader32_t *php;
2110 	lwpsinfo32_t *sp;
2111 	int error;
2112 	int nlwp;
2113 	int i;
2114 
2115 	ASSERT(pnp->pr_type == PR_LPSINFO);
2116 
2117 	/*
2118 	 * We don't want the full treatment of prlock(pnp) here.
2119 	 * This file is world-readable and never goes invalid.
2120 	 * It doesn't matter if we are in the middle of an exec().
2121 	 */
2122 	p = pr_p_lock(pnp);
2123 	mutex_exit(&pr_pidlock);
2124 	if (p == NULL)
2125 		return (ENOENT);
2126 	ASSERT(p == pnp->pr_common->prc_proc);
2127 	if ((nlwp = p->p_lwpcnt + p->p_zombcnt) == 0) {
2128 		prunlock(pnp);
2129 		return (ENOENT);
2130 	}
2131 	size = sizeof (prheader32_t) + nlwp * LSPAN32(lwpsinfo32_t);
2132 
2133 	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
2134 	mutex_exit(&p->p_lock);
2135 	php = kmem_zalloc(size, KM_SLEEP);
2136 	mutex_enter(&p->p_lock);
2137 	/* p->p_lwpcnt can't change while process is locked */
2138 	ASSERT(nlwp == p->p_lwpcnt + p->p_zombcnt);
2139 
2140 	php->pr_nent = nlwp;
2141 	php->pr_entsize = LSPAN32(lwpsinfo32_t);
2142 
2143 	sp = (lwpsinfo32_t *)(php + 1);
2144 	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
2145 		if ((lep = ldp->ld_entry) == NULL)
2146 			continue;
2147 		if ((t = lep->le_thread) != NULL)
2148 			prgetlwpsinfo32(t, sp);
2149 		else {
2150 			bzero(sp, sizeof (*sp));
2151 			sp->pr_lwpid = lep->le_lwpid;
2152 			sp->pr_state = SZOMB;
2153 			sp->pr_sname = 'Z';
2154 			sp->pr_start.tv_sec = (time32_t)lep->le_start;
2155 		}
2156 		sp = (lwpsinfo32_t *)((caddr_t)sp + LSPAN32(lwpsinfo32_t));
2157 	}
2158 	prunlock(pnp);
2159 
2160 	error = pr_uioread(php, size, uiop);
2161 	kmem_free(php, size);
2162 	return (error);
2163 }
2164 
2165 static int
2166 pr_read_map_common_32(prnode_t *pnp, uio_t *uiop, prnodetype_t type)
2167 {
2168 	proc_t *p;
2169 	struct as *as;
2170 	list_t	iolhead;
2171 	int error;
2172 
2173 readmap32_common:
2174 	if ((error = prlock(pnp, ZNO)) != 0)
2175 		return (error);
2176 
2177 	p = pnp->pr_common->prc_proc;
2178 	as = p->p_as;
2179 
2180 	if ((p->p_flag & SSYS) || as == &kas) {
2181 		prunlock(pnp);
2182 		return (0);
2183 	}
2184 
2185 	if (PROCESS_NOT_32BIT(p)) {
2186 		prunlock(pnp);
2187 		return (EOVERFLOW);
2188 	}
2189 
2190 	if (!AS_LOCK_TRYENTER(as, RW_WRITER)) {
2191 		prunlock(pnp);
2192 		delay(1);
2193 		goto readmap32_common;
2194 	}
2195 	mutex_exit(&p->p_lock);
2196 
2197 	switch (type) {
2198 	case PR_XMAP:
2199 		error = prgetxmap32(p, &iolhead);
2200 		break;
2201 	case PR_RMAP:
2202 		error = prgetmap32(p, 1, &iolhead);
2203 		break;
2204 	case PR_MAP:
2205 		error = prgetmap32(p, 0, &iolhead);
2206 		break;
2207 	}
2208 	AS_LOCK_EXIT(as);
2209 	mutex_enter(&p->p_lock);
2210 	prunlock(pnp);
2211 
2212 	error = pr_iol_uiomove_and_free(&iolhead, uiop, error);
2213 
2214 	return (error);
2215 }
2216 
2217 static int
2218 pr_read_map_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2219 {
2220 	ASSERT(pnp->pr_type == PR_MAP);
2221 	return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
2222 }
2223 
2224 static int
2225 pr_read_rmap_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2226 {
2227 	ASSERT(pnp->pr_type == PR_RMAP);
2228 	return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
2229 }
2230 
2231 static int
2232 pr_read_xmap_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2233 {
2234 	ASSERT(pnp->pr_type == PR_XMAP);
2235 	return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
2236 }
2237 
2238 static int
2239 pr_read_sigact_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2240 {
2241 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
2242 	proc_t *p;
2243 	struct sigaction32 *sap;
2244 	int sig;
2245 	int error;
2246 	user_t *up;
2247 
2248 	ASSERT(pnp->pr_type == PR_SIGACT);
2249 
2250 	/*
2251 	 * We kmem_alloc() the sigaction32 array because
2252 	 * it is so big it might blow the kernel stack.
2253 	 */
2254 	sap = kmem_alloc((nsig-1) * sizeof (struct sigaction32), KM_SLEEP);
2255 
2256 	if ((error = prlock(pnp, ZNO)) != 0)
2257 		goto out;
2258 	p = pnp->pr_common->prc_proc;
2259 
2260 	if (PROCESS_NOT_32BIT(p)) {
2261 		prunlock(pnp);
2262 		error = EOVERFLOW;
2263 		goto out;
2264 	}
2265 
2266 	if (uiop->uio_offset >= (nsig-1) * sizeof (struct sigaction32)) {
2267 		prunlock(pnp);
2268 		goto out;
2269 	}
2270 
2271 	up = PTOU(p);
2272 	for (sig = 1; sig < nsig; sig++)
2273 		prgetaction32(p, up, sig, &sap[sig-1]);
2274 	prunlock(pnp);
2275 
2276 	error = pr_uioread(sap, (nsig - 1) * sizeof (struct sigaction32), uiop);
2277 out:
2278 	kmem_free(sap, (nsig-1) * sizeof (struct sigaction32));
2279 	return (error);
2280 }
2281 
2282 static int
2283 pr_read_auxv_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2284 {
2285 	auxv32_t auxv[__KERN_NAUXV_IMPL];
2286 	proc_t *p;
2287 	user_t *up;
2288 	int error;
2289 	int i;
2290 
2291 	ASSERT(pnp->pr_type == PR_AUXV);
2292 
2293 	if ((error = prlock(pnp, ZNO)) != 0)
2294 		return (error);
2295 	p = pnp->pr_common->prc_proc;
2296 
2297 	if (PROCESS_NOT_32BIT(p)) {
2298 		prunlock(pnp);
2299 		return (EOVERFLOW);
2300 	}
2301 
2302 	if (uiop->uio_offset >= sizeof (auxv)) {
2303 		prunlock(pnp);
2304 		return (0);
2305 	}
2306 
2307 	up = PTOU(p);
2308 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
2309 		auxv[i].a_type = (int32_t)up->u_auxv[i].a_type;
2310 		auxv[i].a_un.a_val = (int32_t)up->u_auxv[i].a_un.a_val;
2311 	}
2312 	prunlock(pnp);
2313 
2314 	return (pr_uioread(auxv, sizeof (auxv), uiop));
2315 }
2316 
2317 static int
2318 pr_read_usage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2319 {
2320 	prhusage_t *pup;
2321 	prusage32_t *upup;
2322 	proc_t *p;
2323 	kthread_t *t;
2324 	int error;
2325 
2326 	ASSERT(pnp->pr_type == PR_USAGE);
2327 
2328 	/* allocate now, before locking the process */
2329 	pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
2330 	upup = kmem_alloc(sizeof (*upup), KM_SLEEP);
2331 
2332 	/*
2333 	 * We don't want the full treatment of prlock(pnp) here.
2334 	 * This file is world-readable and never goes invalid.
2335 	 * It doesn't matter if we are in the middle of an exec().
2336 	 */
2337 	p = pr_p_lock(pnp);
2338 	mutex_exit(&pr_pidlock);
2339 	if (p == NULL) {
2340 		error = ENOENT;
2341 		goto out;
2342 	}
2343 	ASSERT(p == pnp->pr_common->prc_proc);
2344 
2345 	if (uiop->uio_offset >= sizeof (prusage32_t)) {
2346 		prunlock(pnp);
2347 		error = 0;
2348 		goto out;
2349 	}
2350 
2351 	pup->pr_tstamp = gethrtime();
2352 
2353 	pup->pr_count  = p->p_defunct;
2354 	pup->pr_create = p->p_mstart;
2355 	pup->pr_term   = p->p_mterm;
2356 
2357 	pup->pr_rtime    = p->p_mlreal;
2358 	pup->pr_utime    = p->p_acct[LMS_USER];
2359 	pup->pr_stime    = p->p_acct[LMS_SYSTEM];
2360 	pup->pr_ttime    = p->p_acct[LMS_TRAP];
2361 	pup->pr_tftime   = p->p_acct[LMS_TFAULT];
2362 	pup->pr_dftime   = p->p_acct[LMS_DFAULT];
2363 	pup->pr_kftime   = p->p_acct[LMS_KFAULT];
2364 	pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
2365 	pup->pr_slptime  = p->p_acct[LMS_SLEEP];
2366 	pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
2367 	pup->pr_stoptime = p->p_acct[LMS_STOPPED];
2368 
2369 	pup->pr_minf  = p->p_ru.minflt;
2370 	pup->pr_majf  = p->p_ru.majflt;
2371 	pup->pr_nswap = p->p_ru.nswap;
2372 	pup->pr_inblk = p->p_ru.inblock;
2373 	pup->pr_oublk = p->p_ru.oublock;
2374 	pup->pr_msnd  = p->p_ru.msgsnd;
2375 	pup->pr_mrcv  = p->p_ru.msgrcv;
2376 	pup->pr_sigs  = p->p_ru.nsignals;
2377 	pup->pr_vctx  = p->p_ru.nvcsw;
2378 	pup->pr_ictx  = p->p_ru.nivcsw;
2379 	pup->pr_sysc  = p->p_ru.sysc;
2380 	pup->pr_ioch  = p->p_ru.ioch;
2381 
2382 	/*
2383 	 * Add the usage information for each active lwp.
2384 	 */
2385 	if ((t = p->p_tlist) != NULL &&
2386 	    !(pnp->pr_pcommon->prc_flags & PRC_DESTROY)) {
2387 		do {
2388 			if (t->t_proc_flag & TP_LWPEXIT)
2389 				continue;
2390 			pup->pr_count++;
2391 			praddusage(t, pup);
2392 		} while ((t = t->t_forw) != p->p_tlist);
2393 	}
2394 
2395 	prunlock(pnp);
2396 
2397 	prcvtusage32(pup, upup);
2398 
2399 	error = pr_uioread(upup, sizeof (prusage32_t), uiop);
2400 out:
2401 	kmem_free(pup, sizeof (*pup));
2402 	kmem_free(upup, sizeof (*upup));
2403 	return (error);
2404 }
2405 
2406 static int
2407 pr_read_lusage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2408 {
2409 	int nlwp;
2410 	prhusage_t *pup;
2411 	prheader32_t *php;
2412 	prusage32_t *upup;
2413 	size_t size;
2414 	hrtime_t curtime;
2415 	proc_t *p;
2416 	kthread_t *t;
2417 	lwpdir_t *ldp;
2418 	int error;
2419 	int i;
2420 
2421 	ASSERT(pnp->pr_type == PR_LUSAGE);
2422 
2423 	/*
2424 	 * We don't want the full treatment of prlock(pnp) here.
2425 	 * This file is world-readable and never goes invalid.
2426 	 * It doesn't matter if we are in the middle of an exec().
2427 	 */
2428 	p = pr_p_lock(pnp);
2429 	mutex_exit(&pr_pidlock);
2430 	if (p == NULL)
2431 		return (ENOENT);
2432 	ASSERT(p == pnp->pr_common->prc_proc);
2433 	if ((nlwp = p->p_lwpcnt) == 0) {
2434 		prunlock(pnp);
2435 		return (ENOENT);
2436 	}
2437 
2438 	size = sizeof (prheader32_t) + (nlwp + 1) * LSPAN32(prusage32_t);
2439 	if (uiop->uio_offset >= size) {
2440 		prunlock(pnp);
2441 		return (0);
2442 	}
2443 
2444 	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
2445 	mutex_exit(&p->p_lock);
2446 	pup = kmem_zalloc(size + sizeof (prhusage_t), KM_SLEEP);
2447 	mutex_enter(&p->p_lock);
2448 	/* p->p_lwpcnt can't change while process is locked */
2449 	ASSERT(nlwp == p->p_lwpcnt);
2450 
2451 	php = (prheader32_t *)(pup + 1);
2452 	upup = (prusage32_t *)(php + 1);
2453 
2454 	php->pr_nent = nlwp + 1;
2455 	php->pr_entsize = LSPAN32(prusage32_t);
2456 
2457 	curtime = gethrtime();
2458 
2459 	/*
2460 	 * First the summation over defunct lwps.
2461 	 */
2462 	pup->pr_count  = p->p_defunct;
2463 	pup->pr_tstamp = curtime;
2464 	pup->pr_create = p->p_mstart;
2465 	pup->pr_term   = p->p_mterm;
2466 
2467 	pup->pr_rtime    = p->p_mlreal;
2468 	pup->pr_utime    = p->p_acct[LMS_USER];
2469 	pup->pr_stime    = p->p_acct[LMS_SYSTEM];
2470 	pup->pr_ttime    = p->p_acct[LMS_TRAP];
2471 	pup->pr_tftime   = p->p_acct[LMS_TFAULT];
2472 	pup->pr_dftime   = p->p_acct[LMS_DFAULT];
2473 	pup->pr_kftime   = p->p_acct[LMS_KFAULT];
2474 	pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
2475 	pup->pr_slptime  = p->p_acct[LMS_SLEEP];
2476 	pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
2477 	pup->pr_stoptime = p->p_acct[LMS_STOPPED];
2478 
2479 	pup->pr_minf  = p->p_ru.minflt;
2480 	pup->pr_majf  = p->p_ru.majflt;
2481 	pup->pr_nswap = p->p_ru.nswap;
2482 	pup->pr_inblk = p->p_ru.inblock;
2483 	pup->pr_oublk = p->p_ru.oublock;
2484 	pup->pr_msnd  = p->p_ru.msgsnd;
2485 	pup->pr_mrcv  = p->p_ru.msgrcv;
2486 	pup->pr_sigs  = p->p_ru.nsignals;
2487 	pup->pr_vctx  = p->p_ru.nvcsw;
2488 	pup->pr_ictx  = p->p_ru.nivcsw;
2489 	pup->pr_sysc  = p->p_ru.sysc;
2490 	pup->pr_ioch  = p->p_ru.ioch;
2491 
2492 	prcvtusage32(pup, upup);
2493 
2494 	/*
2495 	 * Fill one prusage struct for each active lwp.
2496 	 */
2497 	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
2498 		if (ldp->ld_entry == NULL ||
2499 		    (t = ldp->ld_entry->le_thread) == NULL)
2500 			continue;
2501 		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
2502 		ASSERT(nlwp > 0);
2503 		--nlwp;
2504 		upup = (prusage32_t *)
2505 		    ((caddr_t)upup + LSPAN32(prusage32_t));
2506 		prgetusage(t, pup);
2507 		prcvtusage32(pup, upup);
2508 	}
2509 	ASSERT(nlwp == 0);
2510 
2511 	prunlock(pnp);
2512 
2513 	error = pr_uioread(php, size, uiop);
2514 	kmem_free(pup, size + sizeof (prhusage_t));
2515 	return (error);
2516 }
2517 
2518 static int
2519 pr_read_pagedata_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2520 {
2521 	proc_t *p;
2522 	int error;
2523 
2524 	ASSERT(pnp->pr_type == PR_PAGEDATA);
2525 
2526 	if ((error = prlock(pnp, ZNO)) != 0)
2527 		return (error);
2528 
2529 	p = pnp->pr_common->prc_proc;
2530 	if ((p->p_flag & SSYS) || p->p_as == &kas) {
2531 		prunlock(pnp);
2532 		return (0);
2533 	}
2534 
2535 	if (PROCESS_NOT_32BIT(p)) {
2536 		prunlock(pnp);
2537 		return (EOVERFLOW);
2538 	}
2539 
2540 	mutex_exit(&p->p_lock);
2541 	error = prpdread32(p, pnp->pr_hatid, uiop);
2542 	mutex_enter(&p->p_lock);
2543 
2544 	prunlock(pnp);
2545 	return (error);
2546 }
2547 
2548 static int
2549 pr_read_opagedata_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2550 {
2551 	proc_t *p;
2552 	struct as *as;
2553 	int error;
2554 
2555 	ASSERT(pnp->pr_type == PR_OPAGEDATA);
2556 
2557 	if ((error = prlock(pnp, ZNO)) != 0)
2558 		return (error);
2559 
2560 	p = pnp->pr_common->prc_proc;
2561 	as = p->p_as;
2562 
2563 	if ((p->p_flag & SSYS) || as == &kas) {
2564 		prunlock(pnp);
2565 		return (0);
2566 	}
2567 
2568 	if (PROCESS_NOT_32BIT(p)) {
2569 		prunlock(pnp);
2570 		return (EOVERFLOW);
2571 	}
2572 
2573 	mutex_exit(&p->p_lock);
2574 	error = oprpdread32(as, pnp->pr_hatid, uiop);
2575 	mutex_enter(&p->p_lock);
2576 
2577 	prunlock(pnp);
2578 	return (error);
2579 }
2580 
2581 static int
2582 pr_read_watch_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2583 {
2584 	proc_t *p;
2585 	int error;
2586 	prwatch32_t *Bpwp;
2587 	size_t size;
2588 	prwatch32_t *pwp;
2589 	int nwarea;
2590 	struct watched_area *pwarea;
2591 
2592 	ASSERT(pnp->pr_type == PR_WATCH);
2593 
2594 	if ((error = prlock(pnp, ZNO)) != 0)
2595 		return (error);
2596 
2597 	p = pnp->pr_common->prc_proc;
2598 	if (PROCESS_NOT_32BIT(p)) {
2599 		prunlock(pnp);
2600 		return (EOVERFLOW);
2601 	}
2602 	nwarea = avl_numnodes(&p->p_warea);
2603 	size = nwarea * sizeof (prwatch32_t);
2604 	if (uiop->uio_offset >= size) {
2605 		prunlock(pnp);
2606 		return (0);
2607 	}
2608 
2609 	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
2610 	mutex_exit(&p->p_lock);
2611 	Bpwp = pwp = kmem_zalloc(size, KM_SLEEP);
2612 	mutex_enter(&p->p_lock);
2613 	/* p->p_nwarea can't change while process is locked */
2614 	ASSERT(nwarea == avl_numnodes(&p->p_warea));
2615 
2616 	/* gather the watched areas */
2617 	for (pwarea = avl_first(&p->p_warea); pwarea != NULL;
2618 	    pwarea = AVL_NEXT(&p->p_warea, pwarea), pwp++) {
2619 		pwp->pr_vaddr = (caddr32_t)(uintptr_t)pwarea->wa_vaddr;
2620 		pwp->pr_size = (size32_t)(pwarea->wa_eaddr - pwarea->wa_vaddr);
2621 		pwp->pr_wflags = (int)pwarea->wa_flags;
2622 	}
2623 
2624 	prunlock(pnp);
2625 
2626 	error = pr_uioread(Bpwp, size, uiop);
2627 	kmem_free(Bpwp, size);
2628 	return (error);
2629 }
2630 
2631 static int
2632 pr_read_lwpstatus_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2633 {
2634 	lwpstatus32_t *sp;
2635 	proc_t *p;
2636 	int error;
2637 
2638 	ASSERT(pnp->pr_type == PR_LWPSTATUS);
2639 
2640 	/*
2641 	 * We kmem_alloc() the lwpstatus structure because
2642 	 * it is so big it might blow the kernel stack.
2643 	 */
2644 	sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
2645 
2646 	if ((error = prlock(pnp, ZNO)) != 0)
2647 		goto out;
2648 
2649 	/*
2650 	 * A 32-bit process cannot get the status of a 64-bit process.
2651 	 * The fields for the 64-bit quantities are not large enough.
2652 	 */
2653 	p = pnp->pr_common->prc_proc;
2654 	if (PROCESS_NOT_32BIT(p)) {
2655 		prunlock(pnp);
2656 		error = EOVERFLOW;
2657 		goto out;
2658 	}
2659 
2660 	if (uiop->uio_offset >= sizeof (*sp)) {
2661 		prunlock(pnp);
2662 		goto out;
2663 	}
2664 
2665 	prgetlwpstatus32(pnp->pr_common->prc_thread, sp, VTOZONE(PTOV(pnp)));
2666 	prunlock(pnp);
2667 
2668 	error = pr_uioread(sp, sizeof (*sp), uiop);
2669 out:
2670 	kmem_free(sp, sizeof (*sp));
2671 	return (error);
2672 }
2673 
2674 static int
2675 pr_read_lwpsinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2676 {
2677 	lwpsinfo32_t lwpsinfo;
2678 	proc_t *p;
2679 	kthread_t *t;
2680 	lwpent_t *lep;
2681 
2682 	ASSERT(pnp->pr_type == PR_LWPSINFO);
2683 
2684 	/*
2685 	 * We don't want the full treatment of prlock(pnp) here.
2686 	 * This file is world-readable and never goes invalid.
2687 	 * It doesn't matter if we are in the middle of an exec().
2688 	 */
2689 	p = pr_p_lock(pnp);
2690 	mutex_exit(&pr_pidlock);
2691 	if (p == NULL)
2692 		return (ENOENT);
2693 	ASSERT(p == pnp->pr_common->prc_proc);
2694 	if (pnp->pr_common->prc_tslot == -1) {
2695 		prunlock(pnp);
2696 		return (ENOENT);
2697 	}
2698 
2699 	if (uiop->uio_offset >= sizeof (lwpsinfo)) {
2700 		prunlock(pnp);
2701 		return (0);
2702 	}
2703 
2704 	if ((t = pnp->pr_common->prc_thread) != NULL)
2705 		prgetlwpsinfo32(t, &lwpsinfo);
2706 	else {
2707 		lep = p->p_lwpdir[pnp->pr_common->prc_tslot].ld_entry;
2708 		bzero(&lwpsinfo, sizeof (lwpsinfo));
2709 		lwpsinfo.pr_lwpid = lep->le_lwpid;
2710 		lwpsinfo.pr_state = SZOMB;
2711 		lwpsinfo.pr_sname = 'Z';
2712 		lwpsinfo.pr_start.tv_sec = (time32_t)lep->le_start;
2713 	}
2714 	prunlock(pnp);
2715 
2716 	return (pr_uioread(&lwpsinfo, sizeof (lwpsinfo), uiop));
2717 }
2718 
2719 static int
2720 pr_read_lwpusage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2721 {
2722 	prhusage_t *pup;
2723 	prusage32_t *upup;
2724 	proc_t *p;
2725 	int error;
2726 
2727 	ASSERT(pnp->pr_type == PR_LWPUSAGE);
2728 
2729 	/* allocate now, before locking the process */
2730 	pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
2731 	upup = kmem_alloc(sizeof (*upup), KM_SLEEP);
2732 
2733 	/*
2734 	 * We don't want the full treatment of prlock(pnp) here.
2735 	 * This file is world-readable and never goes invalid.
2736 	 * It doesn't matter if we are in the middle of an exec().
2737 	 */
2738 	p = pr_p_lock(pnp);
2739 	mutex_exit(&pr_pidlock);
2740 	if (p == NULL) {
2741 		error = ENOENT;
2742 		goto out;
2743 	}
2744 	ASSERT(p == pnp->pr_common->prc_proc);
2745 	if (pnp->pr_common->prc_thread == NULL) {
2746 		prunlock(pnp);
2747 		error = ENOENT;
2748 		goto out;
2749 	}
2750 	if (uiop->uio_offset >= sizeof (prusage32_t)) {
2751 		prunlock(pnp);
2752 		error = 0;
2753 		goto out;
2754 	}
2755 
2756 	pup->pr_tstamp = gethrtime();
2757 	prgetusage(pnp->pr_common->prc_thread, pup);
2758 
2759 	prunlock(pnp);
2760 
2761 	prcvtusage32(pup, upup);
2762 
2763 	error = pr_uioread(upup, sizeof (prusage32_t), uiop);
2764 out:
2765 	kmem_free(pup, sizeof (*pup));
2766 	kmem_free(upup, sizeof (*upup));
2767 	return (error);
2768 }
2769 
2770 static int
2771 pr_read_spymaster_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2772 {
2773 	psinfo32_t psinfo;
2774 	int error;
2775 	klwp_t *lwp;
2776 
2777 	ASSERT(pnp->pr_type == PR_SPYMASTER);
2778 
2779 	if ((error = prlock(pnp, ZNO)) != 0)
2780 		return (error);
2781 
2782 	if (pnp->pr_common->prc_thread == NULL) {
2783 		prunlock(pnp);
2784 		return (0);
2785 	}
2786 
2787 	lwp = pnp->pr_common->prc_thread->t_lwp;
2788 
2789 	if (lwp->lwp_spymaster == NULL) {
2790 		prunlock(pnp);
2791 		return (0);
2792 	}
2793 
2794 	psinfo_kto32(lwp->lwp_spymaster, &psinfo);
2795 	prunlock(pnp);
2796 
2797 	return (pr_uioread(&psinfo, sizeof (psinfo), uiop));
2798 }
2799 
2800 #if defined(__sparc)
2801 static int
2802 pr_read_gwindows_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
2803 {
2804 	proc_t *p;
2805 	kthread_t *t;
2806 	gwindows32_t *gwp;
2807 	int error;
2808 	size_t size;
2809 
2810 	ASSERT(pnp->pr_type == PR_GWINDOWS);
2811 
2812 	gwp = kmem_zalloc(sizeof (gwindows32_t), KM_SLEEP);
2813 
2814 	if ((error = prlock(pnp, ZNO)) != 0)
2815 		goto out;
2816 
2817 	p = pnp->pr_common->prc_proc;
2818 	t = pnp->pr_common->prc_thread;
2819 
2820 	if (PROCESS_NOT_32BIT(p)) {
2821 		prunlock(pnp);
2822 		error = EOVERFLOW;
2823 		goto out;
2824 	}
2825 
2826 	/*
2827 	 * Drop p->p_lock while touching the stack.
2828 	 * The P_PR_LOCK flag prevents the lwp from
2829 	 * disappearing while we do this.
2830 	 */
2831 	mutex_exit(&p->p_lock);
2832 	if ((size = prnwindows(ttolwp(t))) != 0)
2833 		size = sizeof (gwindows32_t) -
2834 		    (SPARC_MAXREGWINDOW - size) * sizeof (struct rwindow32);
2835 	if (uiop->uio_offset >= size) {
2836 		mutex_enter(&p->p_lock);
2837 		prunlock(pnp);
2838 		goto out;
2839 	}
2840 	prgetwindows32(ttolwp(t), gwp);
2841 	mutex_enter(&p->p_lock);
2842 	prunlock(pnp);
2843 
2844 	error = pr_uioread(gwp, size, uiop);
2845 out:
2846 	kmem_free(gwp, sizeof (gwindows32_t));
2847 	return (error);
2848 }
2849 #endif	/* __sparc */
2850 
2851 #endif	/* _SYSCALL32_IMPL */
2852 
2853 /* ARGSUSED */
2854 static int
2855 prread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
2856 {
2857 	prnode_t *pnp = VTOP(vp);
2858 
2859 	ASSERT(pnp->pr_type < PR_NFILES);
2860 
2861 #ifdef _SYSCALL32_IMPL
2862 	/*
2863 	 * What is read from the /proc files depends on the data
2864 	 * model of the caller.  An LP64 process will see LP64
2865 	 * data.  An ILP32 process will see ILP32 data.
2866 	 */
2867 	if (curproc->p_model == DATAMODEL_LP64)
2868 		return (pr_read_function[pnp->pr_type](pnp, uiop, cr));
2869 	else
2870 		return (pr_read_function_32[pnp->pr_type](pnp, uiop, cr));
2871 #else
2872 	return (pr_read_function[pnp->pr_type](pnp, uiop, cr));
2873 #endif
2874 }
2875 
2876 /* Note we intentionally don't handle partial writes/updates. */
2877 static int
2878 pr_write_lwpname(prnode_t *pnp, uio_t *uiop)
2879 {
2880 	kthread_t *t = NULL;
2881 	char *lwpname;
2882 	int error;
2883 
2884 	lwpname = kmem_zalloc(THREAD_NAME_MAX, KM_SLEEP);
2885 
2886 	if ((error = uiomove(lwpname, THREAD_NAME_MAX, UIO_WRITE, uiop)) != 0) {
2887 		kmem_free(lwpname, THREAD_NAME_MAX);
2888 		return (error);
2889 	}
2890 
2891 	/* Somebody tried to write too long a thread name... */
2892 	if (lwpname[THREAD_NAME_MAX - 1] != '\0' || uiop->uio_resid > 0) {
2893 		kmem_free(lwpname, THREAD_NAME_MAX);
2894 		return (EIO);
2895 	}
2896 
2897 	VERIFY3U(lwpname[THREAD_NAME_MAX - 1], ==, '\0');
2898 
2899 	for (size_t i = 0; lwpname[i] != '\0'; i++) {
2900 		if (!ISPRINT(lwpname[i])) {
2901 			kmem_free(lwpname, THREAD_NAME_MAX);
2902 			return (EINVAL);
2903 		}
2904 	}
2905 
2906 	/* Equivalent of thread_setname(), but with the ZNO magic. */
2907 	if ((error = prlock(pnp, ZNO)) != 0) {
2908 		kmem_free(lwpname, THREAD_NAME_MAX);
2909 		return (error);
2910 	}
2911 
2912 	t = pnp->pr_common->prc_thread;
2913 	if (t->t_name == NULL) {
2914 		t->t_name = lwpname;
2915 	} else {
2916 		(void) strlcpy(t->t_name, lwpname, THREAD_NAME_MAX);
2917 		kmem_free(lwpname, THREAD_NAME_MAX);
2918 	}
2919 
2920 	prunlock(pnp);
2921 	return (0);
2922 }
2923 
2924 /* ARGSUSED */
2925 static int
2926 prwrite(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
2927 {
2928 	prnode_t *pnp = VTOP(vp);
2929 	int old = 0;
2930 	int error;
2931 	ssize_t resid;
2932 
2933 	ASSERT(pnp->pr_type < PR_NFILES);
2934 
2935 	/*
2936 	 * Only a handful of /proc files are writable, enumerate them here.
2937 	 */
2938 	switch (pnp->pr_type) {
2939 	case PR_PIDDIR:		/* directory write()s: visceral revulsion. */
2940 		ASSERT(pnp->pr_pidfile != NULL);
2941 		/* use the underlying PR_PIDFILE to write the process */
2942 		vp = pnp->pr_pidfile;
2943 		pnp = VTOP(vp);
2944 		ASSERT(pnp->pr_type == PR_PIDFILE);
2945 		/* FALLTHROUGH */
2946 	case PR_PIDFILE:
2947 	case PR_LWPIDFILE:
2948 		old = 1;
2949 		/* FALLTHROUGH */
2950 	case PR_AS:
2951 		if ((error = prlock(pnp, ZNO)) == 0) {
2952 			proc_t *p = pnp->pr_common->prc_proc;
2953 			struct as *as = p->p_as;
2954 
2955 			if ((p->p_flag & SSYS) || as == &kas) {
2956 				/*
2957 				 * /proc I/O cannot be done to a system process.
2958 				 */
2959 				error = EIO;
2960 #ifdef _SYSCALL32_IMPL
2961 			} else if (curproc->p_model == DATAMODEL_ILP32 &&
2962 			    PROCESS_NOT_32BIT(p)) {
2963 				error = EOVERFLOW;
2964 #endif
2965 			} else {
2966 				/*
2967 				 * See comments above (pr_read_pidfile)
2968 				 * about this locking dance.
2969 				 */
2970 				mutex_exit(&p->p_lock);
2971 				error = prusrio(p, UIO_WRITE, uiop, old);
2972 				mutex_enter(&p->p_lock);
2973 			}
2974 			prunlock(pnp);
2975 		}
2976 		return (error);
2977 
2978 	case PR_CTL:
2979 	case PR_LWPCTL:
2980 		resid = uiop->uio_resid;
2981 		/*
2982 		 * Perform the action on the control file
2983 		 * by passing curthreads credentials
2984 		 * and not target process's credentials.
2985 		 */
2986 #ifdef _SYSCALL32_IMPL
2987 		if (curproc->p_model == DATAMODEL_ILP32)
2988 			error = prwritectl32(vp, uiop, CRED());
2989 		else
2990 			error = prwritectl(vp, uiop, CRED());
2991 #else
2992 		error = prwritectl(vp, uiop, CRED());
2993 #endif
2994 		/*
2995 		 * This hack makes sure that the EINTR is passed
2996 		 * all the way back to the caller's write() call.
2997 		 */
2998 		if (error == EINTR)
2999 			uiop->uio_resid = resid;
3000 		return (error);
3001 
3002 	case PR_LWPNAME:
3003 		return (pr_write_lwpname(pnp, uiop));
3004 
3005 	default:
3006 		return ((vp->v_type == VDIR)? EISDIR : EBADF);
3007 	}
3008 	/* NOTREACHED */
3009 }
3010 
3011 static int
3012 prgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
3013     caller_context_t *ct)
3014 {
3015 	prnode_t *pnp = VTOP(vp);
3016 	prnodetype_t type = pnp->pr_type;
3017 	prcommon_t *pcp;
3018 	proc_t *p;
3019 	struct as *as;
3020 	int error;
3021 	vnode_t *rvp;
3022 	timestruc_t now;
3023 	extern uint_t nproc;
3024 	int ngroups;
3025 	int nsig;
3026 
3027 	/*
3028 	 * This ugly bit of code allows us to keep both versions of this
3029 	 * function from the same source.
3030 	 */
3031 #ifdef _LP64
3032 	int iam32bit = (curproc->p_model == DATAMODEL_ILP32);
3033 #define	PR_OBJSIZE(obj32, obj64)	\
3034 	(iam32bit ? sizeof (obj32) : sizeof (obj64))
3035 #define	PR_OBJSPAN(obj32, obj64)	\
3036 	(iam32bit ? LSPAN32(obj32) : LSPAN(obj64))
3037 #else
3038 #define	PR_OBJSIZE(obj32, obj64)	\
3039 	(sizeof (obj64))
3040 #define	PR_OBJSPAN(obj32, obj64)	\
3041 	(LSPAN(obj64))
3042 #endif
3043 
3044 	/*
3045 	 * Return all the attributes.  Should be refined
3046 	 * so that it returns only those asked for.
3047 	 * Most of this is complete fakery anyway.
3048 	 */
3049 
3050 	/*
3051 	 * For files in the /proc/<pid>/object directory,
3052 	 * return the attributes of the underlying object.
3053 	 * For files in the /proc/<pid>/fd directory,
3054 	 * return the attributes of the underlying file, but
3055 	 * make it look inaccessible if it is not a regular file.
3056 	 * Make directories look like symlinks.
3057 	 */
3058 	switch (type) {
3059 	case PR_CURDIR:
3060 	case PR_ROOTDIR:
3061 		if (!(flags & ATTR_REAL))
3062 			break;
3063 		/* restrict full knowledge of the attributes to owner or root */
3064 		if ((error = praccess(vp, 0, 0, cr, ct)) != 0)
3065 			return (error);
3066 		/* FALLTHROUGH */
3067 	case PR_OBJECT:
3068 	case PR_FD:
3069 		rvp = pnp->pr_realvp;
3070 		error = VOP_GETATTR(rvp, vap, flags, cr, ct);
3071 		if (error)
3072 			return (error);
3073 		if (type == PR_FD) {
3074 			if (rvp->v_type != VREG && rvp->v_type != VDIR)
3075 				vap->va_mode = 0;
3076 			else
3077 				vap->va_mode &= pnp->pr_mode;
3078 		}
3079 		if (type == PR_OBJECT)
3080 			vap->va_mode &= 07555;
3081 		if (rvp->v_type == VDIR && !(flags & ATTR_REAL)) {
3082 			vap->va_type = VLNK;
3083 			vap->va_size = 0;
3084 			vap->va_nlink = 1;
3085 		}
3086 		return (0);
3087 	default:
3088 		break;
3089 	}
3090 
3091 	bzero(vap, sizeof (*vap));
3092 	/*
3093 	 * Large Files: Internally proc now uses VPROC to indicate
3094 	 * a proc file. Since we have been returning VREG through
3095 	 * VOP_GETATTR() until now, we continue to do this so as
3096 	 * not to break apps depending on this return value.
3097 	 */
3098 	vap->va_type = (vp->v_type == VPROC) ? VREG : vp->v_type;
3099 	vap->va_mode = pnp->pr_mode;
3100 	vap->va_fsid = vp->v_vfsp->vfs_dev;
3101 	vap->va_blksize = DEV_BSIZE;
3102 	vap->va_rdev = 0;
3103 	vap->va_seq = 0;
3104 
3105 	if (type == PR_PROCDIR) {
3106 		vap->va_uid = 0;
3107 		vap->va_gid = 0;
3108 		vap->va_nlink = nproc + 2;
3109 		vap->va_nodeid = (ino64_t)PRROOTINO;
3110 		gethrestime(&now);
3111 		vap->va_atime = vap->va_mtime = vap->va_ctime = now;
3112 		vap->va_size = (v.v_proc + 2) * PRSDSIZE;
3113 		vap->va_nblocks = btod(vap->va_size);
3114 		return (0);
3115 	}
3116 
3117 	/*
3118 	 * /proc/<pid>/self is a symbolic link, and has no prcommon member
3119 	 */
3120 	if (type == PR_SELF) {
3121 		vap->va_uid = crgetruid(CRED());
3122 		vap->va_gid = crgetrgid(CRED());
3123 		vap->va_nodeid = (ino64_t)PR_SELF;
3124 		gethrestime(&now);
3125 		vap->va_atime = vap->va_mtime = vap->va_ctime = now;
3126 		vap->va_nlink = 1;
3127 		vap->va_type = VLNK;
3128 		vap->va_size = 0;
3129 		return (0);
3130 	}
3131 
3132 	/* A subset of prlock(pnp...) */
3133 	p = pr_p_lock(pnp);
3134 	mutex_exit(&pr_pidlock);
3135 	if (p == NULL)
3136 		return (ENOENT);
3137 	pcp = pnp->pr_common;
3138 
3139 	/*
3140 	 * Because we're performing a subset of prlock() inline here, we must
3141 	 * follow prlock's semantics when encountering a zombie process
3142 	 * (PRC_DESTROY flag is set) or an exiting process (SEXITING flag is
3143 	 * set). Those semantics indicate acting as if the process is no
3144 	 * longer there (return ENOENT).
3145 	 *
3146 	 * If we chose to proceed here regardless, we may encounter issues
3147 	 * when we drop the p_lock (see PR_OBJECTDIR, PR_PATHDIR, PR_*MAP,
3148 	 * PR_LDT, and PR_*PAGEDATA below). A process-cleanup which was
3149 	 * blocked on p_lock may ignore the P_PR_LOCK flag we set above, since
3150 	 * it set one of PRC_DESTROY or SEXITING. If the process then gets
3151 	 * destroyed our "p" will be useless, as will its p_lock.
3152 	 *
3153 	 * It may be desirable to move this check to only places further down
3154 	 * prior to actual droppages of p->p_lock, but for now, we're playing
3155 	 * it safe and checking here immediately, like prlock() does..
3156 	 */
3157 	if (((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
3158 		prunlock(pnp);
3159 		return (ENOENT);
3160 	}
3161 
3162 	mutex_enter(&p->p_crlock);
3163 	vap->va_uid = crgetruid(p->p_cred);
3164 	vap->va_gid = crgetrgid(p->p_cred);
3165 	mutex_exit(&p->p_crlock);
3166 
3167 	vap->va_nlink = 1;
3168 	vap->va_nodeid = pnp->pr_ino? pnp->pr_ino :
3169 	    pmkino(pcp->prc_tslot, pcp->prc_slot, pnp->pr_type);
3170 	if ((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot != -1) {
3171 		vap->va_atime.tv_sec = vap->va_mtime.tv_sec =
3172 		    vap->va_ctime.tv_sec =
3173 		    p->p_lwpdir[pcp->prc_tslot].ld_entry->le_start;
3174 		vap->va_atime.tv_nsec = vap->va_mtime.tv_nsec =
3175 		    vap->va_ctime.tv_nsec = 0;
3176 	} else {
3177 		user_t *up = PTOU(p);
3178 		vap->va_atime.tv_sec = vap->va_mtime.tv_sec =
3179 		    vap->va_ctime.tv_sec = up->u_start.tv_sec;
3180 		vap->va_atime.tv_nsec = vap->va_mtime.tv_nsec =
3181 		    vap->va_ctime.tv_nsec = up->u_start.tv_nsec;
3182 	}
3183 
3184 	switch (type) {
3185 	case PR_PIDDIR:
3186 		/* va_nlink: count 'lwp', 'object' and 'fd' directory links */
3187 		vap->va_nlink = 5;
3188 		vap->va_size = sizeof (piddir);
3189 		break;
3190 	case PR_OBJECTDIR:
3191 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
3192 			vap->va_size = 2 * PRSDSIZE;
3193 		else {
3194 			mutex_exit(&p->p_lock);
3195 			AS_LOCK_ENTER(as, RW_WRITER);
3196 			if (as->a_updatedir)
3197 				rebuild_objdir(as);
3198 			vap->va_size = (as->a_sizedir + 2) * PRSDSIZE;
3199 			AS_LOCK_EXIT(as);
3200 			mutex_enter(&p->p_lock);
3201 		}
3202 		vap->va_nlink = 2;
3203 		break;
3204 	case PR_PATHDIR:
3205 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
3206 			vap->va_size = (P_FINFO(p)->fi_nfiles + 4) * PRSDSIZE;
3207 		else {
3208 			mutex_exit(&p->p_lock);
3209 			AS_LOCK_ENTER(as, RW_WRITER);
3210 			if (as->a_updatedir)
3211 				rebuild_objdir(as);
3212 			vap->va_size = (as->a_sizedir + 4 +
3213 			    P_FINFO(p)->fi_nfiles) * PRSDSIZE;
3214 			AS_LOCK_EXIT(as);
3215 			mutex_enter(&p->p_lock);
3216 		}
3217 		vap->va_nlink = 2;
3218 		break;
3219 	case PR_PATH:
3220 	case PR_CURDIR:
3221 	case PR_ROOTDIR:
3222 	case PR_CT:
3223 		vap->va_type = VLNK;
3224 		vap->va_size = 0;
3225 		break;
3226 	case PR_FDDIR:
3227 	case PR_FDINFODIR:
3228 		vap->va_nlink = 2;
3229 		vap->va_size = (P_FINFO(p)->fi_nfiles + 2) * PRSDSIZE;
3230 		break;
3231 	case PR_FDINFO: {
3232 		file_t *fp;
3233 		int fd = pnp->pr_index;
3234 
3235 		fp = pr_getf(p, fd, NULL);
3236 		if (fp == NULL) {
3237 			prunlock(pnp);
3238 			return (ENOENT);
3239 		}
3240 		prunlock(pnp);
3241 		vap->va_size = prgetfdinfosize(p, fp->f_vnode, cr);
3242 		vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
3243 		pr_releasef(fp);
3244 		return (0);
3245 	}
3246 	case PR_LWPDIR:
3247 		/*
3248 		 * va_nlink: count each lwp as a directory link.
3249 		 * va_size: size of p_lwpdir + 2
3250 		 */
3251 		vap->va_nlink = p->p_lwpcnt + p->p_zombcnt + 2;
3252 		vap->va_size = (p->p_lwpdir_sz + 2) * PRSDSIZE;
3253 		break;
3254 	case PR_LWPIDDIR:
3255 		vap->va_nlink = 2;
3256 		vap->va_size = sizeof (lwpiddir);
3257 		break;
3258 	case PR_CTDIR:
3259 		vap->va_nlink = 2;
3260 		vap->va_size = (avl_numnodes(&p->p_ct_held) + 2) * PRSDSIZE;
3261 		break;
3262 	case PR_TMPLDIR:
3263 		vap->va_nlink = 2;
3264 		vap->va_size = (ct_ntypes + 2) * PRSDSIZE;
3265 		break;
3266 	case PR_AS:
3267 	case PR_PIDFILE:
3268 	case PR_LWPIDFILE:
3269 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
3270 			vap->va_size = 0;
3271 		else
3272 			vap->va_size = as->a_resvsize;
3273 		break;
3274 	case PR_STATUS:
3275 		vap->va_size = PR_OBJSIZE(pstatus32_t, pstatus_t);
3276 		break;
3277 	case PR_LSTATUS:
3278 		vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
3279 		    p->p_lwpcnt * PR_OBJSPAN(lwpstatus32_t, lwpstatus_t);
3280 		break;
3281 	case PR_PSINFO:
3282 		vap->va_size = PR_OBJSIZE(psinfo32_t, psinfo_t);
3283 		break;
3284 	case PR_LPSINFO:
3285 		vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
3286 		    (p->p_lwpcnt + p->p_zombcnt) *
3287 		    PR_OBJSPAN(lwpsinfo32_t, lwpsinfo_t);
3288 		break;
3289 	case PR_MAP:
3290 	case PR_RMAP:
3291 	case PR_XMAP:
3292 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
3293 			vap->va_size = 0;
3294 		else {
3295 			mutex_exit(&p->p_lock);
3296 			AS_LOCK_ENTER(as, RW_WRITER);
3297 			if (type == PR_MAP)
3298 				vap->va_mtime = as->a_updatetime;
3299 			if (type == PR_XMAP)
3300 				vap->va_size = prnsegs(as, 0) *
3301 				    PR_OBJSIZE(prxmap32_t, prxmap_t);
3302 			else
3303 				vap->va_size = prnsegs(as, type == PR_RMAP) *
3304 				    PR_OBJSIZE(prmap32_t, prmap_t);
3305 			AS_LOCK_EXIT(as);
3306 			mutex_enter(&p->p_lock);
3307 		}
3308 		break;
3309 	case PR_CRED:
3310 		mutex_enter(&p->p_crlock);
3311 		vap->va_size = sizeof (prcred_t);
3312 		ngroups = crgetngroups(p->p_cred);
3313 		if (ngroups > 1)
3314 			vap->va_size += (ngroups - 1) * sizeof (gid_t);
3315 		mutex_exit(&p->p_crlock);
3316 		break;
3317 	case PR_PRIV:
3318 		vap->va_size = prgetprivsize();
3319 		break;
3320 	case PR_SECFLAGS:
3321 		vap->va_size = sizeof (prsecflags_t);
3322 		break;
3323 	case PR_SIGACT:
3324 		nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
3325 		vap->va_size = (nsig-1) *
3326 		    PR_OBJSIZE(struct sigaction32, struct sigaction);
3327 		break;
3328 	case PR_AUXV:
3329 		vap->va_size = __KERN_NAUXV_IMPL * PR_OBJSIZE(auxv32_t, auxv_t);
3330 		break;
3331 #if defined(__x86)
3332 	case PR_LDT:
3333 		mutex_exit(&p->p_lock);
3334 		mutex_enter(&p->p_ldtlock);
3335 		vap->va_size = prnldt(p) * sizeof (struct ssd);
3336 		mutex_exit(&p->p_ldtlock);
3337 		mutex_enter(&p->p_lock);
3338 		break;
3339 #endif
3340 	case PR_USAGE:
3341 		vap->va_size = PR_OBJSIZE(prusage32_t, prusage_t);
3342 		break;
3343 	case PR_LUSAGE:
3344 		vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
3345 		    (p->p_lwpcnt + 1) * PR_OBJSPAN(prusage32_t, prusage_t);
3346 		break;
3347 	case PR_PAGEDATA:
3348 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
3349 			vap->va_size = 0;
3350 		else {
3351 			/*
3352 			 * We can drop p->p_lock before grabbing the
3353 			 * address space lock because p->p_as will not
3354 			 * change while the process is marked P_PR_LOCK.
3355 			 */
3356 			mutex_exit(&p->p_lock);
3357 			AS_LOCK_ENTER(as, RW_WRITER);
3358 #ifdef _LP64
3359 			vap->va_size = iam32bit?
3360 			    prpdsize32(as) : prpdsize(as);
3361 #else
3362 			vap->va_size = prpdsize(as);
3363 #endif
3364 			AS_LOCK_EXIT(as);
3365 			mutex_enter(&p->p_lock);
3366 		}
3367 		break;
3368 	case PR_OPAGEDATA:
3369 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
3370 			vap->va_size = 0;
3371 		else {
3372 			mutex_exit(&p->p_lock);
3373 			AS_LOCK_ENTER(as, RW_WRITER);
3374 #ifdef _LP64
3375 			vap->va_size = iam32bit?
3376 			    oprpdsize32(as) : oprpdsize(as);
3377 #else
3378 			vap->va_size = oprpdsize(as);
3379 #endif
3380 			AS_LOCK_EXIT(as);
3381 			mutex_enter(&p->p_lock);
3382 		}
3383 		break;
3384 	case PR_WATCH:
3385 		vap->va_size = avl_numnodes(&p->p_warea) *
3386 		    PR_OBJSIZE(prwatch32_t, prwatch_t);
3387 		break;
3388 	case PR_LWPSTATUS:
3389 		vap->va_size = PR_OBJSIZE(lwpstatus32_t, lwpstatus_t);
3390 		break;
3391 	case PR_LWPSINFO:
3392 		vap->va_size = PR_OBJSIZE(lwpsinfo32_t, lwpsinfo_t);
3393 		break;
3394 	case PR_LWPUSAGE:
3395 		vap->va_size = PR_OBJSIZE(prusage32_t, prusage_t);
3396 		break;
3397 	case PR_XREGS:
3398 		if (prhasx(p))
3399 			vap->va_size = prgetprxregsize(p);
3400 		else
3401 			vap->va_size = 0;
3402 		break;
3403 	case PR_SPYMASTER:
3404 		if (pnp->pr_common->prc_thread != NULL &&
3405 		    pnp->pr_common->prc_thread->t_lwp->lwp_spymaster != NULL) {
3406 			vap->va_size = PR_OBJSIZE(psinfo32_t, psinfo_t);
3407 		} else {
3408 			vap->va_size = 0;
3409 		}
3410 		break;
3411 #if defined(__sparc)
3412 	case PR_GWINDOWS:
3413 	{
3414 		kthread_t *t;
3415 		int n;
3416 
3417 		/*
3418 		 * If there is no lwp then just make the size zero.
3419 		 * This can happen if the lwp exits between the VOP_LOOKUP()
3420 		 * of the /proc/<pid>/lwp/<lwpid>/gwindows file and the
3421 		 * VOP_GETATTR() of the resulting vnode.
3422 		 */
3423 		if ((t = pcp->prc_thread) == NULL) {
3424 			vap->va_size = 0;
3425 			break;
3426 		}
3427 		/*
3428 		 * Drop p->p_lock while touching the stack.
3429 		 * The P_PR_LOCK flag prevents the lwp from
3430 		 * disappearing while we do this.
3431 		 */
3432 		mutex_exit(&p->p_lock);
3433 		if ((n = prnwindows(ttolwp(t))) == 0)
3434 			vap->va_size = 0;
3435 		else
3436 			vap->va_size = PR_OBJSIZE(gwindows32_t, gwindows_t) -
3437 			    (SPARC_MAXREGWINDOW - n) *
3438 			    PR_OBJSIZE(struct rwindow32, struct rwindow);
3439 		mutex_enter(&p->p_lock);
3440 		break;
3441 	}
3442 	case PR_ASRS:
3443 #ifdef _LP64
3444 		if (p->p_model == DATAMODEL_LP64)
3445 			vap->va_size = sizeof (asrset_t);
3446 		else
3447 #endif
3448 			vap->va_size = 0;
3449 		break;
3450 #endif
3451 	case PR_CTL:
3452 	case PR_LWPCTL:
3453 	default:
3454 		vap->va_size = 0;
3455 		break;
3456 	}
3457 
3458 	prunlock(pnp);
3459 	vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
3460 	return (0);
3461 }
3462 
3463 static int
3464 praccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
3465 {
3466 	prnode_t *pnp = VTOP(vp);
3467 	prnodetype_t type = pnp->pr_type;
3468 	int vmode;
3469 	vtype_t vtype;
3470 	proc_t *p;
3471 	int error = 0;
3472 	vnode_t *rvp;
3473 	vnode_t *xvp;
3474 
3475 	if ((mode & VWRITE) && vn_is_readonly(vp))
3476 		return (EROFS);
3477 
3478 	switch (type) {
3479 	case PR_PROCDIR:
3480 		break;
3481 
3482 	case PR_OBJECT:
3483 	case PR_FD:
3484 		/*
3485 		 * Disallow write access to the underlying objects.
3486 		 * Disallow access to underlying non-regular-file fds.
3487 		 * Disallow access to fds with other than existing open modes.
3488 		 */
3489 		rvp = pnp->pr_realvp;
3490 		vtype = rvp->v_type;
3491 		vmode = pnp->pr_mode;
3492 		if ((type == PR_OBJECT && (mode & VWRITE)) ||
3493 		    (type == PR_FD && vtype != VREG && vtype != VDIR) ||
3494 		    (type == PR_FD && (vmode & mode) != mode &&
3495 		    secpolicy_proc_access(cr) != 0))
3496 			return (EACCES);
3497 		return (VOP_ACCESS(rvp, mode, flags, cr, ct));
3498 
3499 	case PR_PSINFO:		/* these files can be read by anyone */
3500 	case PR_LPSINFO:
3501 	case PR_LWPSINFO:
3502 	case PR_LWPDIR:
3503 	case PR_LWPIDDIR:
3504 	case PR_USAGE:
3505 	case PR_LUSAGE:
3506 	case PR_LWPUSAGE:
3507 		p = pr_p_lock(pnp);
3508 		mutex_exit(&pr_pidlock);
3509 		if (p == NULL)
3510 			return (ENOENT);
3511 		prunlock(pnp);
3512 		break;
3513 
3514 	default:
3515 		/*
3516 		 * Except for the world-readable files above,
3517 		 * only /proc/pid exists if the process is a zombie.
3518 		 */
3519 		if ((error = prlock(pnp,
3520 		    (type == PR_PIDDIR)? ZYES : ZNO)) != 0)
3521 			return (error);
3522 		p = pnp->pr_common->prc_proc;
3523 		if (p != curproc)
3524 			error = priv_proc_cred_perm(cr, p, NULL, mode);
3525 
3526 		if (error != 0 || p == curproc || (p->p_flag & SSYS) ||
3527 		    p->p_as == &kas || (xvp = p->p_exec) == NULL) {
3528 			prunlock(pnp);
3529 		} else {
3530 			/*
3531 			 * Determine if the process's executable is readable.
3532 			 * We have to drop p->p_lock before the secpolicy
3533 			 * and VOP operation.
3534 			 */
3535 			VN_HOLD(xvp);
3536 			prunlock(pnp);
3537 			if (secpolicy_proc_access(cr) != 0)
3538 				error = VOP_ACCESS(xvp, VREAD, 0, cr, ct);
3539 			VN_RELE(xvp);
3540 		}
3541 		if (error)
3542 			return (error);
3543 		break;
3544 	}
3545 
3546 	if (type == PR_CURDIR || type == PR_ROOTDIR) {
3547 		/*
3548 		 * Final access check on the underlying directory vnode.
3549 		 */
3550 		return (VOP_ACCESS(pnp->pr_realvp, mode, flags, cr, ct));
3551 	}
3552 
3553 	/*
3554 	 * Visceral revulsion:  For compatibility with old /proc,
3555 	 * allow the /proc/<pid> directory to be opened for writing.
3556 	 */
3557 	vmode = pnp->pr_mode;
3558 	if (type == PR_PIDDIR)
3559 		vmode |= VWRITE;
3560 	if ((vmode & mode) != mode)
3561 		error = secpolicy_proc_access(cr);
3562 	return (error);
3563 }
3564 
3565 /*
3566  * Array of lookup functions, indexed by /proc file type.
3567  */
3568 static vnode_t *pr_lookup_notdir(), *pr_lookup_procdir(), *pr_lookup_piddir(),
3569 	*pr_lookup_objectdir(), *pr_lookup_lwpdir(), *pr_lookup_lwpiddir(),
3570 	*pr_lookup_fddir(), *pr_lookup_fdinfodir(), *pr_lookup_pathdir(),
3571 	*pr_lookup_tmpldir(), *pr_lookup_ctdir();
3572 
3573 static vnode_t *(*pr_lookup_function[PR_NFILES])() = {
3574 	pr_lookup_procdir,	/* /proc				*/
3575 	pr_lookup_notdir,	/* /proc/self				*/
3576 	pr_lookup_piddir,	/* /proc/<pid>				*/
3577 	pr_lookup_notdir,	/* /proc/<pid>/as			*/
3578 	pr_lookup_notdir,	/* /proc/<pid>/ctl			*/
3579 	pr_lookup_notdir,	/* /proc/<pid>/status			*/
3580 	pr_lookup_notdir,	/* /proc/<pid>/lstatus			*/
3581 	pr_lookup_notdir,	/* /proc/<pid>/psinfo			*/
3582 	pr_lookup_notdir,	/* /proc/<pid>/lpsinfo			*/
3583 	pr_lookup_notdir,	/* /proc/<pid>/map			*/
3584 	pr_lookup_notdir,	/* /proc/<pid>/rmap			*/
3585 	pr_lookup_notdir,	/* /proc/<pid>/xmap			*/
3586 	pr_lookup_notdir,	/* /proc/<pid>/cred			*/
3587 	pr_lookup_notdir,	/* /proc/<pid>/sigact			*/
3588 	pr_lookup_notdir,	/* /proc/<pid>/auxv			*/
3589 #if defined(__x86)
3590 	pr_lookup_notdir,	/* /proc/<pid>/ldt			*/
3591 #endif
3592 	pr_lookup_notdir,	/* /proc/<pid>/usage			*/
3593 	pr_lookup_notdir,	/* /proc/<pid>/lusage			*/
3594 	pr_lookup_notdir,	/* /proc/<pid>/pagedata			*/
3595 	pr_lookup_notdir,	/* /proc/<pid>/watch			*/
3596 	pr_lookup_notdir,	/* /proc/<pid>/cwd			*/
3597 	pr_lookup_notdir,	/* /proc/<pid>/root			*/
3598 	pr_lookup_fddir,	/* /proc/<pid>/fd			*/
3599 	pr_lookup_notdir,	/* /proc/<pid>/fd/nn			*/
3600 	pr_lookup_fdinfodir,	/* /proc/<pid>/fdinfo			*/
3601 	pr_lookup_notdir,	/* /proc/<pid>/fdinfo/nn		*/
3602 	pr_lookup_objectdir,	/* /proc/<pid>/object			*/
3603 	pr_lookup_notdir,	/* /proc/<pid>/object/xxx		*/
3604 	pr_lookup_lwpdir,	/* /proc/<pid>/lwp			*/
3605 	pr_lookup_lwpiddir,	/* /proc/<pid>/lwp/<lwpid>		*/
3606 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpctl	*/
3607 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpname	*/
3608 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpstatus	*/
3609 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpsinfo	*/
3610 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpusage	*/
3611 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/xregs	*/
3612 	pr_lookup_tmpldir,	/* /proc/<pid>/lwp/<lwpid>/templates	*/
3613 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/templates/<id> */
3614 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/spymaster	*/
3615 #if defined(__sparc)
3616 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/gwindows	*/
3617 	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/asrs		*/
3618 #endif
3619 	pr_lookup_notdir,	/* /proc/<pid>/priv			*/
3620 	pr_lookup_pathdir,	/* /proc/<pid>/path			*/
3621 	pr_lookup_notdir,	/* /proc/<pid>/path/xxx			*/
3622 	pr_lookup_ctdir,	/* /proc/<pid>/contracts		*/
3623 	pr_lookup_notdir,	/* /proc/<pid>/contracts/<ctid>		*/
3624 	pr_lookup_notdir,	/* /proc/<pid>/secflags			*/
3625 	pr_lookup_notdir,	/* old process file			*/
3626 	pr_lookup_notdir,	/* old lwp file				*/
3627 	pr_lookup_notdir,	/* old pagedata file			*/
3628 };
3629 
3630 static int
3631 prlookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
3632     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
3633     int *direntflags, pathname_t *realpnp)
3634 {
3635 	prnode_t *pnp = VTOP(dp);
3636 	prnodetype_t type = pnp->pr_type;
3637 	int error;
3638 
3639 	ASSERT(dp->v_type == VDIR);
3640 	ASSERT(type < PR_NFILES);
3641 
3642 	if (type != PR_PROCDIR && strcmp(comp, "..") == 0) {
3643 		VN_HOLD(pnp->pr_parent);
3644 		*vpp = pnp->pr_parent;
3645 		return (0);
3646 	}
3647 
3648 	if (*comp == '\0' ||
3649 	    strcmp(comp, ".") == 0 || strcmp(comp, "..") == 0) {
3650 		VN_HOLD(dp);
3651 		*vpp = dp;
3652 		return (0);
3653 	}
3654 
3655 	switch (type) {
3656 	case PR_CURDIR:
3657 	case PR_ROOTDIR:
3658 		/* restrict lookup permission to owner or root */
3659 		if ((error = praccess(dp, VEXEC, 0, cr, ct)) != 0)
3660 			return (error);
3661 		/* FALLTHROUGH */
3662 	case PR_FD:
3663 		/*
3664 		 * Performing a VOP_LOOKUP on the underlying vnode and emitting
3665 		 * the resulting vnode, without encapsulation, as our own is a
3666 		 * very special case when it comes to the assumptions built
3667 		 * into VFS.
3668 		 *
3669 		 * Since the resulting vnode is highly likely to be at some
3670 		 * abitrary position in another filesystem, we insist that the
3671 		 * VTRAVERSE flag is set on the parent.  This prevents things
3672 		 * such as the v_path freshness logic from mistaking the
3673 		 * resulting vnode as a "real" child of the parent, rather than
3674 		 * a consequence of this "procfs wormhole".
3675 		 *
3676 		 * Failure to establish such protections can lead to
3677 		 * incorrectly calculated v_paths being set on nodes reached
3678 		 * through these lookups.
3679 		 */
3680 		ASSERT((dp->v_flag & VTRAVERSE) != 0);
3681 
3682 		dp = pnp->pr_realvp;
3683 		return (VOP_LOOKUP(dp, comp, vpp, pathp, flags, rdir, cr, ct,
3684 		    direntflags, realpnp));
3685 	default:
3686 		break;
3687 	}
3688 
3689 	if ((type == PR_OBJECTDIR || type == PR_FDDIR ||
3690 	    type == PR_FDINFODIR || type == PR_PATHDIR) &&
3691 	    (error = praccess(dp, VEXEC, 0, cr, ct)) != 0)
3692 		return (error);
3693 
3694 	/* XXX - Do we need to pass ct, direntflags, or realpnp? */
3695 	*vpp = (pr_lookup_function[type](dp, comp));
3696 
3697 	return ((*vpp == NULL) ? ENOENT : 0);
3698 }
3699 
3700 /* ARGSUSED */
3701 static int
3702 prcreate(vnode_t *dp, char *comp, vattr_t *vap, vcexcl_t excl,
3703     int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
3704     vsecattr_t *vsecp)
3705 {
3706 	int error;
3707 
3708 	if ((error = prlookup(dp, comp, vpp, NULL, 0, NULL, cr,
3709 	    ct, NULL, NULL)) != 0) {
3710 		if (error == ENOENT) {
3711 			/* One can't O_CREAT nonexistent files in /proc. */
3712 			error = EACCES;
3713 		}
3714 		return (error);
3715 	}
3716 
3717 	if (excl == EXCL) {
3718 		/* Disallow the O_EXCL case */
3719 		error = EEXIST;
3720 	} else if ((error = praccess(*vpp, mode, 0, cr, ct)) == 0) {
3721 		/* Before proceeding, handle O_TRUNC if necessary. */
3722 		if (vap->va_mask & AT_SIZE) {
3723 			vnode_t *vp = *vpp;
3724 
3725 			if (vp->v_type == VDIR) {
3726 				/* Only allow O_TRUNC on files */
3727 				error = EISDIR;
3728 			} else if (vp->v_type != VPROC ||
3729 			    VTOP(vp)->pr_type != PR_FD) {
3730 				/*
3731 				 * Disallow for files outside of the
3732 				 * /proc/<pid>/fd/<n> entries
3733 				 */
3734 				error = EACCES;
3735 			} else {
3736 				uint_t mask;
3737 
3738 				vp = VTOP(vp)->pr_realvp;
3739 				mask = vap->va_mask;
3740 				vap->va_mask = AT_SIZE;
3741 				error = VOP_SETATTR(vp, vap, 0, cr, ct);
3742 				vap->va_mask = mask;
3743 			}
3744 		}
3745 	}
3746 
3747 	if (error) {
3748 		VN_RELE(*vpp);
3749 		*vpp = NULL;
3750 	}
3751 	return (error);
3752 }
3753 
3754 /* ARGSUSED */
3755 static vnode_t *
3756 pr_lookup_notdir(vnode_t *dp, char *comp)
3757 {
3758 	return (NULL);
3759 }
3760 
3761 /*
3762  * Find or construct a process vnode for the given pid.
3763  */
3764 static vnode_t *
3765 pr_lookup_procdir(vnode_t *dp, char *comp)
3766 {
3767 	pid_t pid;
3768 	prnode_t *pnp;
3769 	prcommon_t *pcp;
3770 	vnode_t *vp;
3771 	proc_t *p;
3772 	int c;
3773 
3774 	ASSERT(VTOP(dp)->pr_type == PR_PROCDIR);
3775 
3776 	if (strcmp(comp, "self") == 0) {
3777 		pnp = prgetnode(dp, PR_SELF);
3778 		return (PTOV(pnp));
3779 	} else {
3780 		pid = 0;
3781 		while ((c = *comp++) != '\0') {
3782 			if (c < '0' || c > '9')
3783 				return (NULL);
3784 			pid = 10*pid + c - '0';
3785 			if (pid > maxpid)
3786 				return (NULL);
3787 		}
3788 	}
3789 
3790 	pnp = prgetnode(dp, PR_PIDDIR);
3791 
3792 	mutex_enter(&pidlock);
3793 	if ((p = prfind(pid)) == NULL || p->p_stat == SIDL) {
3794 		mutex_exit(&pidlock);
3795 		prfreenode(pnp);
3796 		return (NULL);
3797 	}
3798 	ASSERT(p->p_stat != 0);
3799 
3800 	/* NOTE: we're holding pidlock across the policy call. */
3801 	if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
3802 		mutex_exit(&pidlock);
3803 		prfreenode(pnp);
3804 		return (NULL);
3805 	}
3806 
3807 	mutex_enter(&p->p_lock);
3808 	mutex_exit(&pidlock);
3809 
3810 	/*
3811 	 * If a process vnode already exists and it is not invalid
3812 	 * and it was created by the current process and it belongs
3813 	 * to the same /proc mount point as our parent vnode, then
3814 	 * just use it and discard the newly-allocated prnode.
3815 	 */
3816 	for (vp = p->p_trace; vp != NULL; vp = VTOP(vp)->pr_next) {
3817 		if (!(VTOP(VTOP(vp)->pr_pidfile)->pr_flags & PR_INVAL) &&
3818 		    VTOP(vp)->pr_owner == curproc &&
3819 		    vp->v_vfsp == dp->v_vfsp) {
3820 			ASSERT(!(VTOP(vp)->pr_flags & PR_INVAL));
3821 			VN_HOLD(vp);
3822 			prfreenode(pnp);
3823 			mutex_exit(&p->p_lock);
3824 			return (vp);
3825 		}
3826 	}
3827 	pnp->pr_owner = curproc;
3828 
3829 	/*
3830 	 * prgetnode() initialized most of the prnode.
3831 	 * Finish the job.
3832 	 */
3833 	pcp = pnp->pr_common;	/* the newly-allocated prcommon struct */
3834 	if ((vp = p->p_trace) != NULL) {
3835 		/* discard the new prcommon and use the existing prcommon */
3836 		prfreecommon(pcp);
3837 		pcp = VTOP(vp)->pr_common;
3838 		mutex_enter(&pcp->prc_mutex);
3839 		ASSERT(pcp->prc_refcnt > 0);
3840 		pcp->prc_refcnt++;
3841 		mutex_exit(&pcp->prc_mutex);
3842 		pnp->pr_common = pcp;
3843 	} else {
3844 		/* initialize the new prcommon struct */
3845 		if ((p->p_flag & SSYS) || p->p_as == &kas)
3846 			pcp->prc_flags |= PRC_SYS;
3847 		if (p->p_stat == SZOMB || (p->p_flag & SEXITING) != 0)
3848 			pcp->prc_flags |= PRC_DESTROY;
3849 		pcp->prc_proc = p;
3850 		pcp->prc_datamodel = p->p_model;
3851 		pcp->prc_pid = p->p_pid;
3852 		pcp->prc_slot = p->p_slot;
3853 	}
3854 	pnp->pr_pcommon = pcp;
3855 	pnp->pr_parent = dp;
3856 	VN_HOLD(dp);
3857 	/*
3858 	 * Link in the old, invalid directory vnode so we
3859 	 * can later determine the last close of the file.
3860 	 */
3861 	pnp->pr_next = p->p_trace;
3862 	p->p_trace = dp = PTOV(pnp);
3863 
3864 	/*
3865 	 * Kludge for old /proc: initialize the PR_PIDFILE as well.
3866 	 */
3867 	vp = pnp->pr_pidfile;
3868 	pnp = VTOP(vp);
3869 	pnp->pr_ino = ptoi(pcp->prc_pid);
3870 	pnp->pr_common = pcp;
3871 	pnp->pr_pcommon = pcp;
3872 	pnp->pr_parent = dp;
3873 	pnp->pr_next = p->p_plist;
3874 	p->p_plist = vp;
3875 
3876 	mutex_exit(&p->p_lock);
3877 	return (dp);
3878 }
3879 
3880 static vnode_t *
3881 pr_lookup_piddir(vnode_t *dp, char *comp)
3882 {
3883 	prnode_t *dpnp = VTOP(dp);
3884 	vnode_t *vp;
3885 	prnode_t *pnp;
3886 	proc_t *p;
3887 	user_t *up;
3888 	prdirent_t *dirp;
3889 	int i;
3890 	enum prnodetype type;
3891 
3892 	ASSERT(dpnp->pr_type == PR_PIDDIR);
3893 
3894 	for (i = 0; i < NPIDDIRFILES; i++) {
3895 		/* Skip "." and ".." */
3896 		dirp = &piddir[i+2];
3897 		if (strcmp(comp, dirp->d_name) == 0)
3898 			break;
3899 	}
3900 
3901 	if (i >= NPIDDIRFILES)
3902 		return (NULL);
3903 
3904 	type = (int)dirp->d_ino;
3905 	pnp = prgetnode(dp, type);
3906 
3907 	p = pr_p_lock(dpnp);
3908 	mutex_exit(&pr_pidlock);
3909 	if (p == NULL) {
3910 		prfreenode(pnp);
3911 		return (NULL);
3912 	}
3913 	if (dpnp->pr_pcommon->prc_flags & PRC_DESTROY) {
3914 		switch (type) {
3915 		case PR_PSINFO:
3916 		case PR_USAGE:
3917 			break;
3918 		default:
3919 			prunlock(dpnp);
3920 			prfreenode(pnp);
3921 			return (NULL);
3922 		}
3923 	}
3924 
3925 	switch (type) {
3926 	case PR_CURDIR:
3927 	case PR_ROOTDIR:
3928 		up = PTOU(p);
3929 		vp = (type == PR_CURDIR)? up->u_cdir :
3930 		    (up->u_rdir? up->u_rdir : rootdir);
3931 
3932 		if (vp == NULL) {
3933 			/* can't happen(?) */
3934 			prunlock(dpnp);
3935 			prfreenode(pnp);
3936 			return (NULL);
3937 		}
3938 		/*
3939 		 * Fill in the prnode so future references will
3940 		 * be able to find the underlying object's vnode.
3941 		 */
3942 		VN_HOLD(vp);
3943 		pnp->pr_realvp = vp;
3944 		PTOV(pnp)->v_flag |= VTRAVERSE;
3945 		break;
3946 	default:
3947 		break;
3948 	}
3949 
3950 	mutex_enter(&dpnp->pr_mutex);
3951 
3952 	if ((vp = dpnp->pr_files[i]) != NULL &&
3953 	    !(VTOP(vp)->pr_flags & PR_INVAL)) {
3954 		VN_HOLD(vp);
3955 		mutex_exit(&dpnp->pr_mutex);
3956 		prunlock(dpnp);
3957 		prfreenode(pnp);
3958 		return (vp);
3959 	}
3960 
3961 	/*
3962 	 * prgetnode() initialized most of the prnode.
3963 	 * Finish the job.
3964 	 */
3965 	pnp->pr_common = dpnp->pr_common;
3966 	pnp->pr_pcommon = dpnp->pr_pcommon;
3967 	pnp->pr_parent = dp;
3968 	VN_HOLD(dp);
3969 	pnp->pr_index = i;
3970 
3971 	dpnp->pr_files[i] = vp = PTOV(pnp);
3972 
3973 	/*
3974 	 * Link new vnode into list of all /proc vnodes for the process.
3975 	 */
3976 	if (vp->v_type == VPROC) {
3977 		pnp->pr_next = p->p_plist;
3978 		p->p_plist = vp;
3979 	}
3980 	mutex_exit(&dpnp->pr_mutex);
3981 	prunlock(dpnp);
3982 	return (vp);
3983 }
3984 
3985 static vnode_t *
3986 pr_lookup_objectdir(vnode_t *dp, char *comp)
3987 {
3988 	prnode_t *dpnp = VTOP(dp);
3989 	prnode_t *pnp;
3990 	proc_t *p;
3991 	struct seg *seg;
3992 	struct as *as;
3993 	vnode_t *vp;
3994 	vattr_t vattr;
3995 
3996 	ASSERT(dpnp->pr_type == PR_OBJECTDIR);
3997 
3998 	pnp = prgetnode(dp, PR_OBJECT);
3999 
4000 	if (prlock(dpnp, ZNO) != 0) {
4001 		prfreenode(pnp);
4002 		return (NULL);
4003 	}
4004 	p = dpnp->pr_common->prc_proc;
4005 	if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
4006 		prunlock(dpnp);
4007 		prfreenode(pnp);
4008 		return (NULL);
4009 	}
4010 
4011 	/*
4012 	 * We drop p_lock before grabbing the address space lock
4013 	 * in order to avoid a deadlock with the clock thread.
4014 	 * The process will not disappear and its address space
4015 	 * will not change because it is marked P_PR_LOCK.
4016 	 */
4017 	mutex_exit(&p->p_lock);
4018 	AS_LOCK_ENTER(as, RW_READER);
4019 	if ((seg = AS_SEGFIRST(as)) == NULL) {
4020 		vp = NULL;
4021 		goto out;
4022 	}
4023 	if (strcmp(comp, "a.out") == 0) {
4024 		vp = p->p_exec;
4025 		goto out;
4026 	}
4027 	do {
4028 		/*
4029 		 * Manufacture a filename for the "object" directory.
4030 		 */
4031 		vattr.va_mask = AT_FSID|AT_NODEID;
4032 		if (seg->s_ops == &segvn_ops &&
4033 		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4034 		    vp != NULL && vp->v_type == VREG &&
4035 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4036 			char name[64];
4037 
4038 			if (vp == p->p_exec)	/* "a.out" */
4039 				continue;
4040 			pr_object_name(name, vp, &vattr);
4041 			if (strcmp(name, comp) == 0)
4042 				goto out;
4043 		}
4044 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4045 
4046 	vp = NULL;
4047 out:
4048 	if (vp != NULL) {
4049 		VN_HOLD(vp);
4050 	}
4051 	AS_LOCK_EXIT(as);
4052 	mutex_enter(&p->p_lock);
4053 	prunlock(dpnp);
4054 
4055 	if (vp == NULL)
4056 		prfreenode(pnp);
4057 	else {
4058 		/*
4059 		 * Fill in the prnode so future references will
4060 		 * be able to find the underlying object's vnode.
4061 		 * Don't link this prnode into the list of all
4062 		 * prnodes for the process; this is a one-use node.
4063 		 * Its use is entirely to catch and fail opens for writing.
4064 		 */
4065 		pnp->pr_realvp = vp;
4066 		vp = PTOV(pnp);
4067 	}
4068 
4069 	return (vp);
4070 }
4071 
4072 /*
4073  * Find or construct an lwp vnode for the given lwpid.
4074  */
4075 static vnode_t *
4076 pr_lookup_lwpdir(vnode_t *dp, char *comp)
4077 {
4078 	id_t tid;	/* same type as t->t_tid */
4079 	int want_agent;
4080 	prnode_t *dpnp = VTOP(dp);
4081 	prnode_t *pnp;
4082 	prcommon_t *pcp;
4083 	vnode_t *vp;
4084 	proc_t *p;
4085 	kthread_t *t;
4086 	lwpdir_t *ldp;
4087 	lwpent_t *lep;
4088 	int tslot;
4089 	int c;
4090 
4091 	ASSERT(dpnp->pr_type == PR_LWPDIR);
4092 
4093 	tid = 0;
4094 	if (strcmp(comp, "agent") == 0)
4095 		want_agent = 1;
4096 	else {
4097 		want_agent = 0;
4098 		while ((c = *comp++) != '\0') {
4099 			id_t otid;
4100 
4101 			if (c < '0' || c > '9')
4102 				return (NULL);
4103 			otid = tid;
4104 			tid = 10*tid + c - '0';
4105 			if (tid/10 != otid)	/* integer overflow */
4106 				return (NULL);
4107 		}
4108 	}
4109 
4110 	pnp = prgetnode(dp, PR_LWPIDDIR);
4111 
4112 	p = pr_p_lock(dpnp);
4113 	mutex_exit(&pr_pidlock);
4114 	if (p == NULL) {
4115 		prfreenode(pnp);
4116 		return (NULL);
4117 	}
4118 
4119 	if (want_agent) {
4120 		if ((t = p->p_agenttp) == NULL)
4121 			lep = NULL;
4122 		else {
4123 			tid = t->t_tid;
4124 			tslot = t->t_dslot;
4125 			lep = p->p_lwpdir[tslot].ld_entry;
4126 		}
4127 	} else {
4128 		if ((ldp = lwp_hash_lookup(p, tid)) == NULL)
4129 			lep = NULL;
4130 		else {
4131 			tslot = (int)(ldp - p->p_lwpdir);
4132 			lep = ldp->ld_entry;
4133 		}
4134 	}
4135 
4136 	if (lep == NULL) {
4137 		prunlock(dpnp);
4138 		prfreenode(pnp);
4139 		return (NULL);
4140 	}
4141 
4142 	/*
4143 	 * If an lwp vnode already exists and it is not invalid
4144 	 * and it was created by the current process and it belongs
4145 	 * to the same /proc mount point as our parent vnode, then
4146 	 * just use it and discard the newly-allocated prnode.
4147 	 */
4148 	for (vp = lep->le_trace; vp != NULL; vp = VTOP(vp)->pr_next) {
4149 		if (!(VTOP(vp)->pr_flags & PR_INVAL) &&
4150 		    VTOP(vp)->pr_owner == curproc &&
4151 		    vp->v_vfsp == dp->v_vfsp) {
4152 			VN_HOLD(vp);
4153 			prunlock(dpnp);
4154 			prfreenode(pnp);
4155 			return (vp);
4156 		}
4157 	}
4158 	pnp->pr_owner = curproc;
4159 
4160 	/*
4161 	 * prgetnode() initialized most of the prnode.
4162 	 * Finish the job.
4163 	 */
4164 	pcp = pnp->pr_common;	/* the newly-allocated prcommon struct */
4165 	if ((vp = lep->le_trace) != NULL) {
4166 		/* discard the new prcommon and use the existing prcommon */
4167 		prfreecommon(pcp);
4168 		pcp = VTOP(vp)->pr_common;
4169 		mutex_enter(&pcp->prc_mutex);
4170 		ASSERT(pcp->prc_refcnt > 0);
4171 		pcp->prc_refcnt++;
4172 		mutex_exit(&pcp->prc_mutex);
4173 		pnp->pr_common = pcp;
4174 	} else {
4175 		/* initialize the new prcommon struct */
4176 		pcp->prc_flags |= PRC_LWP;
4177 		if ((p->p_flag & SSYS) || p->p_as == &kas)
4178 			pcp->prc_flags |= PRC_SYS;
4179 		if ((t = lep->le_thread) == NULL)
4180 			pcp->prc_flags |= PRC_DESTROY;
4181 		pcp->prc_proc = p;
4182 		pcp->prc_datamodel = dpnp->pr_pcommon->prc_datamodel;
4183 		pcp->prc_pid = p->p_pid;
4184 		pcp->prc_slot = p->p_slot;
4185 		pcp->prc_thread = t;
4186 		pcp->prc_tid = tid;
4187 		pcp->prc_tslot = tslot;
4188 	}
4189 	pnp->pr_pcommon = dpnp->pr_pcommon;
4190 	pnp->pr_parent = dp;
4191 	VN_HOLD(dp);
4192 	/*
4193 	 * Link in the old, invalid directory vnode so we
4194 	 * can later determine the last close of the file.
4195 	 */
4196 	pnp->pr_next = lep->le_trace;
4197 	lep->le_trace = vp = PTOV(pnp);
4198 	prunlock(dpnp);
4199 	return (vp);
4200 }
4201 
4202 static vnode_t *
4203 pr_lookup_lwpiddir(vnode_t *dp, char *comp)
4204 {
4205 	prnode_t *dpnp = VTOP(dp);
4206 	vnode_t *vp;
4207 	prnode_t *pnp;
4208 	proc_t *p;
4209 	prdirent_t *dirp;
4210 	int i;
4211 	enum prnodetype type;
4212 
4213 	ASSERT(dpnp->pr_type == PR_LWPIDDIR);
4214 
4215 	for (i = 0; i < NLWPIDDIRFILES; i++) {
4216 		/* Skip "." and ".." */
4217 		dirp = &lwpiddir[i+2];
4218 		if (strcmp(comp, dirp->d_name) == 0)
4219 			break;
4220 	}
4221 
4222 	if (i >= NLWPIDDIRFILES)
4223 		return (NULL);
4224 
4225 	type = (int)dirp->d_ino;
4226 	pnp = prgetnode(dp, type);
4227 
4228 	p = pr_p_lock(dpnp);
4229 	mutex_exit(&pr_pidlock);
4230 	if (p == NULL) {
4231 		prfreenode(pnp);
4232 		return (NULL);
4233 	}
4234 	if (dpnp->pr_common->prc_flags & PRC_DESTROY) {
4235 		/*
4236 		 * Only the lwpsinfo file is present for zombie lwps.
4237 		 * Nothing is present if the lwp has been reaped.
4238 		 */
4239 		if (dpnp->pr_common->prc_tslot == -1 ||
4240 		    type != PR_LWPSINFO) {
4241 			prunlock(dpnp);
4242 			prfreenode(pnp);
4243 			return (NULL);
4244 		}
4245 	}
4246 
4247 #if defined(__sparc)
4248 	/* the asrs file exists only for sparc v9 _LP64 processes */
4249 	if (type == PR_ASRS && p->p_model != DATAMODEL_LP64) {
4250 		prunlock(dpnp);
4251 		prfreenode(pnp);
4252 		return (NULL);
4253 	}
4254 #endif
4255 
4256 	mutex_enter(&dpnp->pr_mutex);
4257 
4258 	if ((vp = dpnp->pr_files[i]) != NULL &&
4259 	    !(VTOP(vp)->pr_flags & PR_INVAL)) {
4260 		VN_HOLD(vp);
4261 		mutex_exit(&dpnp->pr_mutex);
4262 		prunlock(dpnp);
4263 		prfreenode(pnp);
4264 		return (vp);
4265 	}
4266 
4267 	/*
4268 	 * prgetnode() initialized most of the prnode.
4269 	 * Finish the job.
4270 	 */
4271 	pnp->pr_common = dpnp->pr_common;
4272 	pnp->pr_pcommon = dpnp->pr_pcommon;
4273 	pnp->pr_parent = dp;
4274 	VN_HOLD(dp);
4275 	pnp->pr_index = i;
4276 
4277 	dpnp->pr_files[i] = vp = PTOV(pnp);
4278 
4279 	/*
4280 	 * Link new vnode into list of all /proc vnodes for the process.
4281 	 */
4282 	if (vp->v_type == VPROC) {
4283 		pnp->pr_next = p->p_plist;
4284 		p->p_plist = vp;
4285 	}
4286 	mutex_exit(&dpnp->pr_mutex);
4287 	prunlock(dpnp);
4288 	return (vp);
4289 }
4290 
4291 /*
4292  * Lookup one of the process's file vnodes.
4293  */
4294 static vnode_t *
4295 pr_lookup_fddir(vnode_t *dp, char *comp)
4296 {
4297 	prnode_t *dpnp = VTOP(dp);
4298 	prnode_t *pnp;
4299 	vnode_t *vp = NULL;
4300 	proc_t *p;
4301 	file_t *fp;
4302 	uint_t fd;
4303 	int c;
4304 
4305 	ASSERT(dpnp->pr_type == PR_FDDIR);
4306 
4307 	fd = 0;
4308 	while ((c = *comp++) != '\0') {
4309 		int ofd;
4310 		if (c < '0' || c > '9')
4311 			return (NULL);
4312 		ofd = fd;
4313 		fd = 10 * fd + c - '0';
4314 		if (fd / 10 != ofd)	/* integer overflow */
4315 			return (NULL);
4316 	}
4317 
4318 	pnp = prgetnode(dp, PR_FD);
4319 
4320 	if (prlock(dpnp, ZNO) != 0) {
4321 		prfreenode(pnp);
4322 		return (NULL);
4323 	}
4324 	p = dpnp->pr_common->prc_proc;
4325 	if ((p->p_flag & SSYS) || p->p_as == &kas) {
4326 		prunlock(dpnp);
4327 		prfreenode(pnp);
4328 		return (NULL);
4329 	}
4330 
4331 	if ((fp = pr_getf(p, fd, NULL)) != NULL) {
4332 		pnp->pr_mode = 07111;
4333 		if (fp->f_flag & FREAD)
4334 			pnp->pr_mode |= 0444;
4335 		if (fp->f_flag & FWRITE)
4336 			pnp->pr_mode |= 0222;
4337 		vp = fp->f_vnode;
4338 		VN_HOLD(vp);
4339 	}
4340 
4341 	prunlock(dpnp);
4342 	if (fp != NULL) {
4343 		pr_releasef(fp);
4344 	}
4345 
4346 	if (vp == NULL) {
4347 		prfreenode(pnp);
4348 		return (NULL);
4349 	}
4350 
4351 	/*
4352 	 * Fill in the prnode so future references will
4353 	 * be able to find the underlying object's vnode.
4354 	 * Don't link this prnode into the list of all
4355 	 * prnodes for the process; this is a one-use node.
4356 	 */
4357 	pnp->pr_realvp = vp;
4358 	pnp->pr_parent = dp;		/* needed for prlookup */
4359 	VN_HOLD(dp);
4360 	vp = PTOV(pnp);
4361 	if (pnp->pr_realvp->v_type == VDIR) {
4362 		vp->v_type = VDIR;
4363 		vp->v_flag |= VTRAVERSE;
4364 	}
4365 
4366 	return (vp);
4367 }
4368 
4369 static vnode_t *
4370 pr_lookup_fdinfodir(vnode_t *dp, char *comp)
4371 {
4372 	prnode_t *dpnp = VTOP(dp);
4373 	prnode_t *pnp;
4374 	vnode_t *vp = NULL;
4375 	proc_t *p;
4376 	uint_t fd;
4377 	int c;
4378 
4379 	ASSERT(dpnp->pr_type == PR_FDINFODIR);
4380 
4381 	fd = 0;
4382 	while ((c = *comp++) != '\0') {
4383 		int ofd;
4384 		if (c < '0' || c > '9')
4385 			return (NULL);
4386 		ofd = fd;
4387 		fd = 10 * fd + c - '0';
4388 		if (fd / 10 != ofd)	/* integer overflow */
4389 			return (NULL);
4390 	}
4391 
4392 	pnp = prgetnode(dp, PR_FDINFO);
4393 
4394 	if (prlock(dpnp, ZNO) != 0) {
4395 		prfreenode(pnp);
4396 		return (NULL);
4397 	}
4398 	p = dpnp->pr_common->prc_proc;
4399 	if ((p->p_flag & SSYS) || p->p_as == &kas) {
4400 		prunlock(dpnp);
4401 		prfreenode(pnp);
4402 		return (NULL);
4403 	}
4404 
4405 	/*
4406 	 * Don't link this prnode into the list of all
4407 	 * prnodes for the process; this is a one-use node.
4408 	 * Unlike the FDDIR case, the underlying vnode is not stored in
4409 	 * pnp->pr_realvp. Instead, the fd number is stored in pnp->pr_index
4410 	 * and used by pr_read_fdinfo() to return information for the right
4411 	 * file descriptor.
4412 	 */
4413 	pnp->pr_common = dpnp->pr_common;
4414 	pnp->pr_pcommon = dpnp->pr_pcommon;
4415 	pnp->pr_parent = dp;
4416 	pnp->pr_index = fd;
4417 	VN_HOLD(dp);
4418 	prunlock(dpnp);
4419 	vp = PTOV(pnp);
4420 
4421 	return (vp);
4422 }
4423 
4424 static vnode_t *
4425 pr_lookup_pathdir(vnode_t *dp, char *comp)
4426 {
4427 	prnode_t *dpnp = VTOP(dp);
4428 	prnode_t *pnp;
4429 	vnode_t *vp = NULL;
4430 	proc_t *p;
4431 	uint_t fd, flags = 0;
4432 	int c;
4433 	uf_entry_t *ufp;
4434 	uf_info_t *fip;
4435 	enum { NAME_FD, NAME_OBJECT, NAME_ROOT, NAME_CWD, NAME_UNKNOWN } type;
4436 	char *tmp;
4437 	int idx;
4438 	struct seg *seg;
4439 	struct as *as = NULL;
4440 	vattr_t vattr;
4441 
4442 	ASSERT(dpnp->pr_type == PR_PATHDIR);
4443 
4444 	/*
4445 	 * First, check if this is a numeric entry, in which case we have a
4446 	 * file descriptor.
4447 	 */
4448 	fd = 0;
4449 	type = NAME_FD;
4450 	tmp = comp;
4451 	while ((c = *tmp++) != '\0') {
4452 		int ofd;
4453 		if (c < '0' || c > '9') {
4454 			type = NAME_UNKNOWN;
4455 			break;
4456 		}
4457 		ofd = fd;
4458 		fd = 10*fd + c - '0';
4459 		if (fd/10 != ofd) {	/* integer overflow */
4460 			type = NAME_UNKNOWN;
4461 			break;
4462 		}
4463 	}
4464 
4465 	/*
4466 	 * Next, see if it is one of the special values {root, cwd}.
4467 	 */
4468 	if (type == NAME_UNKNOWN) {
4469 		if (strcmp(comp, "root") == 0)
4470 			type = NAME_ROOT;
4471 		else if (strcmp(comp, "cwd") == 0)
4472 			type = NAME_CWD;
4473 	}
4474 
4475 	/*
4476 	 * Grab the necessary data from the process
4477 	 */
4478 	if (prlock(dpnp, ZNO) != 0)
4479 		return (NULL);
4480 	p = dpnp->pr_common->prc_proc;
4481 
4482 	fip = P_FINFO(p);
4483 
4484 	switch (type) {
4485 	case NAME_ROOT:
4486 		if ((vp = PTOU(p)->u_rdir) == NULL)
4487 			vp = p->p_zone->zone_rootvp;
4488 		VN_HOLD(vp);
4489 		break;
4490 	case NAME_CWD:
4491 		vp = PTOU(p)->u_cdir;
4492 		VN_HOLD(vp);
4493 		break;
4494 	default:
4495 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
4496 			prunlock(dpnp);
4497 			return (NULL);
4498 		}
4499 	}
4500 	mutex_exit(&p->p_lock);
4501 
4502 	/*
4503 	 * Determine if this is an object entry
4504 	 */
4505 	if (type == NAME_UNKNOWN) {
4506 		/*
4507 		 * Start with the inode index immediately after the number of
4508 		 * files.
4509 		 */
4510 		mutex_enter(&fip->fi_lock);
4511 		idx = fip->fi_nfiles + 4;
4512 		mutex_exit(&fip->fi_lock);
4513 
4514 		if (strcmp(comp, "a.out") == 0) {
4515 			if (p->p_execdir != NULL) {
4516 				vp = p->p_execdir;
4517 				VN_HOLD(vp);
4518 				type = NAME_OBJECT;
4519 				flags |= PR_AOUT;
4520 			} else {
4521 				vp = p->p_exec;
4522 				VN_HOLD(vp);
4523 				type = NAME_OBJECT;
4524 			}
4525 		} else {
4526 			AS_LOCK_ENTER(as, RW_READER);
4527 			if ((seg = AS_SEGFIRST(as)) != NULL) {
4528 				do {
4529 					/*
4530 					 * Manufacture a filename for the
4531 					 * "object" directory.
4532 					 */
4533 					vattr.va_mask = AT_FSID|AT_NODEID;
4534 					if (seg->s_ops == &segvn_ops &&
4535 					    SEGOP_GETVP(seg, seg->s_base, &vp)
4536 					    == 0 &&
4537 					    vp != NULL && vp->v_type == VREG &&
4538 					    VOP_GETATTR(vp, &vattr, 0, CRED(),
4539 					    NULL) == 0) {
4540 						char name[64];
4541 
4542 						if (vp == p->p_exec)
4543 							continue;
4544 						idx++;
4545 						pr_object_name(name, vp,
4546 						    &vattr);
4547 						if (strcmp(name, comp) == 0)
4548 							break;
4549 					}
4550 				} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4551 			}
4552 
4553 			if (seg == NULL) {
4554 				vp = NULL;
4555 			} else {
4556 				VN_HOLD(vp);
4557 				type = NAME_OBJECT;
4558 			}
4559 
4560 			AS_LOCK_EXIT(as);
4561 		}
4562 	}
4563 
4564 
4565 	switch (type) {
4566 	case NAME_FD:
4567 		mutex_enter(&fip->fi_lock);
4568 		if (fd < fip->fi_nfiles) {
4569 			UF_ENTER(ufp, fip, fd);
4570 			if (ufp->uf_file != NULL) {
4571 				vp = ufp->uf_file->f_vnode;
4572 				VN_HOLD(vp);
4573 			}
4574 			UF_EXIT(ufp);
4575 		}
4576 		mutex_exit(&fip->fi_lock);
4577 		idx = fd + 4;
4578 		break;
4579 	case NAME_ROOT:
4580 		idx = 2;
4581 		break;
4582 	case NAME_CWD:
4583 		idx = 3;
4584 		break;
4585 	case NAME_OBJECT:
4586 	case NAME_UNKNOWN:
4587 		/* Nothing to do */
4588 		break;
4589 	}
4590 
4591 	mutex_enter(&p->p_lock);
4592 	prunlock(dpnp);
4593 
4594 	if (vp != NULL) {
4595 		pnp = prgetnode(dp, PR_PATH);
4596 
4597 		pnp->pr_flags |= flags;
4598 		pnp->pr_common = dpnp->pr_common;
4599 		pnp->pr_pcommon = dpnp->pr_pcommon;
4600 		pnp->pr_realvp = vp;
4601 		pnp->pr_parent = dp;		/* needed for prlookup */
4602 		pnp->pr_ino = pmkino(idx, dpnp->pr_common->prc_slot, PR_PATH);
4603 		VN_HOLD(dp);
4604 		vp = PTOV(pnp);
4605 		vp->v_type = VLNK;
4606 	}
4607 
4608 	return (vp);
4609 }
4610 
4611 /*
4612  * Look up one of the process's active templates.
4613  */
4614 static vnode_t *
4615 pr_lookup_tmpldir(vnode_t *dp, char *comp)
4616 {
4617 	prnode_t *dpnp = VTOP(dp);
4618 	prnode_t *pnp;
4619 	vnode_t *vp = NULL;
4620 	proc_t *p;
4621 	int i;
4622 
4623 	ASSERT(dpnp->pr_type == PR_TMPLDIR);
4624 
4625 	for (i = 0; i < ct_ntypes; i++)
4626 		if (strcmp(comp, ct_types[i]->ct_type_name) == 0)
4627 			break;
4628 	if (i == ct_ntypes)
4629 		return (NULL);
4630 
4631 	pnp = prgetnode(dp, PR_TMPL);
4632 
4633 	if (prlock(dpnp, ZNO) != 0) {
4634 		prfreenode(pnp);
4635 		return (NULL);
4636 	}
4637 	p = dpnp->pr_common->prc_proc;
4638 	if ((p->p_flag & SSYS) || p->p_as == &kas ||
4639 	    (dpnp->pr_common->prc_flags & (PRC_DESTROY | PRC_LWP)) != PRC_LWP) {
4640 		prunlock(dpnp);
4641 		prfreenode(pnp);
4642 		return (NULL);
4643 	}
4644 	if (ttolwp(dpnp->pr_common->prc_thread)->lwp_ct_active[i] != NULL) {
4645 		pnp->pr_common = dpnp->pr_common;
4646 		pnp->pr_pcommon = dpnp->pr_pcommon;
4647 		pnp->pr_parent = dp;
4648 		pnp->pr_cttype = i;
4649 		VN_HOLD(dp);
4650 		vp = PTOV(pnp);
4651 	} else {
4652 		prfreenode(pnp);
4653 	}
4654 	prunlock(dpnp);
4655 
4656 	return (vp);
4657 }
4658 
4659 /*
4660  * Look up one of the contracts owned by the process.
4661  */
4662 static vnode_t *
4663 pr_lookup_ctdir(vnode_t *dp, char *comp)
4664 {
4665 	prnode_t *dpnp = VTOP(dp);
4666 	prnode_t *pnp;
4667 	vnode_t *vp = NULL;
4668 	proc_t *p;
4669 	id_t id = 0;
4670 	contract_t *ct;
4671 	int c;
4672 
4673 	ASSERT(dpnp->pr_type == PR_CTDIR);
4674 
4675 	while ((c = *comp++) != '\0') {
4676 		id_t oid;
4677 		if (c < '0' || c > '9')
4678 			return (NULL);
4679 		oid = id;
4680 		id = 10 * id + c - '0';
4681 		if (id / 10 != oid)	/* integer overflow */
4682 			return (NULL);
4683 	}
4684 
4685 	/*
4686 	 * Search all contracts; we'll filter below.
4687 	 */
4688 	ct = contract_ptr(id, GLOBAL_ZONEUNIQID);
4689 	if (ct == NULL)
4690 		return (NULL);
4691 
4692 	pnp = prgetnode(dp, PR_CT);
4693 
4694 	if (prlock(dpnp, ZNO) != 0) {
4695 		prfreenode(pnp);
4696 		contract_rele(ct);
4697 		return (NULL);
4698 	}
4699 	p = dpnp->pr_common->prc_proc;
4700 	/*
4701 	 * We only allow lookups of contracts owned by this process, or,
4702 	 * if we are zsched and this is a zone's procfs, contracts on
4703 	 * stuff in the zone which are held by processes or contracts
4704 	 * outside the zone.  (see logic in contract_status_common)
4705 	 */
4706 	if ((ct->ct_owner != p) &&
4707 	    !(p == VTOZONE(dp)->zone_zsched && ct->ct_state < CTS_ORPHAN &&
4708 	    VTOZONE(dp)->zone_uniqid == contract_getzuniqid(ct) &&
4709 	    VTOZONE(dp)->zone_uniqid != GLOBAL_ZONEUNIQID &&
4710 	    ct->ct_czuniqid == GLOBAL_ZONEUNIQID)) {
4711 		prunlock(dpnp);
4712 		prfreenode(pnp);
4713 		contract_rele(ct);
4714 		return (NULL);
4715 	}
4716 	pnp->pr_common = dpnp->pr_common;
4717 	pnp->pr_pcommon = dpnp->pr_pcommon;
4718 	pnp->pr_contract = ct;
4719 	pnp->pr_parent = dp;
4720 	pnp->pr_ino = pmkino(id, pnp->pr_common->prc_slot, PR_CT);
4721 	VN_HOLD(dp);
4722 	prunlock(dpnp);
4723 	vp = PTOV(pnp);
4724 
4725 	return (vp);
4726 }
4727 
4728 /*
4729  * Construct an lwp vnode for the old /proc interface.
4730  * We stand on our head to make the /proc plumbing correct.
4731  */
4732 vnode_t *
4733 prlwpnode(prnode_t *pnp, uint_t tid)
4734 {
4735 	char comp[12];
4736 	vnode_t *dp;
4737 	vnode_t *vp;
4738 	prcommon_t *pcp;
4739 	proc_t *p;
4740 
4741 	/*
4742 	 * Lookup the /proc/<pid>/lwp/<lwpid> directory vnode.
4743 	 */
4744 	if (pnp->pr_type == PR_PIDFILE) {
4745 		dp = pnp->pr_parent;		/* /proc/<pid> */
4746 		VN_HOLD(dp);
4747 		vp = pr_lookup_piddir(dp, "lwp");
4748 		VN_RELE(dp);
4749 		if ((dp = vp) == NULL)		/* /proc/<pid>/lwp */
4750 			return (NULL);
4751 	} else if (pnp->pr_type == PR_LWPIDFILE) {
4752 		dp = pnp->pr_parent;		/* /proc/<pid>/lwp/<lwpid> */
4753 		dp = VTOP(dp)->pr_parent;	/* /proc/<pid>/lwp */
4754 		VN_HOLD(dp);
4755 	} else {
4756 		return (NULL);
4757 	}
4758 
4759 	(void) pr_u32tos(tid, comp, sizeof (comp));
4760 	vp = pr_lookup_lwpdir(dp, comp);
4761 	VN_RELE(dp);
4762 	if ((dp = vp) == NULL)
4763 		return (NULL);
4764 
4765 	pnp = prgetnode(dp, PR_LWPIDFILE);
4766 	vp = PTOV(pnp);
4767 
4768 	/*
4769 	 * prgetnode() initialized most of the prnode.
4770 	 * Finish the job.
4771 	 */
4772 	pcp = VTOP(dp)->pr_common;
4773 	pnp->pr_ino = ptoi(pcp->prc_pid);
4774 	pnp->pr_common = pcp;
4775 	pnp->pr_pcommon = VTOP(dp)->pr_pcommon;
4776 	pnp->pr_parent = dp;
4777 	/*
4778 	 * Link new vnode into list of all /proc vnodes for the process.
4779 	 */
4780 	p = pr_p_lock(pnp);
4781 	mutex_exit(&pr_pidlock);
4782 	if (p == NULL) {
4783 		VN_RELE(dp);
4784 		prfreenode(pnp);
4785 		vp = NULL;
4786 	} else if (pcp->prc_thread == NULL) {
4787 		prunlock(pnp);
4788 		VN_RELE(dp);
4789 		prfreenode(pnp);
4790 		vp = NULL;
4791 	} else {
4792 		pnp->pr_next = p->p_plist;
4793 		p->p_plist = vp;
4794 		prunlock(pnp);
4795 	}
4796 
4797 	return (vp);
4798 }
4799 
/*
 * Allocation counters kept only in DEBUG kernels: nprnode is incremented in
 * prgetnode() and decremented in prfreenode(); nprcommon is incremented in
 * prgetnode() and decremented in prfreecommon().  In non-DEBUG kernels the
 * macros expand to nothing.  Note that the DEBUG definitions already end in
 * a semicolon.
 */
#ifdef DEBUG

static	uint32_t nprnode;
static	uint32_t nprcommon;

#define	INCREMENT(x)	atomic_inc_32(&x);
#define	DECREMENT(x)	atomic_dec_32(&x);

#else	/* !DEBUG */

#define	INCREMENT(x)
#define	DECREMENT(x)

#endif	/* DEBUG */
4814 
4815 /*
4816  * New /proc vnode required; allocate it and fill in most of the fields.
4817  */
4818 prnode_t *
4819 prgetnode(vnode_t *dp, prnodetype_t type)
4820 {
4821 	prnode_t *pnp;
4822 	prcommon_t *pcp;
4823 	vnode_t *vp;
4824 	ulong_t nfiles;
4825 
4826 	INCREMENT(nprnode);
4827 	pnp = kmem_zalloc(sizeof (prnode_t), KM_SLEEP);
4828 
4829 	mutex_init(&pnp->pr_mutex, NULL, MUTEX_DEFAULT, NULL);
4830 	pnp->pr_type = type;
4831 
4832 	pnp->pr_vnode = vn_alloc(KM_SLEEP);
4833 
4834 	vp = PTOV(pnp);
4835 	vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
4836 	vn_setops(vp, prvnodeops);
4837 	vp->v_vfsp = dp->v_vfsp;
4838 	vp->v_type = VPROC;
4839 	vp->v_data = (caddr_t)pnp;
4840 
4841 	switch (type) {
4842 	case PR_PIDDIR:
4843 	case PR_LWPIDDIR:
4844 		/*
4845 		 * We need a prcommon and a files array for each of these.
4846 		 */
4847 		INCREMENT(nprcommon);
4848 
4849 		pcp = kmem_zalloc(sizeof (prcommon_t), KM_SLEEP);
4850 		pcp->prc_refcnt = 1;
4851 		pnp->pr_common = pcp;
4852 		mutex_init(&pcp->prc_mutex, NULL, MUTEX_DEFAULT, NULL);
4853 		cv_init(&pcp->prc_wait, NULL, CV_DEFAULT, NULL);
4854 
4855 		nfiles = (type == PR_PIDDIR)? NPIDDIRFILES : NLWPIDDIRFILES;
4856 		pnp->pr_files =
4857 		    kmem_zalloc(nfiles * sizeof (vnode_t *), KM_SLEEP);
4858 
4859 		vp->v_type = VDIR;
4860 		/*
4861 		 * Mode should be read-search by all, but we cannot so long
4862 		 * as we must support compatibility mode with old /proc.
4863 		 * Make /proc/<pid> be read by owner only, search by all.
4864 		 * Make /proc/<pid>/lwp/<lwpid> read-search by all.  Also,
4865 		 * set VDIROPEN on /proc/<pid> so it can be opened for writing.
4866 		 */
4867 		if (type == PR_PIDDIR) {
4868 			/* kludge for old /proc interface */
4869 			prnode_t *xpnp = prgetnode(dp, PR_PIDFILE);
4870 			pnp->pr_pidfile = PTOV(xpnp);
4871 			pnp->pr_mode = 0511;
4872 			vp->v_flag |= VDIROPEN;
4873 		} else {
4874 			pnp->pr_mode = 0555;
4875 		}
4876 
4877 		break;
4878 
4879 	case PR_CURDIR:
4880 	case PR_ROOTDIR:
4881 	case PR_FDDIR:
4882 	case PR_FDINFODIR:
4883 	case PR_OBJECTDIR:
4884 	case PR_PATHDIR:
4885 	case PR_CTDIR:
4886 	case PR_TMPLDIR:
4887 		vp->v_type = VDIR;
4888 		pnp->pr_mode = 0500;	/* read-search by owner only */
4889 		break;
4890 
4891 	case PR_CT:
4892 		vp->v_type = VLNK;
4893 		pnp->pr_mode = 0500;	/* read-search by owner only */
4894 		break;
4895 
4896 	case PR_PATH:
4897 	case PR_SELF:
4898 		vp->v_type = VLNK;
4899 		pnp->pr_mode = 0777;
4900 		break;
4901 
4902 	case PR_LWPDIR:
4903 		vp->v_type = VDIR;
4904 		pnp->pr_mode = 0555;	/* read-search by all */
4905 		break;
4906 
4907 	case PR_AS:
4908 	case PR_TMPL:
4909 		pnp->pr_mode = 0600;	/* read-write by owner only */
4910 		break;
4911 
4912 	case PR_CTL:
4913 	case PR_LWPCTL:
4914 		pnp->pr_mode = 0200;	/* write-only by owner only */
4915 		break;
4916 
4917 	case PR_PIDFILE:
4918 	case PR_LWPIDFILE:
4919 		pnp->pr_mode = 0600;	/* read-write by owner only */
4920 		break;
4921 
4922 	case PR_LWPNAME:
4923 		pnp->pr_mode = 0644;	/* readable by all + owner can write */
4924 		break;
4925 
4926 	case PR_PSINFO:
4927 	case PR_LPSINFO:
4928 	case PR_LWPSINFO:
4929 	case PR_USAGE:
4930 	case PR_LUSAGE:
4931 	case PR_LWPUSAGE:
4932 		pnp->pr_mode = 0444;	/* read-only by all */
4933 		break;
4934 
4935 	default:
4936 		pnp->pr_mode = 0400;	/* read-only by owner only */
4937 		break;
4938 	}
4939 	vn_exists(vp);
4940 	return (pnp);
4941 }
4942 
4943 /*
4944  * Free the storage obtained from prgetnode().
4945  */
4946 void
4947 prfreenode(prnode_t *pnp)
4948 {
4949 	vnode_t *vp;
4950 	ulong_t nfiles;
4951 
4952 	vn_invalid(PTOV(pnp));
4953 	vn_free(PTOV(pnp));
4954 	mutex_destroy(&pnp->pr_mutex);
4955 
4956 	switch (pnp->pr_type) {
4957 	case PR_PIDDIR:
4958 		/* kludge for old /proc interface */
4959 		if (pnp->pr_pidfile != NULL) {
4960 			prfreenode(VTOP(pnp->pr_pidfile));
4961 			pnp->pr_pidfile = NULL;
4962 		}
4963 		/* FALLTHROUGH */
4964 	case PR_LWPIDDIR:
4965 		/*
4966 		 * We allocated a prcommon and a files array for each of these.
4967 		 */
4968 		prfreecommon(pnp->pr_common);
4969 		nfiles = (pnp->pr_type == PR_PIDDIR)?
4970 		    NPIDDIRFILES : NLWPIDDIRFILES;
4971 		kmem_free(pnp->pr_files, nfiles * sizeof (vnode_t *));
4972 		break;
4973 	default:
4974 		break;
4975 	}
4976 	/*
4977 	 * If there is an underlying vnode, be sure
4978 	 * to release it after freeing the prnode.
4979 	 */
4980 	vp = pnp->pr_realvp;
4981 	kmem_free(pnp, sizeof (*pnp));
4982 	DECREMENT(nprnode);
4983 	if (vp != NULL) {
4984 		VN_RELE(vp);
4985 	}
4986 }
4987 
4988 /*
4989  * Free a prcommon structure, if the reference count reaches zero.
4990  */
4991 static void
4992 prfreecommon(prcommon_t *pcp)
4993 {
4994 	mutex_enter(&pcp->prc_mutex);
4995 	ASSERT(pcp->prc_refcnt > 0);
4996 	if (--pcp->prc_refcnt != 0)
4997 		mutex_exit(&pcp->prc_mutex);
4998 	else {
4999 		mutex_exit(&pcp->prc_mutex);
5000 
5001 		ASSERT(pcp->prc_refcnt == 0);
5002 		ASSERT(pcp->prc_selfopens == 0 && pcp->prc_writers == 0);
5003 
5004 		pollhead_clean(&pcp->prc_pollhead);
5005 		mutex_destroy(&pcp->prc_mutex);
5006 		cv_destroy(&pcp->prc_wait);
5007 		kmem_free(pcp, sizeof (prcommon_t));
5008 		DECREMENT(nprcommon);
5009 	}
5010 }
5011 
5012 /*
5013  * Array of readdir functions, indexed by /proc file type.
5014  */
5015 static int pr_readdir_notdir(), pr_readdir_procdir(), pr_readdir_piddir(),
5016 	pr_readdir_objectdir(), pr_readdir_lwpdir(), pr_readdir_lwpiddir(),
5017 	pr_readdir_fddir(), pr_readdir_fdinfodir(), pr_readdir_pathdir(),
5018 	pr_readdir_tmpldir(), pr_readdir_ctdir();
5019 
5020 static int (*pr_readdir_function[PR_NFILES])() = {
5021 	pr_readdir_procdir,	/* /proc				*/
5022 	pr_readdir_notdir,	/* /proc/self				*/
5023 	pr_readdir_piddir,	/* /proc/<pid>				*/
5024 	pr_readdir_notdir,	/* /proc/<pid>/as			*/
5025 	pr_readdir_notdir,	/* /proc/<pid>/ctl			*/
5026 	pr_readdir_notdir,	/* /proc/<pid>/status			*/
5027 	pr_readdir_notdir,	/* /proc/<pid>/lstatus			*/
5028 	pr_readdir_notdir,	/* /proc/<pid>/psinfo			*/
5029 	pr_readdir_notdir,	/* /proc/<pid>/lpsinfo			*/
5030 	pr_readdir_notdir,	/* /proc/<pid>/map			*/
5031 	pr_readdir_notdir,	/* /proc/<pid>/rmap			*/
5032 	pr_readdir_notdir,	/* /proc/<pid>/xmap			*/
5033 	pr_readdir_notdir,	/* /proc/<pid>/cred			*/
5034 	pr_readdir_notdir,	/* /proc/<pid>/sigact			*/
5035 	pr_readdir_notdir,	/* /proc/<pid>/auxv			*/
5036 #if defined(__x86)
5037 	pr_readdir_notdir,	/* /proc/<pid>/ldt			*/
5038 #endif
5039 	pr_readdir_notdir,	/* /proc/<pid>/usage			*/
5040 	pr_readdir_notdir,	/* /proc/<pid>/lusage			*/
5041 	pr_readdir_notdir,	/* /proc/<pid>/pagedata			*/
5042 	pr_readdir_notdir,	/* /proc/<pid>/watch			*/
5043 	pr_readdir_notdir,	/* /proc/<pid>/cwd			*/
5044 	pr_readdir_notdir,	/* /proc/<pid>/root			*/
5045 	pr_readdir_fddir,	/* /proc/<pid>/fd			*/
5046 	pr_readdir_notdir,	/* /proc/<pid>/fd/nn			*/
5047 	pr_readdir_fdinfodir,	/* /proc/<pid>/fdinfo			*/
5048 	pr_readdir_notdir,	/* /proc/<pid>/fdinfo/nn		*/
5049 	pr_readdir_objectdir,	/* /proc/<pid>/object			*/
5050 	pr_readdir_notdir,	/* /proc/<pid>/object/xxx		*/
5051 	pr_readdir_lwpdir,	/* /proc/<pid>/lwp			*/
5052 	pr_readdir_lwpiddir,	/* /proc/<pid>/lwp/<lwpid>		*/
5053 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpctl	*/
5054 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpname	*/
5055 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpstatus	*/
5056 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpsinfo	*/
5057 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpusage	*/
5058 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/xregs	*/
5059 	pr_readdir_tmpldir,	/* /proc/<pid>/lwp/<lwpid>/templates	*/
5060 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/templates/<id> */
5061 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/spymaster	*/
5062 #if defined(__sparc)
5063 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/gwindows	*/
5064 	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/asrs		*/
5065 #endif
5066 	pr_readdir_notdir,	/* /proc/<pid>/priv			*/
5067 	pr_readdir_pathdir,	/* /proc/<pid>/path			*/
5068 	pr_readdir_notdir,	/* /proc/<pid>/path/xxx			*/
5069 	pr_readdir_ctdir,	/* /proc/<pid>/contracts		*/
5070 	pr_readdir_notdir,	/* /proc/<pid>/contracts/<ctid>		*/
5071 	pr_readdir_notdir,	/* /proc/<pid>/secflags			*/
5072 	pr_readdir_notdir,	/* old process file			*/
5073 	pr_readdir_notdir,	/* old lwp file				*/
5074 	pr_readdir_notdir,	/* old pagedata file			*/
5075 };
5076 
5077 /* ARGSUSED */
5078 static int
5079 prreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
5080     caller_context_t *ct, int flags)
5081 {
5082 	prnode_t *pnp = VTOP(vp);
5083 
5084 	ASSERT(pnp->pr_type < PR_NFILES);
5085 
5086 	/* XXX - Do we need to pass ct and flags? */
5087 	return (pr_readdir_function[pnp->pr_type](pnp, uiop, eofp));
5088 }
5089 
5090 /* ARGSUSED */
5091 static int
5092 pr_readdir_notdir(prnode_t *pnp, uio_t *uiop, int *eofp)
5093 {
5094 	return (ENOTDIR);
5095 }
5096 
5097 /* ARGSUSED */
5098 static int
5099 pr_readdir_procdir(prnode_t *pnp, uio_t *uiop, int *eofp)
5100 {
5101 	zoneid_t zoneid;
5102 	gfs_readdir_state_t gstate;
5103 	int error, eof = 0;
5104 	offset_t n;
5105 
5106 	ASSERT(pnp->pr_type == PR_PROCDIR);
5107 
5108 	zoneid = VTOZONE(PTOV(pnp))->zone_id;
5109 
5110 	if ((error = gfs_readdir_init(&gstate, PNSIZ, PRSDSIZE, uiop,
5111 	    PRROOTINO, PRROOTINO, 0)) != 0)
5112 		return (error);
5113 
5114 	/*
5115 	 * Loop until user's request is satisfied or until all processes
5116 	 * have been examined.
5117 	 */
5118 	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
5119 		uint_t pid;
5120 		int pslot;
5121 		proc_t *p;
5122 
5123 		/*
5124 		 * Find next entry.  Skip processes not visible where
5125 		 * this /proc was mounted.
5126 		 */
5127 		mutex_enter(&pidlock);
5128 		while (n < v.v_proc &&
5129 		    ((p = pid_entry(n)) == NULL || p->p_stat == SIDL ||
5130 		    (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) ||
5131 		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0))
5132 			n++;
5133 
5134 		/*
5135 		 * Stop when entire proc table has been examined.
5136 		 */
5137 		if (n >= v.v_proc) {
5138 			mutex_exit(&pidlock);
5139 			eof = 1;
5140 			break;
5141 		}
5142 
5143 		ASSERT(p->p_stat != 0);
5144 		pid = p->p_pid;
5145 		pslot = p->p_slot;
5146 		mutex_exit(&pidlock);
5147 		error = gfs_readdir_emitn(&gstate, uiop, n,
5148 		    pmkino(0, pslot, PR_PIDDIR), pid);
5149 		if (error)
5150 			break;
5151 	}
5152 
5153 	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5154 }
5155 
5156 /* ARGSUSED */
5157 static int
5158 pr_readdir_piddir(prnode_t *pnp, uio_t *uiop, int *eofp)
5159 {
5160 	int zombie = ((pnp->pr_pcommon->prc_flags & PRC_DESTROY) != 0);
5161 	prdirent_t dirent;
5162 	prdirent_t *dirp;
5163 	offset_t off;
5164 	int error;
5165 
5166 	ASSERT(pnp->pr_type == PR_PIDDIR);
5167 
5168 	if (uiop->uio_offset < 0 ||
5169 	    uiop->uio_offset % sizeof (prdirent_t) != 0 ||
5170 	    uiop->uio_resid < sizeof (prdirent_t))
5171 		return (EINVAL);
5172 	if (pnp->pr_pcommon->prc_proc == NULL)
5173 		return (ENOENT);
5174 	if (uiop->uio_offset >= sizeof (piddir))
5175 		goto out;
5176 
5177 	/*
5178 	 * Loop until user's request is satisfied, omitting some
5179 	 * files along the way if the process is a zombie.
5180 	 */
5181 	for (dirp = &piddir[uiop->uio_offset / sizeof (prdirent_t)];
5182 	    uiop->uio_resid >= sizeof (prdirent_t) &&
5183 	    dirp < &piddir[NPIDDIRFILES+2];
5184 	    uiop->uio_offset = off + sizeof (prdirent_t), dirp++) {
5185 		off = uiop->uio_offset;
5186 		if (zombie) {
5187 			switch (dirp->d_ino) {
5188 			case PR_PIDDIR:
5189 			case PR_PROCDIR:
5190 			case PR_PSINFO:
5191 			case PR_USAGE:
5192 				break;
5193 			default:
5194 				continue;
5195 			}
5196 		}
5197 		bcopy(dirp, &dirent, sizeof (prdirent_t));
5198 		if (dirent.d_ino == PR_PROCDIR)
5199 			dirent.d_ino = PRROOTINO;
5200 		else
5201 			dirent.d_ino = pmkino(0, pnp->pr_pcommon->prc_slot,
5202 			    dirent.d_ino);
5203 		if ((error = uiomove((caddr_t)&dirent, sizeof (prdirent_t),
5204 		    UIO_READ, uiop)) != 0)
5205 			return (error);
5206 	}
5207 out:
5208 	if (eofp)
5209 		*eofp = (uiop->uio_offset >= sizeof (piddir));
5210 	return (0);
5211 }
5212 
5213 static void
5214 rebuild_objdir(struct as *as)
5215 {
5216 	struct seg *seg;
5217 	vnode_t *vp;
5218 	vattr_t vattr;
5219 	vnode_t **dir;
5220 	ulong_t nalloc;
5221 	ulong_t nentries;
5222 	int i, j;
5223 	ulong_t nold, nnew;
5224 
5225 	ASSERT(AS_WRITE_HELD(as));
5226 
5227 	if (as->a_updatedir == 0 && as->a_objectdir != NULL)
5228 		return;
5229 	as->a_updatedir = 0;
5230 
5231 	if ((nalloc = avl_numnodes(&as->a_segtree)) == 0 ||
5232 	    (seg = AS_SEGFIRST(as)) == NULL)	/* can't happen? */
5233 		return;
5234 
5235 	/*
5236 	 * Allocate space for the new object directory.
5237 	 * (This is usually about two times too many entries.)
5238 	 */
5239 	nalloc = (nalloc + 0xf) & ~0xf;		/* multiple of 16 */
5240 	dir = kmem_zalloc(nalloc * sizeof (vnode_t *), KM_SLEEP);
5241 
5242 	/* fill in the new directory with desired entries */
5243 	nentries = 0;
5244 	do {
5245 		vattr.va_mask = AT_FSID|AT_NODEID;
5246 		if (seg->s_ops == &segvn_ops &&
5247 		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
5248 		    vp != NULL && vp->v_type == VREG &&
5249 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
5250 			for (i = 0; i < nentries; i++)
5251 				if (vp == dir[i])
5252 					break;
5253 			if (i == nentries) {
5254 				ASSERT(nentries < nalloc);
5255 				dir[nentries++] = vp;
5256 			}
5257 		}
5258 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
5259 
5260 	if (as->a_objectdir == NULL) {	/* first time */
5261 		as->a_objectdir = dir;
5262 		as->a_sizedir = nalloc;
5263 		return;
5264 	}
5265 
5266 	/*
5267 	 * Null out all of the defunct entries in the old directory.
5268 	 */
5269 	nold = 0;
5270 	nnew = nentries;
5271 	for (i = 0; i < as->a_sizedir; i++) {
5272 		if ((vp = as->a_objectdir[i]) != NULL) {
5273 			for (j = 0; j < nentries; j++) {
5274 				if (vp == dir[j]) {
5275 					dir[j] = NULL;
5276 					nnew--;
5277 					break;
5278 				}
5279 			}
5280 			if (j == nentries)
5281 				as->a_objectdir[i] = NULL;
5282 			else
5283 				nold++;
5284 		}
5285 	}
5286 
5287 	if (nold + nnew > as->a_sizedir) {
5288 		/*
5289 		 * Reallocate the old directory to have enough
5290 		 * space for the old and new entries combined.
5291 		 * Round up to the next multiple of 16.
5292 		 */
5293 		ulong_t newsize = (nold + nnew + 0xf) & ~0xf;
5294 		vnode_t **newdir = kmem_zalloc(newsize * sizeof (vnode_t *),
5295 		    KM_SLEEP);
5296 		bcopy(as->a_objectdir, newdir,
5297 		    as->a_sizedir * sizeof (vnode_t *));
5298 		kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
5299 		as->a_objectdir = newdir;
5300 		as->a_sizedir = newsize;
5301 	}
5302 
5303 	/*
5304 	 * Move all new entries to the old directory and
5305 	 * deallocate the space used by the new directory.
5306 	 */
5307 	if (nnew) {
5308 		for (i = 0, j = 0; i < nentries; i++) {
5309 			if ((vp = dir[i]) == NULL)
5310 				continue;
5311 			for (; j < as->a_sizedir; j++) {
5312 				if (as->a_objectdir[j] != NULL)
5313 					continue;
5314 				as->a_objectdir[j++] = vp;
5315 				break;
5316 			}
5317 		}
5318 	}
5319 	kmem_free(dir, nalloc * sizeof (vnode_t *));
5320 }
5321 
5322 /*
5323  * Return the vnode from a slot in the process's object directory.
5324  * The caller must have locked the process's address space.
5325  * The only caller is below, in pr_readdir_objectdir().
5326  */
5327 static vnode_t *
5328 obj_entry(struct as *as, int slot)
5329 {
5330 	ASSERT(AS_LOCK_HELD(as));
5331 	if (as->a_objectdir == NULL)
5332 		return (NULL);
5333 	ASSERT(slot < as->a_sizedir);
5334 	return (as->a_objectdir[slot]);
5335 }
5336 
5337 /* ARGSUSED */
5338 static int
5339 pr_readdir_objectdir(prnode_t *pnp, uio_t *uiop, int *eofp)
5340 {
5341 	gfs_readdir_state_t gstate;
5342 	int error, eof = 0;
5343 	offset_t n;
5344 	int pslot;
5345 	size_t objdirsize;
5346 	proc_t *p;
5347 	struct as *as;
5348 	vnode_t *vp;
5349 
5350 	ASSERT(pnp->pr_type == PR_OBJECTDIR);
5351 
5352 	if ((error = prlock(pnp, ZNO)) != 0)
5353 		return (error);
5354 	p = pnp->pr_common->prc_proc;
5355 	pslot = p->p_slot;
5356 
5357 	/*
5358 	 * We drop p_lock before grabbing the address space lock
5359 	 * in order to avoid a deadlock with the clock thread.
5360 	 * The process will not disappear and its address space
5361 	 * will not change because it is marked P_PR_LOCK.
5362 	 */
5363 	mutex_exit(&p->p_lock);
5364 
5365 	if ((error = gfs_readdir_init(&gstate, 64, PRSDSIZE, uiop,
5366 	    pmkino(0, pslot, PR_PIDDIR),
5367 	    pmkino(0, pslot, PR_OBJECTDIR), 0)) != 0) {
5368 		mutex_enter(&p->p_lock);
5369 		prunlock(pnp);
5370 		return (error);
5371 	}
5372 
5373 	if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
5374 		as = NULL;
5375 		objdirsize = 0;
5376 	}
5377 
5378 	/*
5379 	 * Loop until user's request is satisfied or until
5380 	 * all mapped objects have been examined. Cannot hold
5381 	 * the address space lock for the following call as
5382 	 * gfs_readdir_pred() utimately causes a call to uiomove().
5383 	 */
5384 	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
5385 		vattr_t vattr;
5386 		char str[64];
5387 
5388 		/*
5389 		 * Set the correct size of the directory just
5390 		 * in case the process has changed it's address
5391 		 * space via mmap/munmap calls.
5392 		 */
5393 		if (as != NULL) {
5394 			AS_LOCK_ENTER(as, RW_WRITER);
5395 			if (as->a_updatedir)
5396 				rebuild_objdir(as);
5397 			objdirsize = as->a_sizedir;
5398 		}
5399 
5400 		/*
5401 		 * Find next object.
5402 		 */
5403 		vattr.va_mask = AT_FSID | AT_NODEID;
5404 		while (n < objdirsize && (((vp = obj_entry(as, n)) == NULL) ||
5405 		    (VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)
5406 		    != 0))) {
5407 			vattr.va_mask = AT_FSID | AT_NODEID;
5408 			n++;
5409 		}
5410 
5411 		if (as != NULL)
5412 			AS_LOCK_EXIT(as);
5413 
5414 		/*
5415 		 * Stop when all objects have been reported.
5416 		 */
5417 		if (n >= objdirsize) {
5418 			eof = 1;
5419 			break;
5420 		}
5421 
5422 		if (vp == p->p_exec)
5423 			(void) strcpy(str, "a.out");
5424 		else
5425 			pr_object_name(str, vp, &vattr);
5426 
5427 		error = gfs_readdir_emit(&gstate, uiop, n, vattr.va_nodeid,
5428 		    str, 0);
5429 
5430 		if (error)
5431 			break;
5432 	}
5433 
5434 	mutex_enter(&p->p_lock);
5435 	prunlock(pnp);
5436 
5437 	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5438 }
5439 
5440 /* ARGSUSED */
5441 static int
5442 pr_readdir_lwpdir(prnode_t *pnp, uio_t *uiop, int *eofp)
5443 {
5444 	gfs_readdir_state_t gstate;
5445 	int error, eof = 0;
5446 	offset_t tslot;
5447 	proc_t *p;
5448 	int pslot;
5449 	lwpdir_t *lwpdir;
5450 	int lwpdirsize;
5451 
5452 	ASSERT(pnp->pr_type == PR_LWPDIR);
5453 
5454 	p = pr_p_lock(pnp);
5455 	mutex_exit(&pr_pidlock);
5456 	if (p == NULL)
5457 		return (ENOENT);
5458 	ASSERT(p == pnp->pr_common->prc_proc);
5459 	pslot = p->p_slot;
5460 	lwpdir = p->p_lwpdir;
5461 	lwpdirsize = p->p_lwpdir_sz;
5462 
5463 	/*
5464 	 * Drop p->p_lock so we can safely do uiomove().
5465 	 * The lwp directory will not change because
5466 	 * we have the process locked with P_PR_LOCK.
5467 	 */
5468 	mutex_exit(&p->p_lock);
5469 
5470 
5471 	if ((error = gfs_readdir_init(&gstate, PLNSIZ, PRSDSIZE, uiop,
5472 	    pmkino(0, pslot, PR_PIDDIR),
5473 	    pmkino(0, pslot, PR_LWPDIR), 0)) != 0) {
5474 		mutex_enter(&p->p_lock);
5475 		prunlock(pnp);
5476 		return (error);
5477 	}
5478 
5479 	/*
5480 	 * Loop until user's request is satisfied or until all lwps
5481 	 * have been examined.
5482 	 */
5483 	while ((error = gfs_readdir_pred(&gstate, uiop, &tslot)) == 0) {
5484 		lwpent_t *lep;
5485 		uint_t tid;
5486 
5487 		/*
5488 		 * Find next LWP.
5489 		 */
5490 		while (tslot < lwpdirsize &&
5491 		    ((lep = lwpdir[tslot].ld_entry) == NULL))
5492 			tslot++;
5493 		/*
5494 		 * Stop when all lwps have been reported.
5495 		 */
5496 		if (tslot >= lwpdirsize) {
5497 			eof = 1;
5498 			break;
5499 		}
5500 
5501 		tid = lep->le_lwpid;
5502 		error = gfs_readdir_emitn(&gstate, uiop, tslot,
5503 		    pmkino(tslot, pslot, PR_LWPIDDIR), tid);
5504 		if (error)
5505 			break;
5506 	}
5507 
5508 	mutex_enter(&p->p_lock);
5509 	prunlock(pnp);
5510 
5511 	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5512 }
5513 
5514 /* ARGSUSED */
5515 static int
5516 pr_readdir_lwpiddir(prnode_t *pnp, uio_t *uiop, int *eofp)
5517 {
5518 	prcommon_t *pcp = pnp->pr_common;
5519 	int zombie = ((pcp->prc_flags & PRC_DESTROY) != 0);
5520 	prdirent_t dirent;
5521 	prdirent_t *dirp;
5522 	offset_t off;
5523 	int error;
5524 	int pslot;
5525 	int tslot;
5526 
5527 	ASSERT(pnp->pr_type == PR_LWPIDDIR);
5528 
5529 	if (uiop->uio_offset < 0 ||
5530 	    uiop->uio_offset % sizeof (prdirent_t) != 0 ||
5531 	    uiop->uio_resid < sizeof (prdirent_t))
5532 		return (EINVAL);
5533 	if (pcp->prc_proc == NULL || pcp->prc_tslot == -1)
5534 		return (ENOENT);
5535 	if (uiop->uio_offset >= sizeof (lwpiddir))
5536 		goto out;
5537 
5538 	/*
5539 	 * Loop until user's request is satisfied, omitting some files
5540 	 * along the way if the lwp is a zombie and also depending
5541 	 * on the data model of the process.
5542 	 */
5543 	pslot = pcp->prc_slot;
5544 	tslot = pcp->prc_tslot;
5545 	for (dirp = &lwpiddir[uiop->uio_offset / sizeof (prdirent_t)];
5546 	    uiop->uio_resid >= sizeof (prdirent_t) &&
5547 	    dirp < &lwpiddir[NLWPIDDIRFILES+2];
5548 	    uiop->uio_offset = off + sizeof (prdirent_t), dirp++) {
5549 		off = uiop->uio_offset;
5550 		if (zombie) {
5551 			switch (dirp->d_ino) {
5552 			case PR_LWPIDDIR:
5553 			case PR_LWPDIR:
5554 			case PR_LWPSINFO:
5555 				break;
5556 			default:
5557 				continue;
5558 			}
5559 		}
5560 #if defined(__sparc)
5561 		/* the asrs file exists only for sparc v9 _LP64 processes */
5562 		if (dirp->d_ino == PR_ASRS &&
5563 		    pcp->prc_datamodel != DATAMODEL_LP64)
5564 			continue;
5565 #endif
5566 		bcopy(dirp, &dirent, sizeof (prdirent_t));
5567 		if (dirent.d_ino == PR_LWPDIR)
5568 			dirent.d_ino = pmkino(0, pslot, dirp->d_ino);
5569 		else
5570 			dirent.d_ino = pmkino(tslot, pslot, dirp->d_ino);
5571 		if ((error = uiomove((caddr_t)&dirent, sizeof (prdirent_t),
5572 		    UIO_READ, uiop)) != 0)
5573 			return (error);
5574 	}
5575 out:
5576 	if (eofp)
5577 		*eofp = (uiop->uio_offset >= sizeof (lwpiddir));
5578 	return (0);
5579 }
5580 
5581 /*
5582  * Helper function for reading a directory which lists open file desciptors
5583  */
5584 static int
5585 pr_readdir_fdlist(prnode_t *pnp, uio_t *uiop, int *eofp,
5586     prnodetype_t dirtype, prnodetype_t entrytype)
5587 {
5588 	gfs_readdir_state_t gstate;
5589 	int error, eof = 0;
5590 	offset_t n;
5591 	proc_t *p;
5592 	int pslot;
5593 	int fddirsize;
5594 	uf_info_t *fip;
5595 
5596 	if ((error = prlock(pnp, ZNO)) != 0)
5597 		return (error);
5598 	p = pnp->pr_common->prc_proc;
5599 	pslot = p->p_slot;
5600 	fip = P_FINFO(p);
5601 	mutex_exit(&p->p_lock);
5602 
5603 	if ((error = gfs_readdir_init(&gstate, PLNSIZ, PRSDSIZE, uiop,
5604 	    pmkino(0, pslot, PR_PIDDIR), pmkino(0, pslot, dirtype), 0)) != 0) {
5605 		mutex_enter(&p->p_lock);
5606 		prunlock(pnp);
5607 		return (error);
5608 	}
5609 
5610 	mutex_enter(&fip->fi_lock);
5611 	if ((p->p_flag & SSYS) || p->p_as == &kas)
5612 		fddirsize = 0;
5613 	else
5614 		fddirsize = fip->fi_nfiles;
5615 
5616 	/*
5617 	 * Loop until user's request is satisfied or until
5618 	 * all file descriptors have been examined.
5619 	 */
5620 	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
5621 		/*
5622 		 * Find next fd.
5623 		 */
5624 		while (n < fddirsize && fip->fi_list[n].uf_file == NULL)
5625 			n++;
5626 		/*
5627 		 * Stop when all fds have been reported.
5628 		 */
5629 		if (n >= fddirsize) {
5630 			eof = 1;
5631 			break;
5632 		}
5633 
5634 		error = gfs_readdir_emitn(&gstate, uiop, n,
5635 		    pmkino(n, pslot, entrytype), n);
5636 		if (error)
5637 			break;
5638 	}
5639 
5640 	mutex_exit(&fip->fi_lock);
5641 	mutex_enter(&p->p_lock);
5642 	prunlock(pnp);
5643 
5644 	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5645 }
5646 
5647 static int
5648 pr_readdir_fddir(prnode_t *pnp, uio_t *uiop, int *eofp)
5649 {
5650 
5651 	ASSERT(pnp->pr_type == PR_FDDIR);
5652 
5653 	return (pr_readdir_fdlist(pnp, uiop, eofp, pnp->pr_type, PR_FD));
5654 }
5655 
5656 static int
5657 pr_readdir_fdinfodir(prnode_t *pnp, uio_t *uiop, int *eofp)
5658 {
5659 
5660 	ASSERT(pnp->pr_type == PR_FDINFODIR);
5661 
5662 	return (pr_readdir_fdlist(pnp, uiop, eofp, pnp->pr_type, PR_FDINFO));
5663 }
5664 
5665 /* ARGSUSED */
5666 static int
5667 pr_readdir_pathdir(prnode_t *pnp, uio_t *uiop, int *eofp)
5668 {
5669 	longlong_t bp[DIRENT64_RECLEN(64) / sizeof (longlong_t)];
5670 	dirent64_t *dirent = (dirent64_t *)bp;
5671 	int reclen;
5672 	ssize_t oresid;
5673 	offset_t off, idx;
5674 	int error = 0;
5675 	proc_t *p;
5676 	int fd, obj;
5677 	int pslot;
5678 	int fddirsize;
5679 	uf_info_t *fip;
5680 	struct as *as = NULL;
5681 	size_t objdirsize;
5682 	vattr_t vattr;
5683 	vnode_t *vp;
5684 
5685 	ASSERT(pnp->pr_type == PR_PATHDIR);
5686 
5687 	if (uiop->uio_offset < 0 ||
5688 	    uiop->uio_resid <= 0 ||
5689 	    (uiop->uio_offset % PRSDSIZE) != 0)
5690 		return (EINVAL);
5691 	oresid = uiop->uio_resid;
5692 	bzero(bp, sizeof (bp));
5693 
5694 	if ((error = prlock(pnp, ZNO)) != 0)
5695 		return (error);
5696 	p = pnp->pr_common->prc_proc;
5697 	fip = P_FINFO(p);
5698 	pslot = p->p_slot;
5699 	mutex_exit(&p->p_lock);
5700 
5701 	if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
5702 		as = NULL;
5703 		objdirsize = 0;
5704 	} else {
5705 		AS_LOCK_ENTER(as, RW_WRITER);
5706 		if (as->a_updatedir)
5707 			rebuild_objdir(as);
5708 		objdirsize = as->a_sizedir;
5709 		AS_LOCK_EXIT(as);
5710 		as = NULL;
5711 	}
5712 
5713 	mutex_enter(&fip->fi_lock);
5714 	if ((p->p_flag & SSYS) || p->p_as == &kas)
5715 		fddirsize = 0;
5716 	else
5717 		fddirsize = fip->fi_nfiles;
5718 
5719 	for (; uiop->uio_resid > 0; uiop->uio_offset = off + PRSDSIZE) {
5720 		/*
5721 		 * There are 4 special files in the path directory: ".", "..",
5722 		 * "root", and "cwd".  We handle those specially here.
5723 		 */
5724 		off = uiop->uio_offset;
5725 		idx = off / PRSDSIZE;
5726 		if (off == 0) {				/* "." */
5727 			dirent->d_ino = pmkino(0, pslot, PR_PATHDIR);
5728 			dirent->d_name[0] = '.';
5729 			dirent->d_name[1] = '\0';
5730 			reclen = DIRENT64_RECLEN(1);
5731 		} else if (idx == 1) {			/* ".." */
5732 			dirent->d_ino = pmkino(0, pslot, PR_PIDDIR);
5733 			dirent->d_name[0] = '.';
5734 			dirent->d_name[1] = '.';
5735 			dirent->d_name[2] = '\0';
5736 			reclen = DIRENT64_RECLEN(2);
5737 		} else if (idx == 2) {			/* "root" */
5738 			dirent->d_ino = pmkino(idx, pslot, PR_PATH);
5739 			(void) strcpy(dirent->d_name, "root");
5740 			reclen = DIRENT64_RECLEN(4);
5741 		} else if (idx == 3) {			/* "cwd" */
5742 			dirent->d_ino = pmkino(idx, pslot, PR_PATH);
5743 			(void) strcpy(dirent->d_name, "cwd");
5744 			reclen = DIRENT64_RECLEN(3);
5745 		} else if (idx < 4 + fddirsize) {
5746 			/*
5747 			 * In this case, we have one of the file descriptors.
5748 			 */
5749 			fd = idx - 4;
5750 			if (fip->fi_list[fd].uf_file == NULL)
5751 				continue;
5752 			dirent->d_ino = pmkino(idx, pslot, PR_PATH);
5753 			(void) pr_u32tos(fd, dirent->d_name, PLNSIZ+1);
5754 			reclen = DIRENT64_RECLEN(PLNSIZ);
5755 		} else if (idx < 4 + fddirsize + objdirsize) {
5756 			if (fip != NULL) {
5757 				mutex_exit(&fip->fi_lock);
5758 				fip = NULL;
5759 			}
5760 
5761 			/*
5762 			 * We drop p_lock before grabbing the address space lock
5763 			 * in order to avoid a deadlock with the clock thread.
5764 			 * The process will not disappear and its address space
5765 			 * will not change because it is marked P_PR_LOCK.
5766 			 */
5767 			if (as == NULL) {
5768 				as = p->p_as;
5769 				AS_LOCK_ENTER(as, RW_WRITER);
5770 			}
5771 
5772 			if (as->a_updatedir) {
5773 				rebuild_objdir(as);
5774 				objdirsize = as->a_sizedir;
5775 			}
5776 
5777 			obj = idx - 4 - fddirsize;
5778 			if ((vp = obj_entry(as, obj)) == NULL)
5779 				continue;
5780 			vattr.va_mask = AT_FSID|AT_NODEID;
5781 			if (VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) != 0)
5782 				continue;
5783 			if (vp == p->p_exec)
5784 				(void) strcpy(dirent->d_name, "a.out");
5785 			else
5786 				pr_object_name(dirent->d_name, vp, &vattr);
5787 			dirent->d_ino = pmkino(idx, pslot, PR_PATH);
5788 			reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
5789 		} else {
5790 			break;
5791 		}
5792 
5793 		dirent->d_off = uiop->uio_offset + PRSDSIZE;
5794 		dirent->d_reclen = (ushort_t)reclen;
5795 		if (reclen > uiop->uio_resid) {
5796 			/*
5797 			 * Error if no entries have been returned yet.
5798 			 */
5799 			if (uiop->uio_resid == oresid)
5800 				error = EINVAL;
5801 			break;
5802 		}
5803 		/*
5804 		 * Drop the address space lock to do the uiomove().
5805 		 */
5806 		if (as != NULL)
5807 			AS_LOCK_EXIT(as);
5808 
5809 		error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop);
5810 		if (as != NULL)
5811 			AS_LOCK_ENTER(as, RW_WRITER);
5812 
5813 		if (error)
5814 			break;
5815 	}
5816 
5817 	if (error == 0 && eofp)
5818 		*eofp = (uiop->uio_offset >= (fddirsize + 2) * PRSDSIZE);
5819 
5820 	if (fip != NULL)
5821 		mutex_exit(&fip->fi_lock);
5822 	if (as != NULL)
5823 		AS_LOCK_EXIT(as);
5824 	mutex_enter(&p->p_lock);
5825 	prunlock(pnp);
5826 	return (error);
5827 }
5828 
5829 static int
5830 pr_readdir_tmpldir(prnode_t *pnp, uio_t *uiop, int *eofp)
5831 {
5832 	proc_t *p;
5833 	int pslot, tslot;
5834 	gfs_readdir_state_t gstate;
5835 	int error, eof = 0;
5836 	offset_t n;
5837 
5838 	ASSERT(pnp->pr_type == PR_TMPLDIR);
5839 
5840 	if ((error = prlock(pnp, ZNO)) != 0)
5841 		return (error);
5842 	p = pnp->pr_common->prc_proc;
5843 	pslot = pnp->pr_common->prc_slot;
5844 	tslot = pnp->pr_common->prc_tslot;
5845 	mutex_exit(&p->p_lock);
5846 
5847 	if ((error = gfs_readdir_init(&gstate, PRDIRSIZE, PRSDSIZE, uiop,
5848 	    pmkino(tslot, pslot, PR_LWPDIR),
5849 	    pmkino(tslot, pslot, PR_TMPLDIR), 0)) != 0) {
5850 		mutex_enter(&p->p_lock);
5851 		prunlock(pnp);
5852 		return (error);
5853 	}
5854 
5855 	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
5856 		/*
5857 		 * Check for an active template.  Reading a directory's
5858 		 * contents is already racy, so we don't bother taking
5859 		 * any locks.
5860 		 */
5861 		while (n < ct_ntypes &&
5862 		    pnp->pr_common->prc_thread->t_lwp->lwp_ct_active[n] == NULL)
5863 			n++;
5864 		/*
5865 		 * Stop when all types have been reported.
5866 		 */
5867 		if (n >= ct_ntypes) {
5868 			eof = 1;
5869 			break;
5870 		}
5871 		/*
5872 		 * The pmkino invocation below will need to be updated
5873 		 * when we create our fifth contract type.
5874 		 */
5875 		ASSERT(ct_ntypes <= 4);
5876 		error = gfs_readdir_emit(&gstate, uiop, n,
5877 		    pmkino((tslot << 2) | n, pslot, PR_TMPL),
5878 		    ct_types[n]->ct_type_name, 0);
5879 		if (error)
5880 			break;
5881 	}
5882 
5883 	mutex_enter(&p->p_lock);
5884 	prunlock(pnp);
5885 
5886 	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5887 }
5888 
5889 static int
5890 pr_readdir_ctdir(prnode_t *pnp, uio_t *uiop, int *eofp)
5891 {
5892 	proc_t *p;
5893 	int pslot;
5894 	gfs_readdir_state_t gstate;
5895 	int error, eof = 0;
5896 	offset_t n;
5897 	uint64_t zid;
5898 
5899 	ASSERT(pnp->pr_type == PR_CTDIR);
5900 
5901 	if ((error = prlock(pnp, ZNO)) != 0)
5902 		return (error);
5903 	p = pnp->pr_common->prc_proc;
5904 	pslot = p->p_slot;
5905 	mutex_exit(&p->p_lock);
5906 
5907 	if ((error = gfs_readdir_init(&gstate, PRDIRSIZE, PRSDSIZE, uiop,
5908 	    pmkino(0, pslot, PR_PIDDIR), pmkino(0, pslot, PR_CTDIR), 0)) != 0) {
5909 		mutex_enter(&p->p_lock);
5910 		prunlock(pnp);
5911 		return (error);
5912 	}
5913 
5914 	zid = VTOZONE(pnp->pr_vnode)->zone_uniqid;
5915 	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
5916 		id_t next = contract_plookup(p, n, zid);
5917 		if (next == -1) {
5918 			eof = 1;
5919 			break;
5920 		}
5921 		error = gfs_readdir_emitn(&gstate, uiop, next,
5922 		    pmkino(next, pslot, PR_CT), next);
5923 		if (error)
5924 			break;
5925 	}
5926 
5927 	mutex_enter(&p->p_lock);
5928 	prunlock(pnp);
5929 
5930 	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5931 }
5932 
5933 /* ARGSUSED */
5934 static int
5935 prfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
5936 {
5937 	return (0);
5938 }
5939 
5940 /*
5941  * Utility: remove a /proc vnode from a linked list, threaded through pr_next.
5942  */
5943 static void
5944 pr_list_unlink(vnode_t *pvp, vnode_t **listp)
5945 {
5946 	vnode_t *vp;
5947 	prnode_t *pnp;
5948 
5949 	while ((vp = *listp) != NULL) {
5950 		pnp = VTOP(vp);
5951 		if (vp == pvp) {
5952 			*listp = pnp->pr_next;
5953 			pnp->pr_next = NULL;
5954 			break;
5955 		}
5956 		listp = &pnp->pr_next;
5957 	}
5958 }
5959 
/*
 * Inactive entry point for /proc vnodes.
 *
 * Node types that are not kept on the per-process lists (PR_OBJECT, PR_FD,
 * PR_FDINFO, PR_SELF, PR_PATH) simply release their parent and are freed.
 * For every other type the node is detached from its parent's pr_files[]
 * slot (where applicable) and from the list it is threaded on, any attached
 * old-style pid file node and contract hold are released, and the node is
 * freed.  If the vnode is the /proc root directory or still has other
 * holds, only the hold is dropped and the node is left alone.
 *
 * Locks are taken in the order pr_pidlock, p->p_lock (when the node still
 * refers to a process), vp->v_lock.
 */
/* ARGSUSED */
static void
prinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	prnode_t *pnp = VTOP(vp);
	prnodetype_t type = pnp->pr_type;
	proc_t *p;
	vnode_t *dp;
	vnode_t *ovp = NULL;
	prnode_t *opnp = NULL;

	switch (type) {
	case PR_OBJECT:
	case PR_FD:
	case PR_FDINFO:
	case PR_SELF:
	case PR_PATH:
		/* These are not linked into the usual lists */
		ASSERT(vp->v_count == 1);
		if ((dp = pnp->pr_parent) != NULL)
			VN_RELE(dp);
		prfreenode(pnp);
		return;
	default:
		break;
	}

	mutex_enter(&pr_pidlock);
	/* p stays NULL when the node no longer refers to a process. */
	if (pnp->pr_pcommon == NULL)
		p = NULL;
	else if ((p = pnp->pr_pcommon->prc_proc) != NULL)
		mutex_enter(&p->p_lock);
	mutex_enter(&vp->v_lock);

	/*
	 * The /proc root directory node is never torn down here, and a
	 * vnode with additional holds is still in use: just give back
	 * our hold and release the locks.
	 */
	if (type == PR_PROCDIR || vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		if (p != NULL)
			mutex_exit(&p->p_lock);
		mutex_exit(&pr_pidlock);
		return;
	}

	if ((dp = pnp->pr_parent) != NULL) {
		prnode_t *dpnp;

		switch (type) {
		case PR_PIDFILE:
		case PR_LWPIDFILE:
		case PR_OPAGEDATA:
			/* No pr_files[] slot is cleared for these types. */
			break;
		default:
			/*
			 * Clear the parent's cached pointer to this vnode,
			 * but only if the slot still refers to it.
			 */
			dpnp = VTOP(dp);
			mutex_enter(&dpnp->pr_mutex);
			if (dpnp->pr_files != NULL &&
			    dpnp->pr_files[pnp->pr_index] == vp)
				dpnp->pr_files[pnp->pr_index] = NULL;
			mutex_exit(&dpnp->pr_mutex);
			break;
		}
		/* The hold on dp itself is released at the very end. */
		pnp->pr_parent = NULL;
	}

	ASSERT(vp->v_count == 1);

	/*
	 * If we allocated an old /proc/pid node, free it too.
	 */
	if (pnp->pr_pidfile != NULL) {
		ASSERT(type == PR_PIDDIR);
		ovp = pnp->pr_pidfile;
		opnp = VTOP(ovp);
		ASSERT(opnp->pr_type == PR_PIDFILE);
		pnp->pr_pidfile = NULL;
	}

	mutex_exit(&pr_pidlock);

	if (p != NULL) {
		/*
		 * Remove the vnodes from the lists of
		 * /proc vnodes for the process.
		 */
		int slot;

		switch (type) {
		case PR_PIDDIR:
			pr_list_unlink(vp, &p->p_trace);
			break;
		case PR_LWPIDDIR:
			/* A tslot of -1 means there is no lwp directory slot. */
			if ((slot = pnp->pr_common->prc_tslot) != -1) {
				lwpent_t *lep = p->p_lwpdir[slot].ld_entry;
				pr_list_unlink(vp, &lep->le_trace);
			}
			break;
		default:
			pr_list_unlink(vp, &p->p_plist);
			break;
		}
		if (ovp != NULL)
			pr_list_unlink(ovp, &p->p_plist);
		mutex_exit(&p->p_lock);
	}

	mutex_exit(&vp->v_lock);

	/* Drop the contract reference held by a PR_CT node. */
	if (type == PR_CT && pnp->pr_contract != NULL) {
		contract_rele(pnp->pr_contract);
		pnp->pr_contract = NULL;
	}

	if (opnp != NULL)
		prfreenode(opnp);
	prfreenode(pnp);
	/* Finally release the hold on the parent directory, if any. */
	if (dp != NULL) {
		VN_RELE(dp);
	}
}
6078 
6079 /* ARGSUSED */
6080 static int
6081 prseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
6082 {
6083 	return (0);
6084 }
6085 
6086 /*
6087  * We use the p_execdir member of proc_t to expand the %d token in core file
6088  * paths (the directory path for the executable that dumped core; see
6089  * coreadm(8) for details). We'd like gcore(1) to be able to expand %d in
6090  * the same way as core dumping from the kernel, but there's no convenient
6091  * and comprehensible way to export the path name for p_execdir. To solve
6092  * this, we try to find the actual path to the executable that was used. In
6093  * pr_lookup_pathdir(), we mark the a.out path name vnode with the PR_AOUT
6094  * flag, and use that here to indicate that more work is needed beyond the
6095  * call to vnodetopath().
6096  */
6097 static int
6098 prreadlink_lookup(prnode_t *pnp, char *buf, size_t size, cred_t *cr)
6099 {
6100 	proc_t *p;
6101 	vnode_t *vp, *execvp, *vrootp;
6102 	int ret;
6103 	size_t len;
6104 	dirent64_t *dp;
6105 	size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
6106 	char *dbuf;
6107 
6108 	p = curproc;
6109 	mutex_enter(&p->p_lock);
6110 	if ((vrootp = PTOU(p)->u_rdir) == NULL)
6111 		vrootp = rootdir;
6112 	VN_HOLD(vrootp);
6113 	mutex_exit(&p->p_lock);
6114 
6115 	ret = vnodetopath(vrootp, pnp->pr_realvp, buf, size, cr);
6116 
6117 	/*
6118 	 * If PR_AOUT isn't set, then we looked up the path for the vnode;
6119 	 * otherwise, we looked up the path for (what we believe to be) the
6120 	 * containing directory.
6121 	 */
6122 	if ((pnp->pr_flags & PR_AOUT) == 0) {
6123 		VN_RELE(vrootp);
6124 		return (ret);
6125 	}
6126 
6127 	/*
6128 	 * Fail if there's a problem locking the process. This will only
6129 	 * occur if the process is changing so the information we would
6130 	 * report would already be invalid.
6131 	 */
6132 	if (prlock(pnp, ZNO) != 0) {
6133 		VN_RELE(vrootp);
6134 		return (EIO);
6135 	}
6136 
6137 	p = pnp->pr_common->prc_proc;
6138 	mutex_exit(&p->p_lock);
6139 
6140 	execvp = p->p_exec;
6141 	VN_HOLD(execvp);
6142 
6143 	/*
6144 	 * If our initial lookup of the directory failed, fall back to
6145 	 * the path name information for p_exec.
6146 	 */
6147 	if (ret != 0) {
6148 		mutex_enter(&p->p_lock);
6149 		prunlock(pnp);
6150 		ret = vnodetopath(vrootp, execvp, buf, size, cr);
6151 		VN_RELE(execvp);
6152 		VN_RELE(vrootp);
6153 		return (ret);
6154 	}
6155 
6156 	len = strlen(buf);
6157 
6158 	/*
6159 	 * We use u_comm as a guess for the last component of the full
6160 	 * executable path name. If there isn't going to be enough space
6161 	 * we fall back to using the p_exec so that we can have _an_
6162 	 * answer even if it's not perfect.
6163 	 */
6164 	if (strlen(PTOU(p)->u_comm) + len + 1 < size) {
6165 		buf[len] = '/';
6166 		(void) strcpy(buf + len + 1, PTOU(p)->u_comm);
6167 		mutex_enter(&p->p_lock);
6168 		prunlock(pnp);
6169 
6170 		/*
6171 		 * Do a forward lookup of our u_comm guess.
6172 		 */
6173 		if (lookupnameat(buf + len + 1, UIO_SYSSPACE, FOLLOW, NULLVPP,
6174 		    &vp, pnp->pr_realvp) == 0) {
6175 			if (vn_compare(vp, execvp)) {
6176 				VN_RELE(vp);
6177 				VN_RELE(execvp);
6178 				VN_RELE(vrootp);
6179 				return (0);
6180 			}
6181 
6182 			VN_RELE(vp);
6183 		}
6184 	} else {
6185 		mutex_enter(&p->p_lock);
6186 		prunlock(pnp);
6187 	}
6188 
6189 	dbuf = kmem_alloc(dlen, KM_SLEEP);
6190 
6191 	/*
6192 	 * Try to find a matching vnode by iterating through the directory's
6193 	 * entries. If that fails, fall back to the path information for
6194 	 * p_exec.
6195 	 */
6196 	if ((ret = dirfindvp(vrootp, pnp->pr_realvp, execvp, cr, dbuf,
6197 	    dlen, &dp)) == 0 && strlen(dp->d_name) + len + 1 < size) {
6198 		buf[len] = '/';
6199 		(void) strcpy(buf + len + 1, dp->d_name);
6200 	} else {
6201 		ret = vnodetopath(vrootp, execvp, buf, size, cr);
6202 	}
6203 
6204 	kmem_free(dbuf, dlen);
6205 	VN_RELE(execvp);
6206 	VN_RELE(vrootp);
6207 
6208 	return (ret);
6209 }
6210 
6211 /* ARGSUSED */
6212 static int
6213 prreadlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ctp)
6214 {
6215 	prnode_t *pnp = VTOP(vp);
6216 	char *buf;
6217 	int ret = EINVAL;
6218 	char idbuf[16];
6219 	int length, rlength;
6220 	contract_t *ct;
6221 
6222 	switch (pnp->pr_type) {
6223 	case PR_SELF:
6224 		(void) snprintf(idbuf, sizeof (idbuf), "%d", curproc->p_pid);
6225 		ret = uiomove(idbuf, strlen(idbuf), UIO_READ, uiop);
6226 		break;
6227 	case PR_OBJECT:
6228 	case PR_FD:
6229 	case PR_CURDIR:
6230 	case PR_ROOTDIR:
6231 		if (pnp->pr_realvp->v_type == VDIR)
6232 			ret = 0;
6233 		break;
6234 	case PR_PATH:
6235 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6236 
6237 		if ((ret = prreadlink_lookup(pnp, buf, MAXPATHLEN, cr)) == 0)
6238 			ret = uiomove(buf, strlen(buf), UIO_READ, uiop);
6239 
6240 		kmem_free(buf, MAXPATHLEN);
6241 		break;
6242 	case PR_CT:
6243 		ASSERT(pnp->pr_contract != NULL);
6244 		ct = pnp->pr_contract;
6245 		length = sizeof (CTFS_ROOT "//") + sizeof (idbuf) +
6246 		    strlen(ct->ct_type->ct_type_name);
6247 		buf = kmem_alloc(length, KM_SLEEP);
6248 		rlength = snprintf(buf, length, CTFS_ROOT "/%s/%d",
6249 		    ct->ct_type->ct_type_name, ct->ct_id);
6250 		ASSERT(rlength < length);
6251 		ret = uiomove(buf, rlength, UIO_READ, uiop);
6252 		kmem_free(buf, length);
6253 		break;
6254 	default:
6255 		break;
6256 	}
6257 
6258 	return (ret);
6259 }
6260 
6261 /*ARGSUSED2*/
6262 static int
6263 prcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
6264 {
6265 	prnode_t *pp1, *pp2;
6266 
6267 	if (vp1 == vp2)
6268 		return (1);
6269 
6270 	if (!vn_matchops(vp1, prvnodeops) || !vn_matchops(vp2, prvnodeops))
6271 		return (0);
6272 
6273 	pp1 = VTOP(vp1);
6274 	pp2 = VTOP(vp2);
6275 
6276 	if (pp1->pr_type != pp2->pr_type)
6277 		return (0);
6278 	if (pp1->pr_type == PR_PROCDIR)
6279 		return (1);
6280 	if (pp1->pr_ino || pp2->pr_ino)
6281 		return (pp2->pr_ino == pp1->pr_ino);
6282 
6283 	if (pp1->pr_common == NULL || pp2->pr_common == NULL)
6284 		return (0);
6285 
6286 	return (pp1->pr_common->prc_slot == pp2->pr_common->prc_slot &&
6287 	    pp1->pr_common->prc_tslot == pp2->pr_common->prc_tslot);
6288 }
6289 
6290 static int
6291 prrealvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
6292 {
6293 	vnode_t *rvp;
6294 
6295 	if ((rvp = VTOP(vp)->pr_realvp) != NULL) {
6296 		vp = rvp;
6297 		if (VOP_REALVP(vp, &rvp, ct) == 0)
6298 			vp = rvp;
6299 	}
6300 
6301 	*vpp = vp;
6302 	return (0);
6303 }
6304 
/*
 * Return the answer requested to poll().
 * POLLIN, POLLRDNORM, and POLLOUT are recognized as in fs_poll().
 * In addition, these have special meaning for /proc files:
 *	POLLPRI		process or lwp stopped on an event of interest
 *	POLLERR		/proc file descriptor is invalid
 *	POLLHUP		process or lwp has terminated
 *
 * The computed events are stored in *reventsp.  *phpp is set to the
 * pollhead of the node's prcommon when the caller should be woken later
 * (nothing is ready and no other descriptor has reported anything yet, or
 * POLLET was requested); otherwise it is left NULL.  Directories, and a
 * failure of pollunlock(), produce POLLNVAL.  The return value is 0 unless
 * prlock() fails with an error other than ENOENT or EAGAIN, in which case
 * that error is returned.
 */
/*ARGSUSED5*/
static int
prpoll(vnode_t *vp, short events, int anyyet, short *reventsp,
    pollhead_t **phpp, caller_context_t *ct)
{
	prnode_t *pnp = VTOP(vp);
	prcommon_t *pcp = pnp->pr_common;
	pollhead_t *php = &pcp->prc_pollhead;
	proc_t *p;
	short revents;
	int error;
	int lockstate;

	ASSERT(pnp->pr_type < PR_NFILES);

	/*
	 * Support for old /proc interface.
	 */
	if (pnp->pr_pidfile != NULL) {
		vp = pnp->pr_pidfile;
		pnp = VTOP(vp);
		ASSERT(pnp->pr_type == PR_PIDFILE);
		ASSERT(pnp->pr_common == pcp);
	}

	*reventsp = revents = 0;
	*phpp = (pollhead_t *)NULL;

	/* Polling a directory is reported as POLLNVAL. */
	if (vp->v_type == VDIR) {
		*reventsp |= POLLNVAL;
		return (0);
	}

	/* avoid deadlock with prnotify() */
	if (pollunlock(&lockstate) != 0) {
		*reventsp = POLLNVAL;
		return (0);
	}

	/*
	 * On failure, restore the poll locks first and then translate the
	 * two expected errors into poll events.
	 */
	if ((error = prlock(pnp, ZNO)) != 0) {
		pollrelock(lockstate);
		switch (error) {
		case ENOENT:		/* process or lwp died */
			*reventsp = POLLHUP;
			error = 0;
			break;
		case EAGAIN:		/* invalidated */
			*reventsp = POLLERR;
			error = 0;
			break;
		}
		return (error);
	}

	/*
	 * We have the process marked locked (P_PR_LOCK) and we are holding
	 * its p->p_lock.  We want to unmark the process but retain
	 * exclusive control w.r.t. other /proc controlling processes
	 * before reacquiring the polling locks.
	 *
	 * prunmark() does this for us.  It unmarks the process
	 * but retains p->p_lock so we still have exclusive control.
	 * We will drop p->p_lock at the end to relinquish control.
	 *
	 * We cannot call prunlock() at the end to relinquish control
	 * because prunlock(), like prunmark(), may drop and reacquire
	 * p->p_lock and that would lead to a lock order violation
	 * w.r.t. the polling locks we are about to reacquire.
	 */
	p = pcp->prc_proc;
	ASSERT(p != NULL);
	prunmark(p);

	pollrelock(lockstate);		/* reacquire dropped poll locks */

	/* System processes and kernel address spaces report POLLNVAL. */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		revents = POLLNVAL;
	else {
		short ev;

		/* Requested read events are always reported as ready. */
		if ((ev = (events & (POLLIN|POLLRDNORM))) != 0)
			revents |= ev;
		/*
		 * POLLWRNORM (same as POLLOUT) really should not be
		 * used to indicate that the process or lwp stopped.
		 * However, USL chose to use POLLWRNORM rather than
		 * POLLPRI to indicate this, so we just accept either
		 * requested event to indicate stopped.  (grr...)
		 */
		if ((ev = (events & (POLLPRI|POLLOUT|POLLWRNORM))) != 0) {
			kthread_t *t;

			/*
			 * For an lwp file examine that lwp's thread;
			 * otherwise let prchoose() pick a thread of the
			 * process.  Either way t is locked afterwards.
			 */
			if (pcp->prc_flags & PRC_LWP) {
				t = pcp->prc_thread;
				ASSERT(t != NULL);
				thread_lock(t);
			} else {
				t = prchoose(p);	/* returns locked t */
				ASSERT(t != NULL);
			}

			if (ISTOPPED(t) || VSTOPPED(t))
				revents |= ev;
			thread_unlock(t);
		}
	}

	*reventsp = revents;
	if ((!anyyet && revents == 0) || (events & POLLET)) {
		/*
		 * Arrange to wake up the polling lwp when
		 * the target process/lwp stops or terminates
		 * or when the file descriptor becomes invalid.
		 */
		pcp->prc_flags |= PRC_POLL;
		*phpp = php;
	}
	/* Relinquish control of the process (see the comment above). */
	mutex_exit(&p->p_lock);
	return (0);
}
6433 
/* in prioctl.c */
extern int prioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
	caller_context_t *);

/*
 * /proc vnode operations vector
 *
 * Each entry pairs a vnode operation name with the /proc routine that
 * implements it.  FRLOCK, DISPOSE and SHRLOCK have no /proc
 * implementation and are routed to fs_error instead.  The list ends with
 * a NULL/NULL pair.
 */
const fs_operation_def_t pr_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = propen },
	VOPNAME_CLOSE,		{ .vop_close = prclose },
	VOPNAME_READ,		{ .vop_read = prread },
	VOPNAME_WRITE,		{ .vop_write = prwrite },
	VOPNAME_IOCTL,		{ .vop_ioctl = prioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = prgetattr },
	VOPNAME_ACCESS,		{ .vop_access = praccess },
	VOPNAME_LOOKUP,		{ .vop_lookup = prlookup },
	VOPNAME_CREATE,		{ .vop_create = prcreate },
	VOPNAME_READDIR,	{ .vop_readdir = prreaddir },
	VOPNAME_READLINK,	{ .vop_readlink = prreadlink },
	VOPNAME_FSYNC,		{ .vop_fsync = prfsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = prinactive },
	VOPNAME_SEEK,		{ .vop_seek = prseek },
	VOPNAME_CMP,		{ .vop_cmp = prcmp },
	VOPNAME_FRLOCK,		{ .error = fs_error },
	VOPNAME_REALVP,		{ .vop_realvp = prrealvp },
	VOPNAME_POLL,		{ .vop_poll = prpoll },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	NULL,			NULL
};
6464