xref: /freebsd/sys/fs/pseudofs/pseudofs_vnops.c (revision 39beb93c3f8bdbf72a61fda42300b5ebed7390c8)
1 /*-
2  * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_pseudofs.h"
33 
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/systm.h>
37 #include <sys/ctype.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mount.h>
44 #include <sys/mutex.h>
45 #include <sys/namei.h>
46 #include <sys/proc.h>
47 #include <sys/sbuf.h>
48 #include <sys/sx.h>
49 #include <sys/sysctl.h>
50 #include <sys/vnode.h>
51 
52 #include <fs/pseudofs/pseudofs.h>
53 #include <fs/pseudofs/pseudofs_internal.h>
54 
55 /*
56  * Returns the fileno, adjusted for target pid
57  */
58 static uint32_t
59 pn_fileno(struct pfs_node *pn, pid_t pid)
60 {
61 
62 	KASSERT(pn->pn_fileno > 0,
63 	    ("%s(): no fileno allocated", __func__));
64 	if (pid != NO_PID)
65 		return (pn->pn_fileno * NO_PID + pid);
66 	return (pn->pn_fileno);
67 }
68 
69 /*
70  * Returns non-zero if given file is visible to given thread.
71  */
72 static int
73 pfs_visible_proc(struct thread *td, struct pfs_node *pn, struct proc *proc)
74 {
75 	int visible;
76 
77 	if (proc == NULL)
78 		return (0);
79 
80 	PROC_LOCK_ASSERT(proc, MA_OWNED);
81 
82 	visible = ((proc->p_flag & P_WEXIT) == 0);
83 	if (visible)
84 		visible = (p_cansee(td, proc) == 0);
85 	if (visible && pn->pn_vis != NULL)
86 		visible = pn_vis(td, proc, pn);
87 	if (!visible)
88 		return (0);
89 	return (1);
90 }
91 
92 static int
93 pfs_visible(struct thread *td, struct pfs_node *pn, pid_t pid, struct proc **p)
94 {
95 	struct proc *proc;
96 
97 	PFS_TRACE(("%s (pid: %d, req: %d)",
98 	    pn->pn_name, pid, td->td_proc->p_pid));
99 
100 	if (p)
101 		*p = NULL;
102 	if (pid == NO_PID)
103 		PFS_RETURN (1);
104 	if ((proc = pfind(pid)) == NULL)
105 		PFS_RETURN (0);
106 	if (pfs_visible_proc(td, pn, proc)) {
107 		if (p)
108 			*p = proc;
109 		else
110 			PROC_UNLOCK(proc);
111 		PFS_RETURN (1);
112 	}
113 	PROC_UNLOCK(proc);
114 	PFS_RETURN (0);
115 }
116 
117 /*
118  * Verify permissions
119  */
120 static int
121 pfs_access(struct vop_access_args *va)
122 {
123 	struct vnode *vn = va->a_vp;
124 	struct pfs_vdata *pvd = vn->v_data;
125 	struct vattr vattr;
126 	int error;
127 
128 	PFS_TRACE(("%s", pvd->pvd_pn->pn_name));
129 	(void)pvd;
130 
131 	error = VOP_GETATTR(vn, &vattr, va->a_cred);
132 	if (error)
133 		PFS_RETURN (error);
134 	error = vaccess(vn->v_type, vattr.va_mode, vattr.va_uid,
135 	    vattr.va_gid, va->a_accmode, va->a_cred, NULL);
136 	PFS_RETURN (error);
137 }
138 
139 /*
140  * Close a file or directory
141  */
142 static int
143 pfs_close(struct vop_close_args *va)
144 {
145 	struct vnode *vn = va->a_vp;
146 	struct pfs_vdata *pvd = vn->v_data;
147 	struct pfs_node *pn = pvd->pvd_pn;
148 	struct proc *proc;
149 	int error;
150 
151 	PFS_TRACE(("%s", pn->pn_name));
152 	pfs_assert_not_owned(pn);
153 
154 	/*
155 	 * Do nothing unless this is the last close and the node has a
156 	 * last-close handler.
157 	 */
158 	if (vrefcnt(vn) > 1 || pn->pn_close == NULL)
159 		PFS_RETURN (0);
160 
161 	if (pvd->pvd_pid != NO_PID) {
162 		proc = pfind(pvd->pvd_pid);
163 	} else {
164 		proc = NULL;
165 	}
166 
167 	error = pn_close(va->a_td, proc, pn);
168 
169 	if (proc != NULL)
170 		PROC_UNLOCK(proc);
171 
172 	PFS_RETURN (error);
173 }
174 
175 /*
176  * Get file attributes
177  */
178 static int
179 pfs_getattr(struct vop_getattr_args *va)
180 {
181 	struct vnode *vn = va->a_vp;
182 	struct pfs_vdata *pvd = vn->v_data;
183 	struct pfs_node *pn = pvd->pvd_pn;
184 	struct vattr *vap = va->a_vap;
185 	struct proc *proc;
186 	int error = 0;
187 
188 	PFS_TRACE(("%s", pn->pn_name));
189 	pfs_assert_not_owned(pn);
190 
191 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
192 		PFS_RETURN (ENOENT);
193 
194 	vap->va_type = vn->v_type;
195 	vap->va_fileid = pn_fileno(pn, pvd->pvd_pid);
196 	vap->va_flags = 0;
197 	vap->va_blocksize = PAGE_SIZE;
198 	vap->va_bytes = vap->va_size = 0;
199 	vap->va_filerev = 0;
200 	vap->va_fsid = vn->v_mount->mnt_stat.f_fsid.val[0];
201 	vap->va_nlink = 1;
202 	nanotime(&vap->va_ctime);
203 	vap->va_atime = vap->va_mtime = vap->va_ctime;
204 
205 	switch (pn->pn_type) {
206 	case pfstype_procdir:
207 	case pfstype_root:
208 	case pfstype_dir:
209 #if 0
210 		pfs_lock(pn);
211 		/* compute link count */
212 		pfs_unlock(pn);
213 #endif
214 		vap->va_mode = 0555;
215 		break;
216 	case pfstype_file:
217 	case pfstype_symlink:
218 		vap->va_mode = 0444;
219 		break;
220 	default:
221 		printf("shouldn't be here!\n");
222 		vap->va_mode = 0;
223 		break;
224 	}
225 
226 	if (proc != NULL) {
227 		vap->va_uid = proc->p_ucred->cr_ruid;
228 		vap->va_gid = proc->p_ucred->cr_rgid;
229 	} else {
230 		vap->va_uid = 0;
231 		vap->va_gid = 0;
232 	}
233 
234 	if (pn->pn_attr != NULL)
235 		error = pn_attr(curthread, proc, pn, vap);
236 
237 	if(proc != NULL)
238 		PROC_UNLOCK(proc);
239 
240 	PFS_RETURN (error);
241 }
242 
243 /*
244  * Perform an ioctl
245  */
246 static int
247 pfs_ioctl(struct vop_ioctl_args *va)
248 {
249 	struct vnode *vn = va->a_vp;
250 	struct pfs_vdata *pvd = vn->v_data;
251 	struct pfs_node *pn = pvd->pvd_pn;
252 	struct proc *proc;
253 	int error;
254 
255 	PFS_TRACE(("%s: %lx", pn->pn_name, va->a_command));
256 	pfs_assert_not_owned(pn);
257 
258 	if (vn->v_type != VREG)
259 		PFS_RETURN (EINVAL);
260 
261 	if (pn->pn_ioctl == NULL)
262 		PFS_RETURN (ENOTTY);
263 
264 	/*
265 	 * This is necessary because process' privileges may
266 	 * have changed since the open() call.
267 	 */
268 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
269 		PFS_RETURN (EIO);
270 
271 	error = pn_ioctl(curthread, proc, pn, va->a_command, va->a_data);
272 
273 	if (proc != NULL)
274 		PROC_UNLOCK(proc);
275 
276 	PFS_RETURN (error);
277 }
278 
279 /*
280  * Perform getextattr
281  */
282 static int
283 pfs_getextattr(struct vop_getextattr_args *va)
284 {
285 	struct vnode *vn = va->a_vp;
286 	struct pfs_vdata *pvd = vn->v_data;
287 	struct pfs_node *pn = pvd->pvd_pn;
288 	struct proc *proc;
289 	int error;
290 
291 	PFS_TRACE(("%s", pn->pn_name));
292 	pfs_assert_not_owned(pn);
293 
294 	/*
295 	 * This is necessary because either process' privileges may
296 	 * have changed since the open() call.
297 	 */
298 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
299 		PFS_RETURN (EIO);
300 
301 	if (pn->pn_getextattr == NULL)
302 		error = EOPNOTSUPP;
303 	else
304 		error = pn_getextattr(curthread, proc, pn,
305 		    va->a_attrnamespace, va->a_name, va->a_uio,
306 		    va->a_size, va->a_cred);
307 
308 	if (proc != NULL)
309 		PROC_UNLOCK(proc);
310 
311 	pfs_unlock(pn);
312 	PFS_RETURN (error);
313 }
314 
315 /*
316  * Convert a vnode to its component name
317  */
318 static int
319 pfs_vptocnp(struct vop_vptocnp_args *ap)
320 {
321 	struct vnode *vp = ap->a_vp;
322 	struct vnode **dvp = ap->a_vpp;
323 	struct pfs_vdata *pvd = vp->v_data;
324 	struct pfs_node *pd = pvd->pvd_pn;
325 	struct pfs_node *pn;
326 	struct mount *mp;
327 	char *buf = ap->a_buf;
328 	int *buflen = ap->a_buflen;
329 	char pidbuf[PFS_NAMELEN];
330 	pid_t pid = pvd->pvd_pid;
331 	int len, i, error, locked;
332 
333 	i = *buflen;
334 	error = 0;
335 
336 	pfs_lock(pd);
337 
338 	if (vp->v_type == VDIR && pd->pn_type == pfstype_root) {
339 		*dvp = vp;
340 		vhold(*dvp);
341 		pfs_unlock(pd);
342 		PFS_RETURN (0);
343 	} else if (vp->v_type == VDIR && pd->pn_type == pfstype_procdir) {
344 		len = snprintf(pidbuf, sizeof(pidbuf), "%d", pid);
345 		i -= len;
346 		if (i < 0) {
347 			error = ENOMEM;
348 			goto failed;
349 		}
350 		bcopy(pidbuf, buf + i, len);
351 	} else {
352 		i -= strlen(pd->pn_name);
353 		if (i < 0) {
354 			error = ENOMEM;
355 			goto failed;
356 		}
357 		bcopy(pd->pn_name, buf + i, strlen(pd->pn_name));
358 	}
359 
360 	pn = pd->pn_parent;
361 	pfs_unlock(pd);
362 
363 	mp = vp->v_mount;
364 	error = vfs_busy(mp, 0);
365 	if (error)
366 		return (error);
367 
368 	/*
369 	 * vp is held by caller.
370 	 */
371 	locked = VOP_ISLOCKED(vp);
372 	VOP_UNLOCK(vp, 0);
373 
374 	error = pfs_vncache_alloc(mp, dvp, pn, pid);
375 	if (error) {
376 		vn_lock(vp, locked | LK_RETRY);
377 		vfs_unbusy(mp);
378 		PFS_RETURN(error);
379 	}
380 
381 	*buflen = i;
382 	vhold(*dvp);
383 	vput(*dvp);
384 	vn_lock(vp, locked | LK_RETRY);
385 	vfs_unbusy(mp);
386 
387 	PFS_RETURN (0);
388 failed:
389 	pfs_unlock(pd);
390 	PFS_RETURN(error);
391 }
392 
393 /*
394  * Look up a file or directory
395  */
396 static int
397 pfs_lookup(struct vop_cachedlookup_args *va)
398 {
399 	struct vnode *vn = va->a_dvp;
400 	struct vnode **vpp = va->a_vpp;
401 	struct componentname *cnp = va->a_cnp;
402 	struct pfs_vdata *pvd = vn->v_data;
403 	struct pfs_node *pd = pvd->pvd_pn;
404 	struct pfs_node *pn, *pdn = NULL;
405 	pid_t pid = pvd->pvd_pid;
406 	char *pname;
407 	int error, i, namelen, visible;
408 
409 	PFS_TRACE(("%.*s", (int)cnp->cn_namelen, cnp->cn_nameptr));
410 	pfs_assert_not_owned(pd);
411 
412 	if (vn->v_type != VDIR)
413 		PFS_RETURN (ENOTDIR);
414 
415 	error = VOP_ACCESS(vn, VEXEC, cnp->cn_cred, cnp->cn_thread);
416 	if (error)
417 		PFS_RETURN (error);
418 
419 	/*
420 	 * Don't support DELETE or RENAME.  CREATE is supported so
421 	 * that O_CREAT will work, but the lookup will still fail if
422 	 * the file does not exist.
423 	 */
424 	if ((cnp->cn_flags & ISLASTCN) &&
425 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
426 		PFS_RETURN (EOPNOTSUPP);
427 
428 	/* shortcut: check if the name is too long */
429 	if (cnp->cn_namelen >= PFS_NAMELEN)
430 		PFS_RETURN (ENOENT);
431 
432 	/* check that parent directory is visible... */
433 	if (!pfs_visible(curthread, pd, pvd->pvd_pid, NULL))
434 		PFS_RETURN (ENOENT);
435 
436 	/* self */
437 	namelen = cnp->cn_namelen;
438 	pname = cnp->cn_nameptr;
439 	if (namelen == 1 && pname[0] == '.') {
440 		pn = pd;
441 		*vpp = vn;
442 		VREF(vn);
443 		PFS_RETURN (0);
444 	}
445 
446 	/* parent */
447 	if (cnp->cn_flags & ISDOTDOT) {
448 		if (pd->pn_type == pfstype_root)
449 			PFS_RETURN (EIO);
450 		VOP_UNLOCK(vn, 0);
451 		KASSERT(pd->pn_parent != NULL,
452 		    ("%s(): non-root directory has no parent", __func__));
453 		/*
454 		 * This one is tricky.  Descendents of procdir nodes
455 		 * inherit their parent's process affinity, but
456 		 * there's no easy reverse mapping.  For simplicity,
457 		 * we assume that if this node is a procdir, its
458 		 * parent isn't (which is correct as long as
459 		 * descendents of procdir nodes are never procdir
460 		 * nodes themselves)
461 		 */
462 		if (pd->pn_type == pfstype_procdir)
463 			pid = NO_PID;
464 		pfs_lock(pd);
465 		pn = pd->pn_parent;
466 		pfs_unlock(pd);
467 		goto got_pnode;
468 	}
469 
470 	pfs_lock(pd);
471 
472 	/* named node */
473 	for (pn = pd->pn_nodes; pn != NULL; pn = pn->pn_next)
474 		if (pn->pn_type == pfstype_procdir)
475 			pdn = pn;
476 		else if (pn->pn_name[namelen] == '\0' &&
477 		    bcmp(pname, pn->pn_name, namelen) == 0) {
478 			pfs_unlock(pd);
479 			goto got_pnode;
480 		}
481 
482 	/* process dependent node */
483 	if ((pn = pdn) != NULL) {
484 		pid = 0;
485 		for (pid = 0, i = 0; i < namelen && isdigit(pname[i]); ++i)
486 			if ((pid = pid * 10 + pname[i] - '0') > PID_MAX)
487 				break;
488 		if (i == cnp->cn_namelen) {
489 			pfs_unlock(pd);
490 			goto got_pnode;
491 		}
492 	}
493 
494 	pfs_unlock(pd);
495 
496 	PFS_RETURN (ENOENT);
497 
498  got_pnode:
499 	pfs_assert_not_owned(pd);
500 	pfs_assert_not_owned(pn);
501 	visible = pfs_visible(curthread, pn, pid, NULL);
502 	if (!visible) {
503 		error = ENOENT;
504 		goto failed;
505 	}
506 
507 	error = pfs_vncache_alloc(vn->v_mount, vpp, pn, pid);
508 	if (error)
509 		goto failed;
510 
511 	if (cnp->cn_flags & ISDOTDOT)
512 		vn_lock(vn, LK_EXCLUSIVE|LK_RETRY);
513 	if (cnp->cn_flags & MAKEENTRY)
514 		cache_enter(vn, *vpp, cnp);
515 	PFS_RETURN (0);
516  failed:
517 	if (cnp->cn_flags & ISDOTDOT)
518 		vn_lock(vn, LK_EXCLUSIVE|LK_RETRY);
519 	PFS_RETURN(error);
520 }
521 
522 /*
523  * Open a file or directory.
524  */
525 static int
526 pfs_open(struct vop_open_args *va)
527 {
528 	struct vnode *vn = va->a_vp;
529 	struct pfs_vdata *pvd = vn->v_data;
530 	struct pfs_node *pn = pvd->pvd_pn;
531 	int mode = va->a_mode;
532 
533 	PFS_TRACE(("%s (mode 0x%x)", pn->pn_name, mode));
534 	pfs_assert_not_owned(pn);
535 
536 	/* check if the requested mode is permitted */
537 	if (((mode & FREAD) && !(mode & PFS_RD)) ||
538 	    ((mode & FWRITE) && !(mode & PFS_WR)))
539 		PFS_RETURN (EPERM);
540 
541 	/* we don't support locking */
542 	if ((mode & O_SHLOCK) || (mode & O_EXLOCK))
543 		PFS_RETURN (EOPNOTSUPP);
544 
545 	PFS_RETURN (0);
546 }
547 
548 /*
549  * Read from a file
550  */
551 static int
552 pfs_read(struct vop_read_args *va)
553 {
554 	struct vnode *vn = va->a_vp;
555 	struct pfs_vdata *pvd = vn->v_data;
556 	struct pfs_node *pn = pvd->pvd_pn;
557 	struct uio *uio = va->a_uio;
558 	struct proc *proc;
559 	struct sbuf *sb = NULL;
560 	int error, locked;
561 	unsigned int buflen, offset, resid;
562 
563 	PFS_TRACE(("%s", pn->pn_name));
564 	pfs_assert_not_owned(pn);
565 
566 	if (vn->v_type != VREG)
567 		PFS_RETURN (EINVAL);
568 
569 	if (!(pn->pn_flags & PFS_RD))
570 		PFS_RETURN (EBADF);
571 
572 	if (pn->pn_fill == NULL)
573 		PFS_RETURN (EIO);
574 
575 	/*
576 	 * This is necessary because either process' privileges may
577 	 * have changed since the open() call.
578 	 */
579 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
580 		PFS_RETURN (EIO);
581 	if (proc != NULL) {
582 		_PHOLD(proc);
583 		PROC_UNLOCK(proc);
584 	}
585 
586 	vhold(vn);
587 	locked = VOP_ISLOCKED(vn);
588 	VOP_UNLOCK(vn, 0);
589 
590 	if (pn->pn_flags & PFS_RAWRD) {
591 		PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid));
592 		error = pn_fill(curthread, proc, pn, NULL, uio);
593 		PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid));
594 		goto ret;
595 	}
596 
597 	/* beaucoup sanity checks so we don't ask for bogus allocation */
598 	if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
599 	    (offset = uio->uio_offset) != uio->uio_offset ||
600 	    (resid = uio->uio_resid) != uio->uio_resid ||
601 	    (buflen = offset + resid + 1) < offset || buflen > INT_MAX) {
602 		if (proc != NULL)
603 			PRELE(proc);
604 		error = EINVAL;
605 		goto ret;
606 	}
607 	if (buflen > MAXPHYS + 1) {
608 		error = EIO;
609 		goto ret;
610 	}
611 
612 	sb = sbuf_new(sb, NULL, buflen, 0);
613 	if (sb == NULL) {
614 		error = EIO;
615 		goto ret;
616 	}
617 
618 	error = pn_fill(curthread, proc, pn, sb, uio);
619 
620 	if (error) {
621 		sbuf_delete(sb);
622 		goto ret;
623 	}
624 
625 	sbuf_finish(sb);
626 	error = uiomove_frombuf(sbuf_data(sb), sbuf_len(sb), uio);
627 	sbuf_delete(sb);
628 ret:
629 	vn_lock(vn, locked | LK_RETRY);
630 	vdrop(vn);
631 	if (proc != NULL)
632 		PRELE(proc);
633 	PFS_RETURN (error);
634 }
635 
636 /*
637  * Iterate through directory entries
638  */
639 static int
640 pfs_iterate(struct thread *td, struct proc *proc, struct pfs_node *pd,
641 	    struct pfs_node **pn, struct proc **p)
642 {
643 	int visible;
644 
645 	sx_assert(&allproc_lock, SX_SLOCKED);
646 	pfs_assert_owned(pd);
647  again:
648 	if (*pn == NULL) {
649 		/* first node */
650 		*pn = pd->pn_nodes;
651 	} else if ((*pn)->pn_type != pfstype_procdir) {
652 		/* next node */
653 		*pn = (*pn)->pn_next;
654 	}
655 	if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) {
656 		/* next process */
657 		if (*p == NULL)
658 			*p = LIST_FIRST(&allproc);
659 		else
660 			*p = LIST_NEXT(*p, p_list);
661 		/* out of processes: next node */
662 		if (*p == NULL)
663 			*pn = (*pn)->pn_next;
664 		else
665 			PROC_LOCK(*p);
666 	}
667 
668 	if ((*pn) == NULL)
669 		return (-1);
670 
671 	if (*p != NULL) {
672 		visible = pfs_visible_proc(td, *pn, *p);
673 		PROC_UNLOCK(*p);
674 	} else if (proc != NULL) {
675 		visible = pfs_visible_proc(td, *pn, proc);
676 	} else {
677 		visible = 1;
678 	}
679 	if (!visible)
680 		goto again;
681 
682 	return (0);
683 }
684 
685 /*
686  * Return directory entries.
687  */
688 static int
689 pfs_readdir(struct vop_readdir_args *va)
690 {
691 	struct vnode *vn = va->a_vp;
692 	struct pfs_vdata *pvd = vn->v_data;
693 	struct pfs_node *pd = pvd->pvd_pn;
694 	pid_t pid = pvd->pvd_pid;
695 	struct proc *p, *proc;
696 	struct pfs_node *pn;
697 	struct dirent *entry;
698 	struct uio *uio;
699 	off_t offset;
700 	int error, i, resid;
701 	char *buf, *ent;
702 
703 	KASSERT(pd->pn_info == vn->v_mount->mnt_data,
704 	    ("%s(): pn_info does not match mountpoint", __func__));
705 	PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid));
706 	pfs_assert_not_owned(pd);
707 
708 	if (vn->v_type != VDIR)
709 		PFS_RETURN (ENOTDIR);
710 	uio = va->a_uio;
711 
712 	/* only allow reading entire entries */
713 	offset = uio->uio_offset;
714 	resid = uio->uio_resid;
715 	if (offset < 0 || offset % PFS_DELEN != 0 ||
716 	    (resid && resid < PFS_DELEN))
717 		PFS_RETURN (EINVAL);
718 	if (resid == 0)
719 		PFS_RETURN (0);
720 
721 	/* can't do this while holding the proc lock... */
722 	buf = malloc(resid, M_IOV, M_WAITOK | M_ZERO);
723 	sx_slock(&allproc_lock);
724 	pfs_lock(pd);
725 
726         /* check if the directory is visible to the caller */
727         if (!pfs_visible(curthread, pd, pid, &proc)) {
728 		sx_sunlock(&allproc_lock);
729 		pfs_unlock(pd);
730 		free(buf, M_IOV);
731                 PFS_RETURN (ENOENT);
732 	}
733 	KASSERT(pid == NO_PID || proc != NULL,
734 	    ("%s(): no process for pid %lu", __func__, (unsigned long)pid));
735 
736 	/* skip unwanted entries */
737 	for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) {
738 		if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) {
739 			/* nothing left... */
740 			if (proc != NULL)
741 				PROC_UNLOCK(proc);
742 			pfs_unlock(pd);
743 			sx_sunlock(&allproc_lock);
744 			free(buf, M_IOV);
745 			PFS_RETURN (0);
746 		}
747 	}
748 
749 	/* fill in entries */
750 	ent = buf;
751 	while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 &&
752 	    resid >= PFS_DELEN) {
753 		entry = (struct dirent *)ent;
754 		entry->d_reclen = PFS_DELEN;
755 		entry->d_fileno = pn_fileno(pn, pid);
756 		/* PFS_DELEN was picked to fit PFS_NAMLEN */
757 		for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i)
758 			entry->d_name[i] = pn->pn_name[i];
759 		entry->d_name[i] = 0;
760 		entry->d_namlen = i;
761 		switch (pn->pn_type) {
762 		case pfstype_procdir:
763 			KASSERT(p != NULL,
764 			    ("reached procdir node with p == NULL"));
765 			entry->d_namlen = snprintf(entry->d_name,
766 			    PFS_NAMELEN, "%d", p->p_pid);
767 			/* fall through */
768 		case pfstype_root:
769 		case pfstype_dir:
770 		case pfstype_this:
771 		case pfstype_parent:
772 			entry->d_type = DT_DIR;
773 			break;
774 		case pfstype_file:
775 			entry->d_type = DT_REG;
776 			break;
777 		case pfstype_symlink:
778 			entry->d_type = DT_LNK;
779 			break;
780 		default:
781 			panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type);
782 		}
783 		PFS_TRACE(("%s", entry->d_name));
784 		offset += PFS_DELEN;
785 		resid -= PFS_DELEN;
786 		ent += PFS_DELEN;
787 	}
788 	if (proc != NULL)
789 		PROC_UNLOCK(proc);
790 	pfs_unlock(pd);
791 	sx_sunlock(&allproc_lock);
792 	PFS_TRACE(("%zd bytes", ent - buf));
793 	error = uiomove(buf, ent - buf, uio);
794 	free(buf, M_IOV);
795 	PFS_RETURN (error);
796 }
797 
798 /*
799  * Read a symbolic link
800  */
801 static int
802 pfs_readlink(struct vop_readlink_args *va)
803 {
804 	struct vnode *vn = va->a_vp;
805 	struct pfs_vdata *pvd = vn->v_data;
806 	struct pfs_node *pn = pvd->pvd_pn;
807 	struct uio *uio = va->a_uio;
808 	struct proc *proc = NULL;
809 	char buf[PATH_MAX];
810 	struct sbuf sb;
811 	int error;
812 
813 	PFS_TRACE(("%s", pn->pn_name));
814 	pfs_assert_not_owned(pn);
815 
816 	if (vn->v_type != VLNK)
817 		PFS_RETURN (EINVAL);
818 
819 	if (pn->pn_fill == NULL)
820 		PFS_RETURN (EIO);
821 
822 	if (pvd->pvd_pid != NO_PID) {
823 		if ((proc = pfind(pvd->pvd_pid)) == NULL)
824 			PFS_RETURN (EIO);
825 		if (proc->p_flag & P_WEXIT) {
826 			PROC_UNLOCK(proc);
827 			PFS_RETURN (EIO);
828 		}
829 		_PHOLD(proc);
830 		PROC_UNLOCK(proc);
831 	}
832 
833 	/* sbuf_new() can't fail with a static buffer */
834 	sbuf_new(&sb, buf, sizeof buf, 0);
835 
836 	error = pn_fill(curthread, proc, pn, &sb, NULL);
837 
838 	if (proc != NULL)
839 		PRELE(proc);
840 
841 	if (error) {
842 		sbuf_delete(&sb);
843 		PFS_RETURN (error);
844 	}
845 
846 	sbuf_finish(&sb);
847 	error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio);
848 	sbuf_delete(&sb);
849 	PFS_RETURN (error);
850 }
851 
852 /*
853  * Reclaim a vnode
854  */
855 static int
856 pfs_reclaim(struct vop_reclaim_args *va)
857 {
858 	struct vnode *vn = va->a_vp;
859 	struct pfs_vdata *pvd = vn->v_data;
860 	struct pfs_node *pn = pvd->pvd_pn;
861 
862 	PFS_TRACE(("%s", pn->pn_name));
863 	pfs_assert_not_owned(pn);
864 
865 	return (pfs_vncache_free(va->a_vp));
866 }
867 
868 /*
869  * Set attributes
870  */
871 static int
872 pfs_setattr(struct vop_setattr_args *va)
873 {
874 	struct vnode *vn = va->a_vp;
875 	struct pfs_vdata *pvd = vn->v_data;
876 	struct pfs_node *pn = pvd->pvd_pn;
877 
878 	PFS_TRACE(("%s", pn->pn_name));
879 	pfs_assert_not_owned(pn);
880 
881 	PFS_RETURN (EOPNOTSUPP);
882 }
883 
884 /*
885  * Write to a file
886  */
887 static int
888 pfs_write(struct vop_write_args *va)
889 {
890 	struct vnode *vn = va->a_vp;
891 	struct pfs_vdata *pvd = vn->v_data;
892 	struct pfs_node *pn = pvd->pvd_pn;
893 	struct uio *uio = va->a_uio;
894 	struct proc *proc;
895 	struct sbuf sb;
896 	int error;
897 
898 	PFS_TRACE(("%s", pn->pn_name));
899 	pfs_assert_not_owned(pn);
900 
901 	if (vn->v_type != VREG)
902 		PFS_RETURN (EINVAL);
903 	KASSERT(pn->pn_type != pfstype_file,
904 	    ("%s(): VREG vnode refers to non-file pfs_node", __func__));
905 
906 	if (!(pn->pn_flags & PFS_WR))
907 		PFS_RETURN (EBADF);
908 
909 	if (pn->pn_fill == NULL)
910 		PFS_RETURN (EIO);
911 
912 	/*
913 	 * This is necessary because either process' privileges may
914 	 * have changed since the open() call.
915 	 */
916 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
917 		PFS_RETURN (EIO);
918 	if (proc != NULL) {
919 		_PHOLD(proc);
920 		PROC_UNLOCK(proc);
921 	}
922 
923 	if (pn->pn_flags & PFS_RAWWR) {
924 		pfs_lock(pn);
925 		error = pn_fill(curthread, proc, pn, NULL, uio);
926 		pfs_unlock(pn);
927 		if (proc != NULL)
928 			PRELE(proc);
929 		PFS_RETURN (error);
930 	}
931 
932 	sbuf_uionew(&sb, uio, &error);
933 	if (error) {
934 		if (proc != NULL)
935 			PRELE(proc);
936 		PFS_RETURN (error);
937 	}
938 
939 	error = pn_fill(curthread, proc, pn, &sb, uio);
940 
941 	sbuf_delete(&sb);
942 	if (proc != NULL)
943 		PRELE(proc);
944 	PFS_RETURN (error);
945 }
946 
947 /*
948  * Vnode operations
949  */
950 struct vop_vector pfs_vnodeops = {
951 	.vop_default =		&default_vnodeops,
952 
953 	.vop_access =		pfs_access,
954 	.vop_cachedlookup =	pfs_lookup,
955 	.vop_close =		pfs_close,
956 	.vop_create =		VOP_EOPNOTSUPP,
957 	.vop_getattr =		pfs_getattr,
958 	.vop_getextattr =	pfs_getextattr,
959 	.vop_ioctl =		pfs_ioctl,
960 	.vop_link =		VOP_EOPNOTSUPP,
961 	.vop_lookup =		vfs_cache_lookup,
962 	.vop_mkdir =		VOP_EOPNOTSUPP,
963 	.vop_mknod =		VOP_EOPNOTSUPP,
964 	.vop_open =		pfs_open,
965 	.vop_read =		pfs_read,
966 	.vop_readdir =		pfs_readdir,
967 	.vop_readlink =		pfs_readlink,
968 	.vop_reclaim =		pfs_reclaim,
969 	.vop_remove =		VOP_EOPNOTSUPP,
970 	.vop_rename =		VOP_EOPNOTSUPP,
971 	.vop_rmdir =		VOP_EOPNOTSUPP,
972 	.vop_setattr =		pfs_setattr,
973 	.vop_symlink =		VOP_EOPNOTSUPP,
974 	.vop_vptocnp =		pfs_vptocnp,
975 	.vop_write =		pfs_write,
976 	/* XXX I've probably forgotten a few that need VOP_EOPNOTSUPP */
977 };
978