xref: /freebsd/sys/kern/vfs_subr.c (revision 0de89efe5c443f213c7ea28773ef2dc6cf3af2ed)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39  * $Id: vfs_subr.c,v 1.106 1997/09/25 16:17:57 phk Exp $
40  */
41 
42 /*
43  * External virtual filesystem routines
44  */
45 #include "opt_ddb.h"
46 #include "opt_devfs.h"
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/mount.h>
53 #include <sys/vnode.h>
54 #include <sys/stat.h>
55 #include <sys/buf.h>
56 #include <sys/malloc.h>
57 #include <sys/poll.h>
58 #include <sys/domain.h>
59 #include <sys/dirent.h>
60 
61 #include <machine/limits.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_object.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vnode_pager.h>
67 #include <sys/sysctl.h>
68 
69 #include <miscfs/specfs/specdev.h>
70 
71 #ifdef DDB
72 extern void	printlockedvnodes __P((void));
73 #endif
74 static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
75 static void	vgonel __P((struct vnode *vp, struct proc *p));
76 unsigned long	numvnodes;
77 SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
78 static void	vputrele __P((struct vnode *vp, int put));
79 
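/*
 * Translation tables between the stat/inode file type bits (S_IF*)
 * and vnode types.
 */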
80 enum vtype iftovt_tab[16] = {
81 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
82 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
83 };
84 int vttoif_tab[9] = {
85 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
86 	S_IFSOCK, S_IFIFO, S_IFMT,
87 };
88 
89 /*
90  * Insq/Remq for the vnode usage lists.
91  */
92 #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
93 #define	bufremvn(bp) {							\
94 	LIST_REMOVE(bp, b_vnbufs);					\
95 	(bp)->b_vnbufs.le_next = NOLIST;				\
96 }
97 TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
98 static u_long wantfreevnodes = 25;
99 SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
100 static u_long freevnodes = 0;
101 SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
102 
103 struct mntlist mountlist;	/* mounted filesystem list */
104 struct simplelock mountlist_slock;
105 static struct simplelock mntid_slock;
106 struct simplelock mntvnode_slock;
107 struct simplelock vnode_free_list_slock;
108 static struct simplelock spechash_slock;
109 struct nfs_public nfs_pub;	/* publicly exported FS */
110 
111 int desiredvnodes;
112 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
113 
114 static void	vfs_free_addrlist __P((struct netexport *nep));
115 static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
116 static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
117 				       struct export_args *argp));
118 
119 /*
120  * Initialize the vnode management data structures.
121  */
122 void
123 vntblinit()
124 {
125 
126 	desiredvnodes = maxproc + vm_object_cache_max;
127 	simple_lock_init(&mntvnode_slock);
128 	simple_lock_init(&mntid_slock);
129 	simple_lock_init(&spechash_slock);
130 	TAILQ_INIT(&vnode_free_list);
131 	simple_lock_init(&vnode_free_list_slock);
132 	CIRCLEQ_INIT(&mountlist);
133 }
134 
135 /*
136  * Mark a mount point as busy. Used to synchronize access and to delay
137  * unmounting. Interlock is not released on failure.
138  */
139 int
140 vfs_busy(mp, flags, interlkp, p)
141 	struct mount *mp;
142 	int flags;
143 	struct simplelock *interlkp;
144 	struct proc *p;
145 {
146 	int lkflags;
147 
148 	if (mp->mnt_flag & MNT_UNMOUNT) {
149 		if (flags & LK_NOWAIT)
150 			return (ENOENT);
151 		mp->mnt_flag |= MNT_MWAIT;
152 		if (interlkp) {
153 			simple_unlock(interlkp);
154 		}
155 		/*
156 		 * Since all busy locks are shared except the exclusive
157 		 * lock granted when unmounting, the only place that a
158 		 * wakeup needs to be done is at the release of the
159 		 * exclusive lock at the end of dounmount.
160 		 */
161 		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
162 		if (interlkp) {
163 			simple_lock(interlkp);
164 		}
165 		return (ENOENT);
166 	}
167 	lkflags = LK_SHARED;
168 	if (interlkp)
169 		lkflags |= LK_INTERLOCK;
170 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
171 		panic("vfs_busy: unexpected lock failure");
172 	return (0);
173 }
174 
175 /*
176  * Free a busy filesystem.
177  */
178 void
179 vfs_unbusy(mp, p)
180 	struct mount *mp;
181 	struct proc *p;
182 {
183 
184 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
185 }
186 
187 /*
188  * Lookup a filesystem type, and if found allocate and initialize
189  * a mount structure for it.
190  *
191  * Devname is usually updated by mount(8) after booting.
192  */
193 int
194 vfs_rootmountalloc(fstypename, devname, mpp)
195 	char *fstypename;
196 	char *devname;
197 	struct mount **mpp;
198 {
199 	struct proc *p = curproc;	/* XXX */
200 	struct vfsconf *vfsp;
201 	struct mount *mp;
202 
203 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
204 		if (!strcmp(vfsp->vfc_name, fstypename))
205 			break;
206 	if (vfsp == NULL)
207 		return (ENODEV);
208 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
209 	bzero((char *)mp, (u_long)sizeof(struct mount));
210 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
211 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
212 	LIST_INIT(&mp->mnt_vnodelist);
213 	mp->mnt_vfc = vfsp;
214 	mp->mnt_op = vfsp->vfc_vfsops;
215 	mp->mnt_flag = MNT_RDONLY;
216 	mp->mnt_vnodecovered = NULLVP;
217 	vfsp->vfc_refcount++;
218 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
219 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
220 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
221 	mp->mnt_stat.f_mntonname[0] = '/';
222 	mp->mnt_stat.f_mntonname[1] = 0;
223 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
224 	*mpp = mp;
225 	return (0);
226 }
227 
228 /*
229  * Find an appropriate filesystem to use for the root. If a filesystem
230  * has not been preselected, walk through the list of known filesystems
231  * trying those that have mountroot routines, and try them until one
232  * works or we have tried them all.
233  */
234 #ifdef notdef	/* XXX JH */
235 int
236 lite2_vfs_mountroot()
237 {
238 	struct vfsconf *vfsp;
239 	extern int (*lite2_mountroot) __P((void));
240 	int error;
241 
242 	if (lite2_mountroot != NULL)
243 		return ((*lite2_mountroot)());
244 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
245 		if (vfsp->vfc_mountroot == NULL)
246 			continue;
247 		if ((error = (*vfsp->vfc_mountroot)()) == 0)
248 			return (0);
249 		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
250 	}
251 	return (ENODEV);
252 }
253 #endif
254 
255 /*
256  * Lookup a mount point by filesystem identifier.
257  */
258 struct mount *
259 vfs_getvfs(fsid)
260 	fsid_t *fsid;
261 {
262 	register struct mount *mp;
263 
264 	simple_lock(&mountlist_slock);
265 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
266 	    mp = mp->mnt_list.cqe_next) {
267 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
268 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
269 			simple_unlock(&mountlist_slock);
270 			return (mp);
271 		}
272 	}
273 	simple_unlock(&mountlist_slock);
274 	return ((struct mount *) 0);
275 }
276 
277 /*
278  * Get a new unique fsid
279  */
280 void
281 vfs_getnewfsid(mp)
282 	struct mount *mp;
283 {
284 	static u_short xxxfs_mntid;
285 
286 	fsid_t tfsid;
287 	int mtype;
288 
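	/*
	 * Build the fsid from a pseudo device number (above the real
	 * block device majors) plus the filesystem type, and bump it
	 * until no mounted filesystem is already using it.
	 */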
289 	simple_lock(&mntid_slock);
290 	mtype = mp->mnt_vfc->vfc_typenum;
291 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
292 	mp->mnt_stat.f_fsid.val[1] = mtype;
293 	if (xxxfs_mntid == 0)
294 		++xxxfs_mntid;
295 	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
296 	tfsid.val[1] = mtype;
297 	if (mountlist.cqh_first != (void *)&mountlist) {
298 		while (vfs_getvfs(&tfsid)) {
299 			tfsid.val[0]++;
300 			xxxfs_mntid++;
301 		}
302 	}
303 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
304 	simple_unlock(&mntid_slock);
305 }
306 
307 /*
308  * Set vnode attributes to VNOVAL
309  */
310 void
311 vattr_null(vap)
312 	register struct vattr *vap;
313 {
314 
315 	vap->va_type = VNON;
316 	vap->va_size = VNOVAL;
317 	vap->va_bytes = VNOVAL;
318 	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
319 	    vap->va_fsid = vap->va_fileid =
320 	    vap->va_blocksize = vap->va_rdev =
321 	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
322 	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
323 	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
324 	    vap->va_flags = vap->va_gen = VNOVAL;
325 	vap->va_vaflags = 0;
326 }
327 
328 /*
329  * Routines having to do with the management of the vnode table.
330  */
331 extern vop_t **dead_vnodeop_p;
332 
333 /*
334  * Return the next vnode from the free list.
335  */
336 int
337 getnewvnode(tag, mp, vops, vpp)
338 	enum vtagtype tag;
339 	struct mount *mp;
340 	vop_t **vops;
341 	struct vnode **vpp;
342 {
343 	struct proc *p = curproc;	/* XXX */
344 	struct vnode *vp;
345 
346 	/*
347 	 * We take the least recently used vnode from the freelist
348 	 * if we can get it and it has no cached pages, and no
349 	 * namecache entries are relative to it.
350 	 * Otherwise we allocate a new vnode
351 	 */
352 
353 	simple_lock(&vnode_free_list_slock);
354 
355 	if (wantfreevnodes && freevnodes < wantfreevnodes) {
356 		vp = NULL;
357 	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
358 		/*
359 		 * XXX: this is only here to be backwards compatible
360 		 */
361 		vp = NULL;
362 	} else {
363 		TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
364 			if (!simple_lock_try(&vp->v_interlock))
365 				continue;
366 			if (vp->v_usecount)
367 				panic("free vnode isn't");
368 
369 			if (vp->v_object && vp->v_object->resident_page_count) {
370 				/* Don't recycle if it's caching some pages */
371 				simple_unlock(&vp->v_interlock);
372 				continue;
373 			} else if (LIST_FIRST(&vp->v_cache_src)) {
374 				/* Don't recycle if active in the namecache */
375 				simple_unlock(&vp->v_interlock);
376 				continue;
377 			} else {
378 				break;
379 			}
380 		}
381 	}
382 
383 	if (vp) {
384 		vp->v_flag |= VDOOMED;
385 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
386 		freevnodes--;
387 		simple_unlock(&vnode_free_list_slock);
388 		cache_purge(vp);
389 		vp->v_lease = NULL;
390 		if (vp->v_type != VBAD)
391 			vgonel(vp, p);
392 		else {
393 			simple_unlock(&vp->v_interlock);
394 		}
395 
396 #ifdef DIAGNOSTIC
397 		{
398 			int s;
399 
400 			if (vp->v_data)
401 				panic("cleaned vnode isn't");
402 			s = splbio();
403 			if (vp->v_numoutput)
404 				panic("Clean vnode has pending I/O's");
405 			splx(s);
406 		}
407 #endif
408 		vp->v_flag = 0;
409 		vp->v_lastr = 0;
410 		vp->v_lastw = 0;
411 		vp->v_lasta = 0;
412 		vp->v_cstart = 0;
413 		vp->v_clen = 0;
414 		vp->v_socket = 0;
415 		vp->v_writecount = 0;	/* XXX */
416 	} else {
417 		simple_unlock(&vnode_free_list_slock);
418 		vp = (struct vnode *) malloc((u_long) sizeof *vp,
419 		    M_VNODE, M_WAITOK);
420 		bzero((char *) vp, sizeof *vp);
421 		vp->v_dd = vp;
422 		cache_purge(vp);
423 		LIST_INIT(&vp->v_cache_src);
424 		TAILQ_INIT(&vp->v_cache_dst);
425 		numvnodes++;
426 	}
427 
428 	vp->v_type = VNON;
429 	vp->v_tag = tag;
430 	vp->v_op = vops;
431 	insmntque(vp, mp);
432 	*vpp = vp;
433 	vp->v_usecount = 1;
434 	vp->v_data = 0;
435 	return (0);
436 }
437 
438 /*
439  * Move a vnode from one mount queue to another.
440  */
441 void
442 insmntque(vp, mp)
443 	register struct vnode *vp;
444 	register struct mount *mp;
445 {
446 
447 	simple_lock(&mntvnode_slock);
448 	/*
449 	 * Delete from old mount point vnode list, if on one.
450 	 */
451 	if (vp->v_mount != NULL)
452 		LIST_REMOVE(vp, v_mntvnodes);
453 	/*
454 	 * Insert into list of vnodes for the new mount point, if available.
455 	 */
456 	if ((vp->v_mount = mp) == NULL) {
457 		simple_unlock(&mntvnode_slock);
458 		return;
459 	}
460 	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
461 	simple_unlock(&mntvnode_slock);
462 }
463 
464 /*
465  * Update outstanding I/O count and do wakeup if requested.
466  */
467 void
468 vwakeup(bp)
469 	register struct buf *bp;
470 {
471 	register struct vnode *vp;
472 
473 	bp->b_flags &= ~B_WRITEINPROG;
474 	if ((vp = bp->b_vp)) {
475 		vp->v_numoutput--;
476 		if (vp->v_numoutput < 0)
477 			panic("vwakeup: neg numoutput");
478 		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
479 			vp->v_flag &= ~VBWAIT;
480 			wakeup((caddr_t) &vp->v_numoutput);
481 		}
482 	}
483 }
484 
485 /*
486  * Flush out and invalidate all buffers associated with a vnode.
487  * Called with the underlying object locked.
488  */
489 int
490 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
491 	register struct vnode *vp;
492 	int flags;
493 	struct ucred *cred;
494 	struct proc *p;
495 	int slpflag, slptimeo;
496 {
497 	register struct buf *bp;
498 	struct buf *nbp, *blist;
499 	int s, error;
500 	vm_object_t object;
501 
502 	if (flags & V_SAVE) {
503 		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
504 			return (error);
505 		if (vp->v_dirtyblkhd.lh_first != NULL)
506 			panic("vinvalbuf: dirty bufs");
507 	}
508 
509 	s = splbio();
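	/*
	 * Flush the clean and dirty buffer lists; with V_SAVEMETA, buffers
	 * with negative logical block numbers (metadata) are skipped.
	 */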
510 	for (;;) {
511 		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
512 			while (blist && blist->b_lblkno < 0)
513 				blist = blist->b_vnbufs.le_next;
514 		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
515 		    (flags & V_SAVEMETA))
516 			while (blist && blist->b_lblkno < 0)
517 				blist = blist->b_vnbufs.le_next;
518 		if (!blist)
519 			break;
520 
521 		for (bp = blist; bp; bp = nbp) {
522 			nbp = bp->b_vnbufs.le_next;
523 			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
524 				continue;
525 			if (bp->b_flags & B_BUSY) {
526 				bp->b_flags |= B_WANTED;
527 				error = tsleep((caddr_t) bp,
528 				    slpflag | (PRIBIO + 1), "vinvalbuf",
529 				    slptimeo);
530 				if (error) {
531 					splx(s);
532 					return (error);
533 				}
534 				break;
535 			}
536 			bremfree(bp);
537 			bp->b_flags |= B_BUSY;
538 			/*
539 			 * XXX Since there are no node locks for NFS, I
540 			 * believe there is a slight chance that a delayed
541 			 * write will occur while sleeping just above, so
542 			 * check for it.
543 			 */
544 			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
545 				(void) VOP_BWRITE(bp);
546 				break;
547 			}
548 			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
549 			brelse(bp);
550 		}
551 	}
552 
553 	while (vp->v_numoutput > 0) {
554 		vp->v_flag |= VBWAIT;
555 		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
556 	}
557 
558 	splx(s);
559 
560 	/*
561 	 * Destroy the copy in the VM cache, too.
562 	 */
563 	object = vp->v_object;
564 	if (object != NULL) {
565 		vm_object_page_remove(object, 0, object->size,
566 		    (flags & V_SAVE) ? TRUE : FALSE);
567 	}
568 	if (!(flags & V_SAVEMETA) &&
569 	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
570 		panic("vinvalbuf: flush failed");
571 	return (0);
572 }
573 
574 /*
575  * Associate a buffer with a vnode.
576  */
577 void
578 bgetvp(vp, bp)
579 	register struct vnode *vp;
580 	register struct buf *bp;
581 {
582 	int s;
583 
584 	if (bp->b_vp)
585 		panic("bgetvp: not free");
586 	vhold(vp);
587 	bp->b_vp = vp;
588 	if (vp->v_type == VBLK || vp->v_type == VCHR)
589 		bp->b_dev = vp->v_rdev;
590 	else
591 		bp->b_dev = NODEV;
592 	/*
593 	 * Insert onto list for new vnode.
594 	 */
595 	s = splbio();
596 	bufinsvn(bp, &vp->v_cleanblkhd);
597 	splx(s);
598 }
599 
600 /*
601  * Disassociate a buffer from a vnode.
602  */
603 void
604 brelvp(bp)
605 	register struct buf *bp;
606 {
607 	struct vnode *vp;
608 	int s;
609 
610 	if (bp->b_vp == (struct vnode *) 0)
611 		panic("brelvp: NULL");
612 	/*
613 	 * Delete from old vnode list, if on one.
614 	 */
615 	s = splbio();
616 	if (bp->b_vnbufs.le_next != NOLIST)
617 		bufremvn(bp);
618 	splx(s);
619 
620 	vp = bp->b_vp;
621 	bp->b_vp = (struct vnode *) 0;
622 	vdrop(vp);
623 }
624 
625 /*
626  * Associate a p-buffer with a vnode.
627  */
628 void
629 pbgetvp(vp, bp)
630 	register struct vnode *vp;
631 	register struct buf *bp;
632 {
633 #if defined(DIAGNOSTIC)
634 	if (bp->b_vp)
635 		panic("pbgetvp: not free");
636 #endif
637 	bp->b_vp = vp;
638 	if (vp->v_type == VBLK || vp->v_type == VCHR)
639 		bp->b_dev = vp->v_rdev;
640 	else
641 		bp->b_dev = NODEV;
642 }
643 
644 /*
645  * Disassociate a p-buffer from a vnode.
646  */
647 void
648 pbrelvp(bp)
649 	register struct buf *bp;
650 {
651 	struct vnode *vp;
652 
653 #if defined(DIAGNOSTIC)
654 	if (bp->b_vp == (struct vnode *) 0)
655 		panic("pbrelvp: NULL");
656 #endif
657 
658 	bp->b_vp = (struct vnode *) 0;
659 }
660 
661 /*
662  * Reassign a buffer from one vnode to another.
663  * Used to assign file specific control information
664  * (indirect blocks) to the vnode to which they belong.
665  */
666 void
667 reassignbuf(bp, newvp)
668 	register struct buf *bp;
669 	register struct vnode *newvp;
670 {
671 	int s;
672 
673 	if (newvp == NULL) {
674 		printf("reassignbuf: NULL");
675 		return;
676 	}
677 
678 	s = splbio();
679 	/*
680 	 * Delete from old vnode list, if on one.
681 	 */
682 	if (bp->b_vnbufs.le_next != NOLIST) {
683 		bufremvn(bp);
684 		vdrop(bp->b_vp);
685 	}
686 	/*
687 	 * If dirty, put on list of dirty buffers; otherwise insert onto list
688 	 * of clean buffers.
689 	 */
690 	if (bp->b_flags & B_DELWRI) {
691 		struct buf *tbp;
692 
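		/*
		 * Keep the dirty buffer list sorted by logical block number.
		 */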
693 		tbp = newvp->v_dirtyblkhd.lh_first;
694 		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
695 			bufinsvn(bp, &newvp->v_dirtyblkhd);
696 		} else {
697 			while (tbp->b_vnbufs.le_next &&
698 				(tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
699 				tbp = tbp->b_vnbufs.le_next;
700 			}
701 			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
702 		}
703 	} else {
704 		bufinsvn(bp, &newvp->v_cleanblkhd);
705 	}
706 	bp->b_vp = newvp;
707 	vhold(bp->b_vp);
708 	splx(s);
709 }
710 
711 #ifndef DEVFS_ROOT
712 /*
713  * Create a vnode for a block device.
714  * Used for mounting the root file system.
715  */
716 int
717 bdevvp(dev, vpp)
718 	dev_t dev;
719 	struct vnode **vpp;
720 {
721 	register struct vnode *vp;
722 	struct vnode *nvp;
723 	int error;
724 
725 	if (dev == NODEV)
726 		return (0);
727 	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
728 	if (error) {
729 		*vpp = 0;
730 		return (error);
731 	}
732 	vp = nvp;
733 	vp->v_type = VBLK;
734 	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
735 		vput(vp);
736 		vp = nvp;
737 	}
738 	*vpp = vp;
739 	return (0);
740 }
741 #endif /* !DEVFS_ROOT */
742 
743 /*
744  * Check to see if the new vnode represents a special device
745  * for which we already have a vnode (either because of
746  * bdevvp() or because of a different vnode representing
747  * the same block device). If such an alias exists, deallocate
748  * the existing contents and return the aliased vnode. The
749  * caller is responsible for filling it with its new contents.
750  */
751 struct vnode *
752 checkalias(nvp, nvp_rdev, mp)
753 	register struct vnode *nvp;
754 	dev_t nvp_rdev;
755 	struct mount *mp;
756 {
757 	struct proc *p = curproc;	/* XXX */
758 	struct vnode *vp;
759 	struct vnode **vpp;
760 
761 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
762 		return (NULLVP);
763 
764 	vpp = &speclisth[SPECHASH(nvp_rdev)];
765 loop:
766 	simple_lock(&spechash_slock);
767 	for (vp = *vpp; vp; vp = vp->v_specnext) {
768 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
769 			continue;
770 		/*
771 		 * Alias, but not in use, so flush it out.
772 		 */
773 		simple_lock(&vp->v_interlock);
774 		if (vp->v_usecount == 0) {
775 			simple_unlock(&spechash_slock);
776 			vgonel(vp, p);
777 			goto loop;
778 		}
779 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
780 			simple_unlock(&spechash_slock);
781 			goto loop;
782 		}
783 		break;
784 	}
785 	if (vp == NULL || vp->v_tag != VT_NON) {
786 		MALLOC(nvp->v_specinfo, struct specinfo *,
787 		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
788 		nvp->v_rdev = nvp_rdev;
789 		nvp->v_hashchain = vpp;
790 		nvp->v_specnext = *vpp;
791 		nvp->v_specflags = 0;
792 		simple_unlock(&spechash_slock);
793 		*vpp = nvp;
794 		if (vp != NULLVP) {
795 			nvp->v_flag |= VALIASED;
796 			vp->v_flag |= VALIASED;
797 			vput(vp);
798 		}
799 		return (NULLVP);
800 	}
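	/*
	 * An existing, unused (VT_NON) alias was found: clean it out and
	 * return it to the caller, who is responsible for filling in the
	 * new contents.
	 */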
801 	simple_unlock(&spechash_slock);
802 	VOP_UNLOCK(vp, 0, p);
803 	simple_lock(&vp->v_interlock);
804 	vclean(vp, 0, p);
805 	vp->v_op = nvp->v_op;
806 	vp->v_tag = nvp->v_tag;
807 	nvp->v_type = VNON;
808 	insmntque(vp, mp);
809 	return (vp);
810 }
811 
812 /*
813  * Grab a particular vnode from the free list, increment its
814  * reference count and lock it. The vnode lock bit is set while the
815  * vnode is being eliminated in vgone. The process is awakened
816  * when the transition is completed, and an error returned to
817  * indicate that the vnode is no longer usable (possibly having
818  * been changed to a new file system type).
819  */
820 int
821 vget(vp, flags, p)
822 	register struct vnode *vp;
823 	int flags;
824 	struct proc *p;
825 {
826 	int error;
827 
828 	/*
829 	 * If the vnode is in the process of being cleaned out for
830 	 * another use, we wait for the cleaning to finish and then
831 	 * return failure. Cleaning is determined by checking that
832 	 * the VXLOCK flag is set.
833 	 */
834 	if ((flags & LK_INTERLOCK) == 0) {
835 		simple_lock(&vp->v_interlock);
836 	}
837 	if (vp->v_flag & VXLOCK) {
838 		vp->v_flag |= VXWANT;
839 		simple_unlock(&vp->v_interlock);
840 		tsleep((caddr_t)vp, PINOD, "vget", 0);
841 		return (ENOENT);
842 	}
843 	vp->v_usecount++;
844 	if (VSHOULDBUSY(vp))
845 		vbusy(vp);
846 	/*
847 	 * Create the VM object, if needed
848 	 */
849 	if ((vp->v_type == VREG) &&
850 		((vp->v_object == NULL) ||
851 			(vp->v_object->flags & OBJ_VFS_REF) == 0 ||
852 			(vp->v_object->flags & OBJ_DEAD))) {
853 		/*
854 		 * XXX vfs_object_create probably needs the interlock.
855 		 */
856 		simple_unlock(&vp->v_interlock);
857 		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
858 		simple_lock(&vp->v_interlock);
859 	}
860 	if (flags & LK_TYPE_MASK) {
861 		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)))
862 			vrele(vp);
863 		return (error);
864 	}
865 	simple_unlock(&vp->v_interlock);
866 	return (0);
867 }
868 
869 /*
870  * Stubs to use when there is no locking to be done on the underlying object.
871  * A minimal shared lock is necessary to ensure that the underlying object
872  * is not revoked while an operation is in progress. So, an active shared
873  * count is maintained in an auxiliary vnode lock structure.
874  */
875 int
876 vop_sharedlock(ap)
877 	struct vop_lock_args /* {
878 		struct vnode *a_vp;
879 		int a_flags;
880 		struct proc *a_p;
881 	} */ *ap;
882 {
883 	/*
884 	 * This code cannot be used until all the non-locking filesystems
885 	 * (notably NFS) are converted to properly lock and release nodes.
886 	 * Also, certain vnode operations change the locking state within
887 	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
888 	 * and symlink). Ideally these operations should not change the
889 	 * lock state, but should be changed to let the caller of the
890 	 * function unlock them. Otherwise all intermediate vnode layers
891 	 * (such as union, umapfs, etc) must catch these functions to do
892 	 * the necessary locking at their layer. Note that the inactive
893 	 * and lookup operations also change their lock state, but this
894 	 * cannot be avoided, so these two operations will always need
895 	 * to be handled in intermediate layers.
896 	 */
897 	struct vnode *vp = ap->a_vp;
898 	int vnflags, flags = ap->a_flags;
899 
900 	if (vp->v_vnlock == NULL) {
901 		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
902 			return (0);
903 		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
904 		    M_VNODE, M_WAITOK);
905 		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
906 	}
907 	switch (flags & LK_TYPE_MASK) {
908 	case LK_DRAIN:
909 		vnflags = LK_DRAIN;
910 		break;
911 	case LK_EXCLUSIVE:
912 #ifdef DEBUG_VFS_LOCKS
913 		/*
914 		 * Normally, we use shared locks here, but that confuses
915 		 * the locking assertions.
916 		 */
917 		vnflags = LK_EXCLUSIVE;
918 		break;
919 #endif
920 	case LK_SHARED:
921 		vnflags = LK_SHARED;
922 		break;
923 	case LK_UPGRADE:
924 	case LK_EXCLUPGRADE:
925 	case LK_DOWNGRADE:
926 		return (0);
927 	case LK_RELEASE:
928 	default:
929 		panic("vop_sharedlock: bad operation %d", flags & LK_TYPE_MASK);
930 	}
931 	if (flags & LK_INTERLOCK)
932 		vnflags |= LK_INTERLOCK;
933 	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
934 }
935 
936 /*
937  * Stubs to use when there is no locking to be done on the underlying object.
938  * A minimal shared lock is necessary to ensure that the underlying object
939  * is not revoked while an operation is in progress. So, an active shared
940  * count is maintained in an auxiliary vnode lock structure.
941  */
942 int
943 vop_nolock(ap)
944 	struct vop_lock_args /* {
945 		struct vnode *a_vp;
946 		int a_flags;
947 		struct proc *a_p;
948 	} */ *ap;
949 {
950 #ifdef notyet
951 	/*
952 	 * This code cannot be used until all the non-locking filesystems
953 	 * (notably NFS) are converted to properly lock and release nodes.
954 	 * Also, certain vnode operations change the locking state within
955 	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
956 	 * and symlink). Ideally these operations should not change the
957 	 * lock state, but should be changed to let the caller of the
958 	 * function unlock them. Otherwise all intermediate vnode layers
959 	 * (such as union, umapfs, etc) must catch these functions to do
960 	 * the necessary locking at their layer. Note that the inactive
961 	 * and lookup operations also change their lock state, but this
962 	 * cannot be avoided, so these two operations will always need
963 	 * to be handled in intermediate layers.
964 	 */
965 	struct vnode *vp = ap->a_vp;
966 	int vnflags, flags = ap->a_flags;
967 
968 	if (vp->v_vnlock == NULL) {
969 		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
970 			return (0);
971 		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
972 		    M_VNODE, M_WAITOK);
973 		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
974 	}
975 	switch (flags & LK_TYPE_MASK) {
976 	case LK_DRAIN:
977 		vnflags = LK_DRAIN;
978 		break;
979 	case LK_EXCLUSIVE:
980 	case LK_SHARED:
981 		vnflags = LK_SHARED;
982 		break;
983 	case LK_UPGRADE:
984 	case LK_EXCLUPGRADE:
985 	case LK_DOWNGRADE:
986 		return (0);
987 	case LK_RELEASE:
988 	default:
989 		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
990 	}
991 	if (flags & LK_INTERLOCK)
992 		vnflags |= LK_INTERLOCK;
993 	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
994 #else /* for now */
995 	/*
996 	 * Since we are not using the lock manager, we must clear
997 	 * the interlock here.
998 	 */
999 	if (ap->a_flags & LK_INTERLOCK) {
1000 		simple_unlock(&ap->a_vp->v_interlock);
1001 	}
1002 	return (0);
1003 #endif
1004 }
1005 
1006 /*
1007  * Do the inverse of vop_nolock, handling the interlock in a compatible way.
1008  */
1009 int
1010 vop_nounlock(ap)
1011 	struct vop_unlock_args /* {
1012 		struct vnode *a_vp;
1013 		int a_flags;
1014 		struct proc *a_p;
1015 	} */ *ap;
1016 {
1017 	struct vnode *vp = ap->a_vp;
1018 
1019 	if (vp->v_vnlock == NULL) {
1020 		if (ap->a_flags & LK_INTERLOCK)
1021 			simple_unlock(&ap->a_vp->v_interlock);
1022 		return (0);
1023 	}
1024 	return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
1025 		&ap->a_vp->v_interlock, ap->a_p));
1026 }
1027 
1028 /*
1029  * Return whether or not the node is locked.
1030  */
1031 int
1032 vop_noislocked(ap)
1033 	struct vop_islocked_args /* {
1034 		struct vnode *a_vp;
1035 	} */ *ap;
1036 {
1037 	struct vnode *vp = ap->a_vp;
1038 
1039 	if (vp->v_vnlock == NULL)
1040 		return (0);
1041 	return (lockstatus(vp->v_vnlock));
1042 }
1043 
1044 /* #ifdef DIAGNOSTIC */
1045 /*
1046  * Vnode reference, just increment the count
1047  */
1048 void
1049 vref(vp)
1050 	struct vnode *vp;
1051 {
1052 	simple_lock(&vp->v_interlock);
1053 	if (vp->v_usecount <= 0)
1054 		panic("vref used where vget required");
1055 
1056 	vp->v_usecount++;
1057 
1058 	if ((vp->v_type == VREG) &&
1059 		((vp->v_object == NULL) ||
1060 			((vp->v_object->flags & OBJ_VFS_REF) == 0) ||
1061 			(vp->v_object->flags & OBJ_DEAD))) {
1062 		/*
1063 		 * We need to lock the VP during the time that
1064 		 * the object is created.  This is necessary to
1065 		 * keep the system from re-entrantly doing it
1066 		 * multiple times.
1067 		 * XXX vfs_object_create probably needs the interlock?
1068 		 */
1069 		simple_unlock(&vp->v_interlock);
1070 		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1071 		return;
1072 	}
1073 	simple_unlock(&vp->v_interlock);
1074 }
1075 
1076 /*
1077  * Vnode put/release.
1078  * If count drops to zero, call inactive routine and return to freelist.
1079  */
1080 static void
1081 vputrele(vp, put)
1082 	struct vnode *vp;
1083 	int put;
1084 {
1085 	struct proc *p = curproc;	/* XXX */
1086 
1087 #ifdef DIAGNOSTIC
1088 	if (vp == NULL)
1089 		panic("vputrele: null vp");
1090 #endif
1091 	simple_lock(&vp->v_interlock);
1092 
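	/*
	 * If the VM object holds a VFS reference (OBJ_VFS_REF) and only
	 * the caller's reference remains besides it, drop the object's
	 * VFS reference here along with our own.
	 */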
1093 	if ((vp->v_usecount == 2) &&
1094 		vp->v_object &&
1095 		(vp->v_object->flags & OBJ_VFS_REF)) {
1096 		vp->v_usecount--;
1097 		vp->v_object->flags &= ~OBJ_VFS_REF;
1098 		if (put) {
1099 			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1100 		} else {
1101 			simple_unlock(&vp->v_interlock);
1102 		}
1103 		vm_object_deallocate(vp->v_object);
1104 		return;
1105 	}
1106 
1107 	if (vp->v_usecount > 1) {
1108 		vp->v_usecount--;
1109 		if (put) {
1110 			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1111 		} else {
1112 			simple_unlock(&vp->v_interlock);
1113 		}
1114 		return;
1115 	}
1116 
1117 	if (vp->v_usecount < 1) {
1118 #ifdef DIAGNOSTIC
1119 		vprint("vputrele: negative ref count", vp);
1120 #endif
1121 		panic("vputrele: negative ref cnt");
1122 	}
1123 
1124 	vp->v_usecount--;
1125 	if (VSHOULDFREE(vp))
1126 		vfree(vp);
1127 	/*
1128 	 * If we are doing a vput, the node is already locked, and we must
1129 	 * call VOP_INACTIVE with the node locked.  So, in the case of
1130 	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1131 	 */
1132 	if (put) {
1133 		simple_unlock(&vp->v_interlock);
1134 		VOP_INACTIVE(vp, p);
1135 	} else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1136 		VOP_INACTIVE(vp, p);
1137 	}
1138 }
1139 
1140 /*
1141  * vput(), release a reference to a locked vnode and unlock it.
1142  */
1143 void
1144 vput(vp)
1145 	struct vnode *vp;
1146 {
1147 	vputrele(vp, 1);
1148 }
1149 
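/*
 * vrele(), release a reference to an unlocked vnode.
 */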
1150 void
1151 vrele(vp)
1152 	struct vnode *vp;
1153 {
1154 	vputrele(vp, 0);
1155 }
1156 
1157 /*
1158  * Somebody doesn't want the vnode recycled.
1159  */
1160 void
1161 vhold(vp)
1162 	register struct vnode *vp;
1163 {
1164 
1165 	simple_lock(&vp->v_interlock);
1166 	vp->v_holdcnt++;
1167 	if (VSHOULDBUSY(vp))
1168 		vbusy(vp);
1169 	simple_unlock(&vp->v_interlock);
1170 }
1171 
1172 /*
1173  * One less who cares about this vnode.
1174  */
1175 void
1176 vdrop(vp)
1177 	register struct vnode *vp;
1178 {
1179 
1180 	simple_lock(&vp->v_interlock);
1181 	if (vp->v_holdcnt <= 0)
1182 		panic("holdrele: holdcnt");
1183 	vp->v_holdcnt--;
1184 	if (VSHOULDFREE(vp))
1185 		vfree(vp);
1186 	simple_unlock(&vp->v_interlock);
1187 }
1188 
1189 /*
1190  * Remove any vnodes in the vnode table belonging to mount point mp.
1191  *
1192  * If MNT_NOFORCE is specified, there should not be any active ones;
1193  * return an error if any are found (nb: this is a user error, not a
1194  * system error). If MNT_FORCE is specified, detach any active vnodes
1195  * that are found.
1196  */
1197 #ifdef DIAGNOSTIC
1198 static int busyprt = 0;		/* print out busy vnodes */
1199 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1200 #endif
1201 
1202 int
1203 vflush(mp, skipvp, flags)
1204 	struct mount *mp;
1205 	struct vnode *skipvp;
1206 	int flags;
1207 {
1208 	struct proc *p = curproc;	/* XXX */
1209 	struct vnode *vp, *nvp;
1210 	int busy = 0;
1211 
1212 	simple_lock(&mntvnode_slock);
1213 loop:
1214 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1215 		/*
1216 		 * Make sure this vnode wasn't reclaimed in getnewvnode().
1217 		 * Start over if it has (it won't be on the list anymore).
1218 		 */
1219 		if (vp->v_mount != mp)
1220 			goto loop;
1221 		nvp = vp->v_mntvnodes.le_next;
1222 		/*
1223 		 * Skip over a selected vnode.
1224 		 */
1225 		if (vp == skipvp)
1226 			continue;
1227 
1228 		simple_lock(&vp->v_interlock);
1229 		/*
1230 		 * Skip over vnodes marked VSYSTEM.
1231 		 */
1232 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1233 			simple_unlock(&vp->v_interlock);
1234 			continue;
1235 		}
1236 		/*
1237 		 * If WRITECLOSE is set, only flush out regular file vnodes
1238 		 * open for writing.
1239 		 */
1240 		if ((flags & WRITECLOSE) &&
1241 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1242 			simple_unlock(&vp->v_interlock);
1243 			continue;
1244 		}
1245 
1246 		/*
1247 		 * With v_usecount == 0, all we need to do is clear out the
1248 		 * vnode data structures and we are done.
1249 		 */
1250 		if (vp->v_usecount == 0) {
1251 			simple_unlock(&mntvnode_slock);
1252 			vgonel(vp, p);
1253 			simple_lock(&mntvnode_slock);
1254 			continue;
1255 		}
1256 
1257 		/*
1258 		 * If FORCECLOSE is set, forcibly close the vnode. For block
1259 		 * or character devices, revert to an anonymous device. For
1260 		 * all other files, just kill them.
1261 		 */
1262 		if (flags & FORCECLOSE) {
1263 			simple_unlock(&mntvnode_slock);
1264 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1265 				vgonel(vp, p);
1266 			} else {
1267 				vclean(vp, 0, p);
1268 				vp->v_op = spec_vnodeop_p;
1269 				insmntque(vp, (struct mount *) 0);
1270 			}
1271 			simple_lock(&mntvnode_slock);
1272 			continue;
1273 		}
1274 #ifdef DIAGNOSTIC
1275 		if (busyprt)
1276 			vprint("vflush: busy vnode", vp);
1277 #endif
1278 		simple_unlock(&vp->v_interlock);
1279 		busy++;
1280 	}
1281 	simple_unlock(&mntvnode_slock);
1282 	if (busy)
1283 		return (EBUSY);
1284 	return (0);
1285 }
1286 
1287 /*
1288  * Disassociate the underlying file system from a vnode.
1289  */
1290 static void
1291 vclean(vp, flags, p)
1292 	struct vnode *vp;
1293 	int flags;
1294 	struct proc *p;
1295 {
1296 	int active, irefed;
1297 	vm_object_t object;
1298 
1299 	/*
1300 	 * Check to see if the vnode is in use. If so we have to reference it
1301 	 * before we clean it out so that its count cannot fall to zero and
1302 	 * generate a race against ourselves to recycle it.
1303 	 */
1304 	if ((active = vp->v_usecount))
1305 		vp->v_usecount++;
1306 	/*
1307 	 * Prevent the vnode from being recycled or brought into use while we
1308 	 * clean it out.
1309 	 */
1310 	if (vp->v_flag & VXLOCK)
1311 		panic("vclean: deadlock");
1312 	vp->v_flag |= VXLOCK;
1313 	/*
1314 	 * Even if the count is zero, the VOP_INACTIVE routine may still
1315 	 * have the object locked while it cleans it out. The VOP_LOCK
1316 	 * ensures that the VOP_INACTIVE routine is done with its work.
1317 	 * For active vnodes, it ensures that no other activity can
1318 	 * occur while the underlying object is being cleaned out.
1319 	 */
1320 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1321 
1322 	object = vp->v_object;
1323 	irefed = 0;
1324 	if (object && ((object->flags & OBJ_DEAD) == 0)) {
1325 		if (object->ref_count == 0) {
1326 			vm_object_reference(object);
1327 			irefed = 1;
1328 		}
1329 		++object->ref_count;
1330 		pager_cache(object, FALSE);
1331 	}
1332 
1333 	/*
1334 	 * Clean out any buffers associated with the vnode.
1335 	 */
1336 	if (flags & DOCLOSE)
1337 		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1338 
1339 	if (irefed) {
1340 		vm_object_deallocate(object);
1341 	}
1342 
1343 	/*
1344 	 * If purging an active vnode, it must be closed and
1345 	 * deactivated before being reclaimed. Note that the
1346 	 * VOP_INACTIVE will unlock the vnode.
1347 	 */
1348 	if (active) {
1349 		if (flags & DOCLOSE)
1350 			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1351 		VOP_INACTIVE(vp, p);
1352 	} else {
1353 		/*
1354 		 * Any other processes trying to obtain this lock must first
1355 		 * wait for VXLOCK to clear, then call the new lock operation.
1356 		 */
1357 		VOP_UNLOCK(vp, 0, p);
1358 	}
1359 	/*
1360 	 * Reclaim the vnode.
1361 	 */
1362 	if (VOP_RECLAIM(vp, p))
1363 		panic("vclean: cannot reclaim");
1364 	if (active)
1365 		vrele(vp);
1366 	cache_purge(vp);
1367 	if (vp->v_vnlock) {
1368 #ifdef DIAGNOSTIC
1369 		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1370 			vprint("vclean: lock not drained", vp);
1371 #endif
1372 		FREE(vp->v_vnlock, M_VNODE);
1373 		vp->v_vnlock = NULL;
1374 	}
1375 
1376 	/*
1377 	 * Done with purge, notify sleepers of the grim news.
1378 	 */
1379 	vp->v_op = dead_vnodeop_p;
1380 	vp->v_tag = VT_NON;
1381 	vp->v_flag &= ~VXLOCK;
1382 	if (vp->v_flag & VXWANT) {
1383 		vp->v_flag &= ~VXWANT;
1384 		wakeup((caddr_t) vp);
1385 	}
1386 }
1387 
1388 /*
1389  * Eliminate all activity associated with the requested vnode
1390  * and with all vnodes aliased to the requested vnode.
1391  */
1392 int
1393 vop_revoke(ap)
1394 	struct vop_revoke_args /* {
1395 		struct vnode *a_vp;
1396 		int a_flags;
1397 	} */ *ap;
1398 {
1399 	struct vnode *vp, *vq;
1400 	struct proc *p = curproc;	/* XXX */
1401 
1402 #ifdef DIAGNOSTIC
1403 	if ((ap->a_flags & REVOKEALL) == 0)
1404 		panic("vop_revoke");
1405 #endif
1406 
1407 	vp = ap->a_vp;
1408 	simple_lock(&vp->v_interlock);
1409 
1410 	if (vp->v_flag & VALIASED) {
1411 		/*
1412 		 * If a vgone (or vclean) is already in progress,
1413 		 * wait until it is done and return.
1414 		 */
1415 		if (vp->v_flag & VXLOCK) {
1416 			vp->v_flag |= VXWANT;
1417 			simple_unlock(&vp->v_interlock);
1418 			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1419 			return (0);
1420 		}
1421 		/*
1422 		 * Ensure that vp will not be vgone'd while we
1423 		 * are eliminating its aliases.
1424 		 */
1425 		vp->v_flag |= VXLOCK;
1426 		simple_unlock(&vp->v_interlock);
1427 		while (vp->v_flag & VALIASED) {
1428 			simple_lock(&spechash_slock);
1429 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1430 				if (vq->v_rdev != vp->v_rdev ||
1431 				    vq->v_type != vp->v_type || vp == vq)
1432 					continue;
1433 				simple_unlock(&spechash_slock);
1434 				vgone(vq);
1435 				break;
1436 			}
1437 			if (vq == NULLVP) {
1438 				simple_unlock(&spechash_slock);
1439 			}
1440 		}
1441 		/*
1442 		 * Remove the lock so that vgone below will
1443 		 * really eliminate the vnode after which time
1444 		 * vgone will awaken any sleepers.
1445 		 */
1446 		simple_lock(&vp->v_interlock);
1447 		vp->v_flag &= ~VXLOCK;
1448 	}
1449 	vgonel(vp, p);
1450 	return (0);
1451 }
1452 
1453 /*
1454  * Recycle an unused vnode to the front of the free list.
1455  * Release the passed interlock if the vnode will be recycled.
1456  */
1457 int
1458 vrecycle(vp, inter_lkp, p)
1459 	struct vnode *vp;
1460 	struct simplelock *inter_lkp;
1461 	struct proc *p;
1462 {
1463 
1464 	simple_lock(&vp->v_interlock);
1465 	if (vp->v_usecount == 0) {
1466 		if (inter_lkp) {
1467 			simple_unlock(inter_lkp);
1468 		}
1469 		vgonel(vp, p);
1470 		return (1);
1471 	}
1472 	simple_unlock(&vp->v_interlock);
1473 	return (0);
1474 }
1475 
1476 /*
1477  * Eliminate all activity associated with a vnode
1478  * in preparation for reuse.
1479  */
1480 void
1481 vgone(vp)
1482 	register struct vnode *vp;
1483 {
1484 	struct proc *p = curproc;	/* XXX */
1485 
1486 	simple_lock(&vp->v_interlock);
1487 	vgonel(vp, p);
1488 }
1489 
1490 /*
1491  * vgone, with the vp interlock held.
1492  */
1493 static void
1494 vgonel(vp, p)
1495 	struct vnode *vp;
1496 	struct proc *p;
1497 {
1498 	struct vnode *vq;
1499 	struct vnode *vx;
1500 
1501 	/*
1502 	 * If a vgone (or vclean) is already in progress,
1503 	 * wait until it is done and return.
1504 	 */
1505 	if (vp->v_flag & VXLOCK) {
1506 		vp->v_flag |= VXWANT;
1507 		simple_unlock(&vp->v_interlock);
1508 		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1509 		return;
1510 	}
1511 
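	/*
	 * Let the VM object know that its vnode is going away.
	 */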
1512 	if (vp->v_object) {
1513 		vp->v_object->flags |= OBJ_VNODE_GONE;
1514 	}
1515 
1516 	/*
1517 	 * Clean out the filesystem specific data.
1518 	 */
1519 	vclean(vp, DOCLOSE, p);
1520 	/*
1521 	 * Delete from old mount point vnode list, if on one.
1522 	 */
1523 	if (vp->v_mount != NULL)
1524 		insmntque(vp, (struct mount *)0);
1525 	/*
1526 	 * If special device, remove it from special device alias list
1527 	 * if it is on one.
1528 	 */
1529 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1530 		simple_lock(&spechash_slock);
1531 		if (*vp->v_hashchain == vp) {
1532 			*vp->v_hashchain = vp->v_specnext;
1533 		} else {
1534 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1535 				if (vq->v_specnext != vp)
1536 					continue;
1537 				vq->v_specnext = vp->v_specnext;
1538 				break;
1539 			}
1540 			if (vq == NULL)
1541 				panic("missing bdev");
1542 		}
1543 		if (vp->v_flag & VALIASED) {
1544 			vx = NULL;
1545 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1546 				if (vq->v_rdev != vp->v_rdev ||
1547 				    vq->v_type != vp->v_type)
1548 					continue;
1549 				if (vx)
1550 					break;
1551 				vx = vq;
1552 			}
1553 			if (vx == NULL)
1554 				panic("missing alias");
1555 			if (vq == NULL)
1556 				vx->v_flag &= ~VALIASED;
1557 			vp->v_flag &= ~VALIASED;
1558 		}
1559 		simple_unlock(&spechash_slock);
1560 		FREE(vp->v_specinfo, M_VNODE);
1561 		vp->v_specinfo = NULL;
1562 	}
1563 
1564 	/*
1565 	 * If it is on the freelist and not already at the head,
1566 	 * move it to the head of the list. The test of the back
1567 	 * pointer and the reference count of zero is because
1568 	 * it will be removed from the free list by getnewvnode,
1569 	 * but will not have its reference count incremented until
1570 	 * after calling vgone. If the reference count were
1571 	 * incremented first, vgone would (incorrectly) try to
1572 	 * close the previous instance of the underlying object.
1573 	 */
1574 	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1575 		simple_lock(&vnode_free_list_slock);
1576 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1577 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1578 		simple_unlock(&vnode_free_list_slock);
1579 	}
1580 
1581 	vp->v_type = VBAD;
1582 }
1583 
1584 /*
1585  * Lookup a vnode by device number.
1586  */
1587 int
1588 vfinddev(dev, type, vpp)
1589 	dev_t dev;
1590 	enum vtype type;
1591 	struct vnode **vpp;
1592 {
1593 	register struct vnode *vp;
1594 	int rc = 0;
1595 
1596 	simple_lock(&spechash_slock);
1597 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1598 		if (dev != vp->v_rdev || type != vp->v_type)
1599 			continue;
1600 		*vpp = vp;
1601 		rc = 1;
1602 		break;
1603 	}
1604 	simple_unlock(&spechash_slock);
1605 	return (rc);
1606 }
1607 
1608 /*
1609  * Calculate the total number of references to a special device.
1610  */
1611 int
1612 vcount(vp)
1613 	register struct vnode *vp;
1614 {
1615 	struct vnode *vq, *vnext;
1616 	int count;
1617 
1618 loop:
1619 	if ((vp->v_flag & VALIASED) == 0)
1620 		return (vp->v_usecount);
1621 	simple_lock(&spechash_slock);
1622 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1623 		vnext = vq->v_specnext;
1624 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1625 			continue;
1626 		/*
1627 		 * Alias, but not in use, so flush it out.
1628 		 */
1629 		if (vq->v_usecount == 0 && vq != vp) {
1630 			simple_unlock(&spechash_slock);
1631 			vgone(vq);
1632 			goto loop;
1633 		}
1634 		count += vq->v_usecount;
1635 	}
1636 	simple_unlock(&spechash_slock);
1637 	return (count);
1638 }
1639 
1640 /*
1641  * Default poll support: report normal read/write events as always ready.
1642  */
1643 int
1644 vop_nopoll(ap)
1645 	struct vop_poll_args /* {
1646 		struct vnode *a_vp;
1647 		int  a_events;
1648 		struct ucred *a_cred;
1649 		struct proc *a_p;
1650 	} */ *ap;
1651 {
1652 
1653 	/*
1654 	 * Return the requested events, limited to the normal read/write set.
1655 	 */
1656 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1657 }
1658 
1659 /*
1660  * Print out a description of a vnode.
1661  */
1662 static char *typename[] =
1663 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1664 
1665 void
1666 vprint(label, vp)
1667 	char *label;
1668 	register struct vnode *vp;
1669 {
1670 	char buf[64];
1671 
1672 	if (label != NULL)
1673 		printf("%s: %p: ", label, (void *)vp);
1674 	else
1675 		printf("%p: ", (void *)vp);
1676 	printf("type %s, usecount %d, writecount %d, refcount %ld,",
1677 	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1678 	    vp->v_holdcnt);
1679 	buf[0] = '\0';
1680 	if (vp->v_flag & VROOT)
1681 		strcat(buf, "|VROOT");
1682 	if (vp->v_flag & VTEXT)
1683 		strcat(buf, "|VTEXT");
1684 	if (vp->v_flag & VSYSTEM)
1685 		strcat(buf, "|VSYSTEM");
1686 	if (vp->v_flag & VXLOCK)
1687 		strcat(buf, "|VXLOCK");
1688 	if (vp->v_flag & VXWANT)
1689 		strcat(buf, "|VXWANT");
1690 	if (vp->v_flag & VBWAIT)
1691 		strcat(buf, "|VBWAIT");
1692 	if (vp->v_flag & VALIASED)
1693 		strcat(buf, "|VALIASED");
1694 	if (vp->v_flag & VDOOMED)
1695 		strcat(buf, "|VDOOMED");
1696 	if (vp->v_flag & VFREE)
1697 		strcat(buf, "|VFREE");
1698 	if (buf[0] != '\0')
1699 		printf(" flags (%s)", &buf[1]);
1700 	if (vp->v_data == NULL) {
1701 		printf("\n");
1702 	} else {
1703 		printf("\n\t");
1704 		VOP_PRINT(vp);
1705 	}
1706 }
1707 
1708 #ifdef DDB
1709 /*
1710  * List all of the locked vnodes in the system.
1711  * Called when debugging the kernel.
1712  */
1713 void
1714 printlockedvnodes()
1715 {
1716 	struct proc *p = curproc;	/* XXX */
1717 	struct mount *mp, *nmp;
1718 	struct vnode *vp;
1719 
1720 	printf("Locked vnodes\n");
1721 	simple_lock(&mountlist_slock);
1722 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1723 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1724 			nmp = mp->mnt_list.cqe_next;
1725 			continue;
1726 		}
1727 		for (vp = mp->mnt_vnodelist.lh_first;
1728 		     vp != NULL;
1729 		     vp = vp->v_mntvnodes.le_next) {
1730 			if (VOP_ISLOCKED(vp))
1731 				vprint((char *)0, vp);
1732 		}
1733 		simple_lock(&mountlist_slock);
1734 		nmp = mp->mnt_list.cqe_next;
1735 		vfs_unbusy(mp, p);
1736 	}
1737 	simple_unlock(&mountlist_slock);
1738 }
1739 #endif
1740 
1741 /*
1742  * Top level filesystem related information gathering.
1743  */
1744 static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1745 
1746 static int
1747 vfs_sysctl SYSCTL_HANDLER_ARGS
1748 {
1749 	int *name = (int *)arg1 - 1;	/* XXX */
1750 	u_int namelen = arg2 + 1;	/* XXX */
1751 	struct vfsconf *vfsp;
1752 
1753 #ifndef NO_COMPAT_PRELITE2
1754 	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1755 	if (namelen == 1)
1756 		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1757 #endif
1758 
1759 #ifdef notyet
1760 	/* all sysctl names at this level are at least name and field */
1761 	if (namelen < 2)
1762 		return (ENOTDIR);		/* overloaded */
1763 	if (name[0] != VFS_GENERIC) {
1764 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1765 			if (vfsp->vfc_typenum == name[0])
1766 				break;
1767 		if (vfsp == NULL)
1768 			return (EOPNOTSUPP);
1769 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1770 		    oldp, oldlenp, newp, newlen, p));
1771 	}
1772 #endif
1773 	switch (name[1]) {
1774 	case VFS_MAXTYPENUM:
1775 		if (namelen != 2)
1776 			return (ENOTDIR);
1777 		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1778 	case VFS_CONF:
1779 		if (namelen != 3)
1780 			return (ENOTDIR);	/* overloaded */
1781 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1782 			if (vfsp->vfc_typenum == name[2])
1783 				break;
1784 		if (vfsp == NULL)
1785 			return (EOPNOTSUPP);
1786 		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1787 	}
1788 	return (EOPNOTSUPP);
1789 }
1790 
1791 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1792 	"Generic filesystem");
1793 
1794 #ifndef NO_COMPAT_PRELITE2
1795 
1796 static int
1797 sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1798 {
1799 	int error;
1800 	struct vfsconf *vfsp;
1801 	struct ovfsconf ovfs;
1802 
1803 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1804 		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
1805 		strcpy(ovfs.vfc_name, vfsp->vfc_name);
1806 		ovfs.vfc_index = vfsp->vfc_typenum;
1807 		ovfs.vfc_refcount = vfsp->vfc_refcount;
1808 		ovfs.vfc_flags = vfsp->vfc_flags;
1809 		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1810 		if (error)
1811 			return error;
1812 	}
1813 	return 0;
1814 }
1815 
1816 #endif /* !NO_COMPAT_PRELITE2 */
1817 
1818 int kinfo_vdebug = 1;
1819 int kinfo_vgetfailed;
1820 
1821 #define KINFO_VNODESLOP	10
1822 /*
1823  * Dump vnode list (via sysctl).
1824  * Copyout address of vnode followed by vnode.
1825  */
1826 /* ARGSUSED */
1827 static int
1828 sysctl_vnode SYSCTL_HANDLER_ARGS
1829 {
1830 	struct proc *p = curproc;	/* XXX */
1831 	struct mount *mp, *nmp;
1832 	struct vnode *nvp, *vp;
1833 	int error;
1834 
1835 #define VPTRSZ	sizeof (struct vnode *)
1836 #define VNODESZ	sizeof (struct vnode)
1837 
1838 	req->lock = 0;
1839 	if (!req->oldptr) /* Make an estimate */
1840 		return (SYSCTL_OUT(req, 0,
1841 			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1842 
1843 	simple_lock(&mountlist_slock);
1844 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1845 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1846 			nmp = mp->mnt_list.cqe_next;
1847 			continue;
1848 		}
1849 again:
1850 		simple_lock(&mntvnode_slock);
1851 		for (vp = mp->mnt_vnodelist.lh_first;
1852 		     vp != NULL;
1853 		     vp = nvp) {
1854 			/*
1855 			 * Check that the vp is still associated with
1856 			 * this filesystem.  RACE: could have been
1857 			 * recycled onto the same filesystem.
1858 			 */
1859 			if (vp->v_mount != mp) {
1860 				simple_unlock(&mntvnode_slock);
1861 				if (kinfo_vdebug)
1862 					printf("kinfo: vp changed\n");
1863 				goto again;
1864 			}
1865 			nvp = vp->v_mntvnodes.le_next;
1866 			simple_unlock(&mntvnode_slock);
1867 			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1868 			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
1869 				return (error);
1870 			simple_lock(&mntvnode_slock);
1871 		}
1872 		simple_unlock(&mntvnode_slock);
1873 		simple_lock(&mountlist_slock);
1874 		nmp = mp->mnt_list.cqe_next;
1875 		vfs_unbusy(mp, p);
1876 	}
1877 	simple_unlock(&mountlist_slock);
1878 
1879 	return (0);
1880 }
1881 
1882 /*
1883  * XXX
1884  * Exporting the vnode list on large systems causes them to crash.
1885  * Exporting the vnode list on medium systems causes sysctl to coredump.
1886  */
1887 #if 0
1888 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1889 	0, 0, sysctl_vnode, "S,vnode", "");
1890 #endif
1891 
1892 /*
1893  * Check to see if a filesystem is mounted on a block device.
1894  */
1895 int
1896 vfs_mountedon(vp)
1897 	struct vnode *vp;
1898 {
1899 	struct vnode *vq;
1900 	int error = 0;
1901 
1902 	if (vp->v_specflags & SI_MOUNTEDON)
1903 		return (EBUSY);
1904 	if (vp->v_flag & VALIASED) {
1905 		simple_lock(&spechash_slock);
1906 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1907 			if (vq->v_rdev != vp->v_rdev ||
1908 			    vq->v_type != vp->v_type)
1909 				continue;
1910 			if (vq->v_specflags & SI_MOUNTEDON) {
1911 				error = EBUSY;
1912 				break;
1913 			}
1914 		}
1915 		simple_unlock(&spechash_slock);
1916 	}
1917 	return (error);
1918 }
1919 
1920 /*
1921  * Unmount all filesystems. The list is traversed in reverse order
1922  * of mounting to avoid dependencies.
1923  */
1924 void
1925 vfs_unmountall()
1926 {
1927 	struct mount *mp, *nmp;
1928 	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
1929 	int error;
1930 
1931 	/*
1932 	 * Since this only runs when rebooting, it is not interlocked.
1933 	 */
1934 	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1935 		nmp = mp->mnt_list.cqe_prev;
1936 		error = dounmount(mp, MNT_FORCE, p);
1937 		if (error) {
1938 			printf("unmount of %s failed (",
1939 			    mp->mnt_stat.f_mntonname);
1940 			if (error == EBUSY)
1941 				printf("BUSY)\n");
1942 			else
1943 				printf("%d)\n", error);
1944 		}
1945 	}
1946 }
1947 
1948 /*
1949  * Build hash lists of net addresses and hang them off the mount point.
1950  * Called by ufs_mount() to set up the lists of export addresses.
1951  */
1952 static int
1953 vfs_hang_addrlist(mp, nep, argp)
1954 	struct mount *mp;
1955 	struct netexport *nep;
1956 	struct export_args *argp;
1957 {
1958 	register struct netcred *np;
1959 	register struct radix_node_head *rnh;
1960 	register int i;
1961 	struct radix_node *rn;
1962 	struct sockaddr *saddr, *smask = 0;
1963 	struct domain *dom;
1964 	int error;
1965 
1966 	if (argp->ex_addrlen == 0) {
1967 		if (mp->mnt_flag & MNT_DEFEXPORTED)
1968 			return (EPERM);
1969 		np = &nep->ne_defexported;
1970 		np->netc_exflags = argp->ex_flags;
1971 		np->netc_anon = argp->ex_anon;
1972 		np->netc_anon.cr_ref = 1;
1973 		mp->mnt_flag |= MNT_DEFEXPORTED;
1974 		return (0);
1975 	}
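	/*
	 * Allocate the netcred together with room for the address and
	 * mask, copy both in from userland, and enter the result in the
	 * per-address-family radix tree.
	 */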
1976 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1977 	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1978 	bzero((caddr_t) np, i);
1979 	saddr = (struct sockaddr *) (np + 1);
1980 	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1981 		goto out;
1982 	if (saddr->sa_len > argp->ex_addrlen)
1983 		saddr->sa_len = argp->ex_addrlen;
1984 	if (argp->ex_masklen) {
1985 		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1986 		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1987 		if (error)
1988 			goto out;
1989 		if (smask->sa_len > argp->ex_masklen)
1990 			smask->sa_len = argp->ex_masklen;
1991 	}
1992 	i = saddr->sa_family;
1993 	if ((rnh = nep->ne_rtable[i]) == 0) {
1994 		/*
1995 		 * Seems silly to initialize every AF when most are not used,
1996 		 * so do it on demand here.
1997 		 */
1998 		for (dom = domains; dom; dom = dom->dom_next)
1999 			if (dom->dom_family == i && dom->dom_rtattach) {
2000 				dom->dom_rtattach((void **) &nep->ne_rtable[i],
2001 				    dom->dom_rtoffset);
2002 				break;
2003 			}
2004 		if ((rnh = nep->ne_rtable[i]) == 0) {
2005 			error = ENOBUFS;
2006 			goto out;
2007 		}
2008 	}
2009 	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
2010 	    np->netc_rnodes);
2011 	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
2012 		error = EPERM;
2013 		goto out;
2014 	}
2015 	np->netc_exflags = argp->ex_flags;
2016 	np->netc_anon = argp->ex_anon;
2017 	np->netc_anon.cr_ref = 1;
2018 	return (0);
2019 out:
2020 	free(np, M_NETADDR);
2021 	return (error);
2022 }
2023 
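/*
 * rnh_walktree() callback used by vfs_free_addrlist() below to delete
 * and free a single netcred entry.
 */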
2024 /* ARGSUSED */
2025 static int
2026 vfs_free_netcred(rn, w)
2027 	struct radix_node *rn;
2028 	void *w;
2029 {
2030 	register struct radix_node_head *rnh = (struct radix_node_head *) w;
2031 
2032 	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2033 	free((caddr_t) rn, M_NETADDR);
2034 	return (0);
2035 }
2036 
2037 /*
2038  * Free the net address hash lists that are hanging off the mount points.
2039  */
2040 static void
2041 vfs_free_addrlist(nep)
2042 	struct netexport *nep;
2043 {
2044 	register int i;
2045 	register struct radix_node_head *rnh;
2046 
2047 	for (i = 0; i <= AF_MAX; i++)
2048 		if ((rnh = nep->ne_rtable[i])) {
2049 			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2050 			    (caddr_t) rnh);
2051 			free((caddr_t) rnh, M_RTABLE);
2052 			nep->ne_rtable[i] = 0;
2053 		}
2054 }
2055 
2056 int
2057 vfs_export(mp, nep, argp)
2058 	struct mount *mp;
2059 	struct netexport *nep;
2060 	struct export_args *argp;
2061 {
2062 	int error;
2063 
2064 	if (argp->ex_flags & MNT_DELEXPORT) {
2065 		if (mp->mnt_flag & MNT_EXPUBLIC) {
2066 			vfs_setpublicfs(NULL, NULL, NULL);
2067 			mp->mnt_flag &= ~MNT_EXPUBLIC;
2068 		}
2069 		vfs_free_addrlist(nep);
2070 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2071 	}
2072 	if (argp->ex_flags & MNT_EXPORTED) {
2073 		if (argp->ex_flags & MNT_EXPUBLIC) {
2074 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2075 				return (error);
2076 			mp->mnt_flag |= MNT_EXPUBLIC;
2077 		}
2078 		if ((error = vfs_hang_addrlist(mp, nep, argp)))
2079 			return (error);
2080 		mp->mnt_flag |= MNT_EXPORTED;
2081 	}
2082 	return (0);
2083 }
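
/*
 * Illustrative sketch (not part of this file): a filesystem's mount
 * routine typically forwards the export_args it copied in from user
 * space straight to vfs_export().  "ump->um_export" stands for wherever
 * that filesystem keeps its struct netexport; treat the names below as
 * placeholders.
 *
 *	if (args.fspec == 0)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 */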
2084 
2085 
2086 /*
2087  * Set the publicly exported filesystem (WebNFS). Currently, only
2088  * one public filesystem is possible in the spec (RFC 2054 and RFC 2055).
2089  */
2090 int
2091 vfs_setpublicfs(mp, nep, argp)
2092 	struct mount *mp;
2093 	struct netexport *nep;
2094 	struct export_args *argp;
2095 {
2096 	int error;
2097 	struct vnode *rvp;
2098 	char *cp;
2099 
2100 	/*
2101 	 * mp == NULL -> invalidate the current info, the FS is
2102 	 * no longer exported. May be called from either vfs_export
2103 	 * or unmount, so check if it hasn't already been done.
2104 	 */
2105 	if (mp == NULL) {
2106 		if (nfs_pub.np_valid) {
2107 			nfs_pub.np_valid = 0;
2108 			if (nfs_pub.np_index != NULL) {
2109 				FREE(nfs_pub.np_index, M_TEMP);
2110 				nfs_pub.np_index = NULL;
2111 			}
2112 		}
2113 		return (0);
2114 	}
2115 
2116 	/*
2117 	 * Only one allowed at a time.
2118 	 */
2119 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2120 		return (EBUSY);
2121 
2122 	/*
2123 	 * Get real filehandle for root of exported FS.
2124 	 */
2125 	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2126 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2127 
2128 	if ((error = VFS_ROOT(mp, &rvp)))
2129 		return (error);
2130 
2131 	error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid);
2132 	vput(rvp);		/* release the root vnode even if VPTOFH failed */
2133 	if (error)
2134 		return (error);
2135 
2136 	/*
2137 	 * If an indexfile was specified, pull it in.
2138 	 */
2139 	if (argp->ex_indexfile != NULL) {
2140 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2141 		    M_WAITOK);
2142 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2143 		    MAXNAMLEN, (size_t *)0);
2144 		if (!error) {
2145 			/*
2146 			 * Check for illegal filenames.
2147 			 */
2148 			for (cp = nfs_pub.np_index; *cp; cp++) {
2149 				if (*cp == '/') {
2150 					error = EINVAL;
2151 					break;
2152 				}
2153 			}
2154 		}
2155 		if (error) {
2156 			FREE(nfs_pub.np_index, M_TEMP);
2157 			return (error);
2158 		}
2159 	}
2160 
2161 	nfs_pub.np_mount = mp;
2162 	nfs_pub.np_valid = 1;
2163 	return (0);
2164 }
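
/*
 * Illustrative sketch (not part of this file): to publish a WebNFS
 * export, the export_args handed to vfs_export() carries MNT_EXPUBLIC
 * in addition to MNT_EXPORTED, and may name an index file.  The values
 * below are only examples.
 *
 *	struct export_args ea;
 *
 *	bzero(&ea, sizeof(ea));
 *	ea.ex_flags = MNT_EXPORTED | MNT_EXPUBLIC;
 *	ea.ex_indexfile = "index.html";
 */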
2165 
2166 struct netcred *
2167 vfs_export_lookup(mp, nep, nam)
2168 	register struct mount *mp;
2169 	struct netexport *nep;
2170 	struct sockaddr *nam;
2171 {
2172 	register struct netcred *np;
2173 	register struct radix_node_head *rnh;
2174 	struct sockaddr *saddr;
2175 
2176 	np = NULL;
2177 	if (mp->mnt_flag & MNT_EXPORTED) {
2178 		/*
2179 		 * Lookup in the export list first.
2180 		 */
2181 		if (nam != NULL) {
2182 			saddr = nam;
2183 			rnh = nep->ne_rtable[saddr->sa_family];
2184 			if (rnh != NULL) {
2185 				np = (struct netcred *)
2186 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2187 							      rnh);
2188 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2189 					np = NULL;
2190 			}
2191 		}
2192 		/*
2193 		 * If no address match, use the default if it exists.
2194 		 */
2195 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2196 			np = &nep->ne_defexported;
2197 	}
2198 	return (np);
2199 }
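
/*
 * Illustrative sketch (not part of this file): an exported filesystem
 * checks a client address against its export list roughly as below.
 * "nep" is that filesystem's struct netexport, "nam" the client's
 * sockaddr, and "exflagsp"/"credanonp" the caller's result pointers;
 * all of these names are placeholders.
 *
 *	struct netcred *np;
 *
 *	np = vfs_export_lookup(mp, nep, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	*exflagsp = np->netc_exflags;
 *	*credanonp = &np->netc_anon;
 */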
2200 
2201 /*
2202  * Perform msync on all vnodes under a mount point.
2203  * The mount point must be locked.
2204  */
2205 void
2206 vfs_msync(struct mount *mp, int flags)
2207 {
2208 	struct vnode *vp, *nvp;
2209 loop:
2210 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2211 		if (vp->v_mount != mp)
2212 			goto loop;
2213 		nvp = vp->v_mntvnodes.le_next;
2214 		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
2215 			continue;
2216 		if (vp->v_object &&
2217 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2218 			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
2219 		}
2220 	}
2221 }
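
/*
 * Illustrative sketch (not part of this file): a caller that wants
 * dirty mmap'ed pages pushed back to the filesystem before syncing it
 * might do the following; MNT_WAIT would be used instead when the
 * caller must wait for the writes to complete.
 *
 *	vfs_msync(mp, MNT_NOWAIT);
 *	error = VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
 */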
2222 
2223 /*
2224  * Create the VM object needed for VMIO and mmap support.  This
2225  * is done for all VREG files in the system.  Some filesystems might
2226  * afford the additional metadata buffering capability of the
2227  * VMIO code by making the device node be VMIO mode also.
2228  */
2229 int
2230 vfs_object_create(vp, p, cred, waslocked)
2231 	struct vnode *vp;
2232 	struct proc *p;
2233 	struct ucred *cred;
2234 	int waslocked;
2235 {
2236 	struct vattr vat;
2237 	vm_object_t object;
2238 	int error = 0;
2239 
2240 retry:
2241 	if ((object = vp->v_object) == NULL) {
2242 		if (vp->v_type == VREG) {
2243 			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2244 				goto retn;
2245 			(void) vnode_pager_alloc(vp,
2246 				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2247 		} else {
2248 			/*
2249 			 * This simply allocates the biggest object possible
2250 			 * for a VBLK vnode.  This should be fixed, but doesn't
2251 			 * cause any problems (yet).
2252 			 */
2253 			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
2254 		}
2255 		vp->v_object->flags |= OBJ_VFS_REF;
2256 	} else {
2257 		if (object->flags & OBJ_DEAD) {
2258 			if (waslocked)
2259 				VOP_UNLOCK(vp, 0, p);
2260 			tsleep(object, PVM, "vodead", 0);
2261 			if (waslocked)
2262 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2263 			goto retry;
2264 		}
2265 		if ((object->flags & OBJ_VFS_REF) == 0) {
2266 			object->flags |= OBJ_VFS_REF;
2267 			vm_object_reference(object);
2268 		}
2269 	}
2270 	if (vp->v_object)
2271 		vp->v_flag |= VVMIO;
2272 
2273 retn:
2274 	return error;
2275 }
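
/*
 * Illustrative sketch (not part of this file): a caller that has just
 * opened a regular file and holds the vnode lock might attach the VM
 * object like this (the final argument says the vnode is locked):
 *
 *	if (vp->v_type == VREG && vp->v_object == NULL)
 *		error = vfs_object_create(vp, p, cred, 1);
 */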
2276 
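/*
 * Put a vnode on the free list.  Aged vnodes (VAGE) go to the head so
 * that they are recycled first; all others go to the tail.
 */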
2277 void
2278 vfree(vp)
2279 	struct vnode *vp;
2280 {
2281 	simple_lock(&vnode_free_list_slock);
2282 	if (vp->v_flag & VAGE) {
2283 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2284 	} else {
2285 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2286 	}
2287 	freevnodes++;
2288 	simple_unlock(&vnode_free_list_slock);
2289 	vp->v_flag &= ~VAGE;
2290 	vp->v_flag |= VFREE;
2291 }
2292 
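/*
 * Take a vnode off the free list and clear VFREE, making it ineligible
 * for recycling until vfree() is called again.
 */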
2293 void
2294 vbusy(vp)
2295 	struct vnode *vp;
2296 {
2297 	simple_lock(&vnode_free_list_slock);
2298 	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2299 	freevnodes--;
2300 	simple_unlock(&vnode_free_list_slock);
2301 	vp->v_flag &= ~VFREE;
2302 }
2303 }