/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 * $Id: vfs_subr.c,v 1.60 1996/09/19 18:20:22 nate Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#ifdef DDB
extern void	printlockedvnodes __P((void));
#endif
extern void	vclean __P((struct vnode *vp, int flags));
extern void	vfs_unmountroot __P((struct mount *rootfs));

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {  \
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
static u_long freevnodes = 0;

struct mntlist mountlist;	/* mounted filesystem list */

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RD, &desiredvnodes, 0, "");
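
/*
 * Note on the bookkeeping above: vnode_free_list holds vnodes whose
 * v_usecount has dropped to zero; they stay hashed and keep their
 * cached pages and namei entries until recycled by getnewvnode().
 * freevnodes counts the length of that list, and desiredvnodes is a
 * soft target for the total vnode population, not a hard limit.
 */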

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{
	desiredvnodes = maxproc + vm_object_cache_max + extravnodes;

	TAILQ_INIT(&vnode_free_list);
	CIRCLEQ_INIT(&mountlist);
}
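
/*
 * The sizing heuristic above scales the vnode target with the process
 * limit and the VM object cache, plus a fixed cushion (extravnodes);
 * it is a tunable guess rather than a derived bound.
 */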

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		(void) tsleep((caddr_t) mp, PVFS, "vfslck", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t) mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		(void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t) &mp->mnt_flag);
	}
}

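/*
 * Forcibly unmount the root filesystem at shutdown time, syncing it
 * first and reporting (but not propagating) any errors.
 */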
void
vfs_unmountroot(struct mount *rootfs)
{
	struct mount *mp = rootfs;
	int error;

	if (vfs_busy(mp)) {
		printf("failed to unmount root\n");
		return;
	}
	mp->mnt_flag |= MNT_UNMOUNT;
	if ((error = vfs_lock(mp))) {
		printf("lock of root filesystem failed (%d)\n", error);
		return;
	}
	vnode_pager_umount(mp);	/* release cached vnodes */
	cache_purgevfs(mp);	/* remove cache entries for this file sys */

	if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc)))
		printf("sync of root filesystem failed (%d)\n", error);

	if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) {
		printf("unmount of root filesystem failed (");
		if (error == EBUSY)
			printf("BUSY)\n");
		else
			printf("%d)\n", error);
	}
	mp->mnt_flag &= ~MNT_UNMOUNT;
	vfs_unbusy(mp);
}

/*
 * Unmount all filesystems.  Should only be called by halt().
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp, *rootfs = NULL;
	int error;

	/* unmount all but rootfs */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;

		if (mp->mnt_flag & MNT_ROOTFS) {
			rootfs = mp;
			continue;
		}
		error = dounmount(mp, MNT_FORCE, initproc);
		if (error) {
			printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}

	/* and finally... */
	if (rootfs) {
		vfs_unmountroot(rootfs);
	} else {
		printf("no root filesystem\n");
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}
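
/*
 * Uniqueness scheme above: val[0] is built with makedev() from a
 * pseudo-major (nblkdev + filesystem type), so it cannot collide with
 * a real block device number, and val[1] records the filesystem type.
 * The minor number is then bumped until getvfs() finds no existing
 * mount with the candidate fsid.
 */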

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	register struct vnode *vp;

retry:
	vp = vnode_free_list.tqh_first;
	/*
	 * We allocate a new vnode if
	 *	1. we don't have any free
	 *		Pretty obvious, we actually used to panic, but that
	 *		is a silly thing to do.
	 *	2. we haven't filled our pool yet
	 *		We don't want to trash the incore (VM) vnode cache.
	 *	3. less than 1/4 of our vnodes are free.
	 *		We don't want to trash the namei cache either.
	 */
	if (freevnodes < (numvnodes >> 2) ||
	    numvnodes < desiredvnodes ||
	    vp == NULL) {
		vp = (struct vnode *) malloc((u_long) sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *) vp, sizeof *vp);
		numvnodes++;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		if (vp->v_usage > 0) {
			--vp->v_usage;
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			goto retry;
		}
		freevnodes--;
		if (vp->v_usecount)
			panic("free vnode isn't");

		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);

#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_usage = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}
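
/*
 * On success getnewvnode() hands back an unlocked vnode with
 * v_usecount set to 1, v_type VNON and v_data cleared; the caller is
 * expected to attach its filesystem-specific data and set the type.
 * The v_usage aging above gives recently used vnodes on the free list
 * a second pass around before they are recycled.
 */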

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}

	s = splbio();
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
			brelse(bp);
		}
	}
	splx(s);

	s = splbio();
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}
	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, object->size,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
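
/*
 * The flush above runs in two phases: first the clean and dirty
 * buffer lists are walked at splbio and each non-busy buffer is
 * either written out (B_DELWRI with V_SAVE) or invalidated,
 * restarting whenever a busy buffer forces a sleep; then the
 * v_numoutput loop drains writes still in flight before the pages in
 * the VM object are thrown away.
 */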

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	if (bp->b_vp)
		panic("pbgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL\n");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		tbp = newvp->v_dirtyblkhd.lh_first;
		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
			bufinsvn(bp, &newvp->v_dirtyblkhd);
		} else {
			while (tbp->b_vnbufs.le_next &&
				(tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
				tbp = tbp->b_vnbufs.le_next;
			}
			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
		}
	} else {
		bufinsvn(bp, &newvp->v_cleanblkhd);
	}
	splx(s);
}
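
/*
 * The dirty list is kept sorted by logical block number (the
 * insertion walk above), so code flushing a vnode pushes its buffers
 * out in roughly ascending disk order.
 */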

#ifndef DEVFS_ROOT
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
#endif /* !DEVFS_ROOT */

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}

	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
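
/*
 * Return contract for checkalias(): NULLVP means the caller should
 * keep using nvp (now entered on the spec hash chain); a non-NULL
 * return is an existing anonymous device vnode that has been cleaned
 * out and re-tagged, and nvp is left as an empty VNON vnode for the
 * caller to discard.
 */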

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for another
	 * use, we wait for the cleaning to finish and then return failure.
	 * Cleaning is determined either by checking that the VXLOCK flag is
	 * set, or that the use count is zero with the back pointer set to
	 * show that it has been removed from the free list by getnewvnode.
	 * The VXLOCK flag may not have been set yet because vclean is blocked
	 * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
		vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t) vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	vp->v_usecount++;

	/*
	 * Create the VM object, if needed
	 */
	if ((vp->v_type == VREG) &&
		((vp->v_object == NULL) ||
			(vp->v_object->flags & OBJ_VFS_REF) == 0)) {
		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
	}
	if (lockflag)
		VOP_LOCK(vp);

	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");

	vp->v_usecount++;

	if ((vp->v_type == VREG) &&
		((vp->v_object == NULL) ||
			((vp->v_object->flags & OBJ_VFS_REF) == 0)) ) {
		/*
		 * We need to lock the vnode while the object is
		 * created.  This is necessary to keep the system
		 * from re-entrantly doing it multiple times.
		 */
		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
	}
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{
	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif

	vp->v_usecount--;

	if ((vp->v_usecount == 1) &&
		vp->v_object &&
		(vp->v_object->flags & OBJ_VFS_REF)) {
		vp->v_object->flags &= ~OBJ_VFS_REF;
		vm_object_deallocate(vp->v_object);
		return;
	}

	if (vp->v_usecount > 0)
		return;

	if (vp->v_usecount < 0) {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
#endif
		panic("vrele: negative reference cnt");
	}
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		vp->v_flag &= ~VAGE;
		vp->v_usage = 0;
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;

	VOP_INACTIVE(vp);
}
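
/*
 * OBJ_VFS_REF accounting: vget()/vref() take an extra reference on a
 * VREG vnode's VM object on behalf of the VFS; the usecount == 1 case
 * in vrele() gives that reference back before the vnode itself can
 * drop to zero and return to the free list.
 */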

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;		/* print out busy vnodes */
SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;

		if ((vp->v_usecount == 1) && vp->v_object) {
			pager_cache(vp->v_object, FALSE);
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(struct vnode *vp, int flags)
{
	int active;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still have
	 * the object locked while it cleans it out. The VOP_LOCK ensures that
	 * the VOP_INACTIVE routine is done with its work. For active vnodes,
	 * it ensures that no other activity can occur while the underlying
	 * object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first wait for
	 * VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and deactivated
	 * before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress, wait until
		 * it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			(void) tsleep((caddr_t) vp, PINOD, "vgall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we are eliminating
		 * its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will really eliminate
		 * the vnode after which time vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress, wait until it is
	 * done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t) vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head, move it to
	 * the head of the list. The test of the back pointer and the
	 * reference count of zero is because it will be removed from the free
	 * list by getnewvnode, but will not have its reference count
	 * incremented until after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to close the
	 * previous instance of the underlying object. So, the back pointer is
	 * explicitly set to `0xdeadb' in getnewvnode after removing it from
	 * the freelist to ensure that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

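	/*
	 * Restart the scan from the top whenever an unused alias is
	 * vgone'd, since that edits the hash chain under us.
	 */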
loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *) 0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;

#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_next;
		if (vfs_busy(mp))
			continue;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with this
			 * filesystem.  RACE: could have been recycled onto
			 * the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				goto again;
			}
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ))) {
				vfs_unbusy(mp);
				return (error);
			}
		}
		vfs_unbusy(mp);
	}

	return (0);
}
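
/*
 * The size-only probe (oldptr == NULL) pads the reply with
 * KINFO_VNODESLOP extra slots so that vnodes created between the
 * estimate and the actual copyout still fit in the user's buffer.
 */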

SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
	struct export_args *argp)
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
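	/*
	 * The netcred, the address and the (optional) mask live in one
	 * allocation: the sockaddrs are laid out immediately after the
	 * struct netcred itself.
	 */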
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(struct radix_node *rn, void *w)
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(struct netexport *nep)
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

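/*
 * Update the export information for a mount point.  MNT_DELEXPORT
 * tears down all existing export entries; MNT_EXPORTED (re)installs
 * the address list described by argp.
 */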
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

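/*
 * Look up the export credentials that apply to a client address,
 * falling back to the default export entry when no specific match
 * exists in the per-AF radix tree.
 */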
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr) ((caddr_t) saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags)
{
	struct vnode *vp, *nvp;

loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
			continue;
		if (vp->v_object &&
		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
		}
	}
}

/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems might
 * afford the additional metadata buffering capability of the
 * VMIO code by making the device node be VMIO mode also.
 */
int
vfs_object_create(vp, p, cred, waslocked)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	int waslocked;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG) {
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			(void) vnode_pager_alloc(vp,
				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
		} else {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode.  This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
		}
		vp->v_object->flags |= OBJ_VFS_REF;
	} else {
		if (object->flags & OBJ_DEAD) {
			if (waslocked)
				VOP_UNLOCK(vp);
			tsleep(object, PVM, "vodead", 0);
			if (waslocked)
				VOP_LOCK(vp);
			goto retry;
		}
		if ((object->flags & OBJ_VFS_REF) == 0) {
			object->flags |= OBJ_VFS_REF;
			vm_object_reference(object);
		}
	}
	if (vp->v_object)
		vp->v_flag |= VVMIO;

retn:
	return (error);
}