/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 * $Id: vfs_subr.c,v 1.39 1995/11/09 08:13:48 bde Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

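/*
 * Conversion tables between the IFMT bits of an inode mode and the
 * corresponding vnode type.  They back the IFTOVT() and VTTOIF()
 * macros: iftovt_tab is indexed by the IFMT bits shifted down 12,
 * vttoif_tab by an enum vtype.
 */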
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {  \
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}
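
/* A b_vnbufs.le_next of NOLIST marks a buffer that is on no vnode list. */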

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
u_long freevnodes	= 0;

struct mntlist mountlist;	/* mounted filesystem list */

int desiredvnodes;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{
	desiredvnodes = maxproc + vm_object_cache_max;

	TAILQ_INIT(&vnode_free_list);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		(void) tsleep((caddr_t) mp, PVFS, "vfslck", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t) mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		(void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0);
	}
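	/*
	 * If an unmount is in progress, the mount point is going away;
	 * tell the caller not to start anything new on it.
	 */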
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t) &mp->mnt_flag);
	}
}

void
vfs_unmountroot(rootfs)
	struct mount *rootfs;
{
	struct mount *mp = rootfs;
	int error;

	if (vfs_busy(mp)) {
		printf("failed to unmount root\n");
		return;
	}
	mp->mnt_flag |= MNT_UNMOUNT;
	if ((error = vfs_lock(mp))) {
		printf("lock of root filesystem failed (%d)\n", error);
		return;
	}
	vnode_pager_umount(mp);	/* release cached vnodes */
	cache_purgevfs(mp);	/* remove cache entries for this file sys */

	if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc)))
		printf("sync of root filesystem failed (%d)\n", error);

	if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) {
		printf("unmount of root filesystem failed (");
		if (error == EBUSY)
			printf("BUSY)\n");
		else
			printf("%d)\n", error);
	}
	mp->mnt_flag &= ~MNT_UNMOUNT;
	vfs_unbusy(mp);
}

/*
 * Unmount all filesystems.  Should only be called by halt().
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp, *rootfs = NULL;
	int error;

	/*
	 * Unmount all but rootfs, walking the mount list in reverse so
	 * that filesystems mounted on top of others are unmounted first.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;

		if (mp->mnt_flag & MNT_ROOTFS) {
			rootfs = mp;
			continue;
		}
		error = dounmount(mp, MNT_FORCE, initproc);
		if (error) {
			printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}

	/* and finally... */
	if (rootfs) {
		vfs_unmountroot(rootfs);
	} else {
		printf("no root filesystem\n");
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;

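	/*
	 * The fsid is made from a pseudo device number (nblkdev + mtype
	 * keeps it clear of any real block device) in val[0] and the
	 * filesystem type in val[1]; the loop below bumps the candidate
	 * id until no mounted filesystem is using it.
	 */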
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
	    vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
	    vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;
extern void vclean();

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	register struct vnode *vp;

	vp = vnode_free_list.tqh_first;
	/*
	 * We allocate a new vnode if
	 *	1. we don't have any free
	 *		Pretty obvious, we actually used to panic, but that
	 *		is a silly thing to do.
	 *	2. we haven't filled our pool yet
	 *		We don't want to trash the incore (VM-)vnodecache.
	 *	3. less than 1/4th of our vnodes are free.
	 *		We don't want to trash the namei cache either.
	 */
	if (freevnodes < (numvnodes >> 2) ||
	    numvnodes < desiredvnodes ||
	    vp == NULL) {
		vp = (struct vnode *) malloc((u_long) sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *) vp, sizeof *vp);
		numvnodes++;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;

		if (vp->v_usecount)
			panic("free vnode isn't");

		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
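	/*
	 * Walk the clean list and then the dirty list until both are
	 * empty.  With V_SAVEMETA, buffers with a negative logical block
	 * number (indirect blocks and other metadata) are skipped and
	 * left on the lists.
	 */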
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
			brelse(bp);
		}
	}

	s = splbio();
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}
	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, object->size,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	if (bp->b_vp)
		panic("pbgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
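	/*
	 * The dirty list is kept sorted by logical block number, so the
	 * buffers go out in ascending order when the vnode is synced.
	 */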
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		tbp = newvp->v_dirtyblkhd.lh_first;
		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
			bufinsvn(bp, &newvp->v_dirtyblkhd);
		} else {
			while (tbp->b_vnbufs.le_next &&
			    (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
				tbp = tbp->b_vnbufs.le_next;
			}
			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
		}
	} else {
		listheadp = &newvp->v_cleanblkhd;
		bufinsvn(bp, listheadp);
	}
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
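	/*
	 * checkalias() returns a previously existing vnode for this
	 * device, if there is one; in that case release the freshly
	 * allocated vnode and use the alias instead.
	 */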
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
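	/*
	 * If there is no in-use alias, or the alias belongs to a real
	 * filesystem (v_tag != VT_NON), enter nvp on the hash chain as
	 * a new alias and let the caller keep it.
	 */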
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set while
 * the vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for another
	 * use, we wait for the cleaning to finish and then return failure.
	 * Cleaning is determined either by checking that the VXLOCK flag is
	 * set, or that the use count is zero with the back pointer set to
	 * show that it has been removed from the free list by getnewvnode.
	 * The VXLOCK flag may not have been set yet because vclean is blocked
	 * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
		vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t) vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 /* || vp->v_writecount < 0 */ ) {
		vprint("vrele: negative ref count", vp);
		panic("vrele: negative reference cnt");
	}
#endif
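	/*
	 * A vnode marked VAGE goes to the head of the free list, where
	 * getnewvnode() will reclaim it first; everything else goes to
	 * the tail so it survives longer in the cache.
	 */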
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		vp->v_flag &= ~VAGE;
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;

	VOP_INACTIVE(vp);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;		/* print out busy vnodes */
struct ctldebug debug1 = {"busyprt", &busyprt};
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still have
	 * the object locked while it cleans it out. The VOP_LOCK ensures that
	 * the VOP_INACTIVE routine is done with its work. For active vnodes,
	 * it ensures that no other activity can occur while the underlying
	 * object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first wait for
	 * VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and deactivated
	 * before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress, wait until
		 * it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			(void) tsleep((caddr_t) vp, PINOD, "vgall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we are eliminating
		 * its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will really eliminate
		 * the vnode after which time vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress, wait until it is
	 * done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t) vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head, move it to
	 * the head of the list. The test of the back pointer and the
	 * reference count of zero is because it will be removed from the free
	 * list by getnewvnode, but will not have its reference count
	 * incremented until after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to close the
	 * previous instance of the underlying object. So, the back pointer is
	 * explicitly set to `0xdeadb' in getnewvnode after removing it from
	 * the freelist to ensure that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *) 0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;

#define KINFO_VNODESLOP	10
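/* The slop allows for vnodes created while the sysctl dump is in progress. */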
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with this
			 * filesystem.  RACE: could have been recycled onto
			 * the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				vfs_unbusy(mp);
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout(&vp, bp, VPTRSZ)) ||
			    (error = copyout(vp, bp + VPTRSZ, VNODESZ))) {
				vfs_unbusy(mp);
				*sizep = bp - where;
				return (error);
			}
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
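	/*
	 * Allocate one chunk large enough to hold the netcred with the
	 * export address and the (optional) mask appended behind it.
	 */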
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr) ((caddr_t) saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}


/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(mp, flags)
	struct mount *mp;
	int flags;
{
	struct vnode *vp;
loop:
	for (vp = mp->mnt_vnodelist.lh_first;
	     vp != NULL;
	     vp = vp->v_mntvnodes.le_next) {

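		/*
		 * If the vnode was recycled onto another mount point
		 * while we were working, restart the scan.
		 */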
		if (vp->v_mount != mp)
			goto loop;
		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
			continue;
		if (vp->v_object &&
		   (((vm_object_t) vp->v_object)->flags & OBJ_MIGHTBEDIRTY)) {
			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
		}
	}
}