xref: /freebsd/sys/kern/vfs_export.c (revision 21a9039725ea6b859a4115ff4b7f39d12d81a1ca)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1989, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  * (c) UNIX System Laboratories, Inc.
5df8bae1dSRodney W. Grimes  * All or some portions of this file are derived from material licensed
6df8bae1dSRodney W. Grimes  * to the University of California by American Telephone and Telegraph
7df8bae1dSRodney W. Grimes  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8df8bae1dSRodney W. Grimes  * the permission of UNIX System Laboratories, Inc.
9df8bae1dSRodney W. Grimes  *
10df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
11df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
12df8bae1dSRodney W. Grimes  * are met:
13df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
15df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
17df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
19df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
20df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
21df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
22df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
23df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
24df8bae1dSRodney W. Grimes  *    without specific prior written permission.
25df8bae1dSRodney W. Grimes  *
26df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
37df8bae1dSRodney W. Grimes  *
38996c772fSJohn Dyson  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39c3aac50fSPeter Wemm  * $FreeBSD$
40df8bae1dSRodney W. Grimes  */
41df8bae1dSRodney W. Grimes 
42df8bae1dSRodney W. Grimes /*
43df8bae1dSRodney W. Grimes  * External virtual filesystem routines
44df8bae1dSRodney W. Grimes  */
450e41ee30SGarrett Wollman #include "opt_ddb.h"
463275cf73SPoul-Henning Kamp #include "opt_ffs.h"
47df8bae1dSRodney W. Grimes 
48df8bae1dSRodney W. Grimes #include <sys/param.h>
49df8bae1dSRodney W. Grimes #include <sys/systm.h>
509626b608SPoul-Henning Kamp #include <sys/bio.h>
515e950839SLuoqi Chen #include <sys/buf.h>
525e950839SLuoqi Chen #include <sys/conf.h>
535e950839SLuoqi Chen #include <sys/dirent.h>
545e950839SLuoqi Chen #include <sys/domain.h>
555e950839SLuoqi Chen #include <sys/eventhandler.h>
564d948813SBruce Evans #include <sys/fcntl.h>
57986f4ce7SBruce Evans #include <sys/kernel.h>
589c8b8baaSPeter Wemm #include <sys/kthread.h>
590384fff8SJason Evans #include <sys/ktr.h>
60a1c995b6SPoul-Henning Kamp #include <sys/malloc.h>
61df8bae1dSRodney W. Grimes #include <sys/mount.h>
62e12d97d2SEivind Eklund #include <sys/namei.h>
635e950839SLuoqi Chen #include <sys/proc.h>
645e950839SLuoqi Chen #include <sys/reboot.h>
65771b51efSBruce Evans #include <sys/socket.h>
66df8bae1dSRodney W. Grimes #include <sys/stat.h>
675e950839SLuoqi Chen #include <sys/sysctl.h>
682be70f79SJohn Dyson #include <sys/vmmeter.h>
695e950839SLuoqi Chen #include <sys/vnode.h>
70df8bae1dSRodney W. Grimes 
71d3114049SBruce Evans #include <machine/limits.h>
720384fff8SJason Evans #include <machine/mutex.h>
73d3114049SBruce Evans 
74df8bae1dSRodney W. Grimes #include <vm/vm.h>
75efeaf95aSDavid Greenman #include <vm/vm_object.h>
76efeaf95aSDavid Greenman #include <vm/vm_extern.h>
771efb74fbSJohn Dyson #include <vm/pmap.h>
781efb74fbSJohn Dyson #include <vm/vm_map.h>
791c7c3c6aSMatthew Dillon #include <vm/vm_page.h>
8047221757SJohn Dyson #include <vm/vm_pager.h>
816476c0d2SJohn Dyson #include <vm/vnode_pager.h>
822d8acc0fSJohn Dyson #include <vm/vm_zone.h>
83df8bae1dSRodney W. Grimes 
84a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
8555166637SPoul-Henning Kamp 
86cb451ebdSBruce Evans static void	insmntque __P((struct vnode *vp, struct mount *mp));
87996c772fSJohn Dyson static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
88cb451ebdSBruce Evans static unsigned long	numvnodes;
89b15a966eSPoul-Henning Kamp SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
9098d93822SBruce Evans 
91df8bae1dSRodney W. Grimes enum vtype iftovt_tab[16] = {
92df8bae1dSRodney W. Grimes 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
93df8bae1dSRodney W. Grimes 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
94df8bae1dSRodney W. Grimes };
95df8bae1dSRodney W. Grimes int vttoif_tab[9] = {
96df8bae1dSRodney W. Grimes 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
97df8bae1dSRodney W. Grimes 	S_IFSOCK, S_IFIFO, S_IFMT,
98df8bae1dSRodney W. Grimes };
99df8bae1dSRodney W. Grimes 
100e3975643SJake Burkholder static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
101925a3a41SJohn Dyson 
10287b1940aSPoul-Henning Kamp static u_long wantfreevnodes = 25;
10300544193SPoul-Henning Kamp SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
104cba2a7c6SBruce Evans static u_long freevnodes = 0;
105a051452aSPoul-Henning Kamp SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
106fbd6e6c9SPoul-Henning Kamp 
107e929c00dSKirk McKusick static int reassignbufcalls;
108e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
109e929c00dSKirk McKusick static int reassignbufloops;
110e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
111e929c00dSKirk McKusick static int reassignbufsortgood;
112e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
113e929c00dSKirk McKusick static int reassignbufsortbad;
114e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
115e929c00dSKirk McKusick static int reassignbufmethod = 1;
116e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
117e929c00dSKirk McKusick 
118bef608bdSJohn Dyson #ifdef ENABLE_VFS_IOOPT
119ad8ac923SKirk McKusick int vfs_ioopt = 0;
12060f8d464SJohn Dyson SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
12126300b34SJohn Dyson #endif
12260f8d464SJohn Dyson 
1230429e37aSPoul-Henning Kamp struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */
124996c772fSJohn Dyson struct simplelock mountlist_slock;
125996c772fSJohn Dyson struct simplelock mntvnode_slock;
126500b04a2SBruce Evans int	nfs_mount_type = -1;
127289bdf33SBruce Evans #ifndef NULL_SIMPLELOCKS
128289bdf33SBruce Evans static struct simplelock mntid_slock;
129303b270bSEivind Eklund static struct simplelock vnode_free_list_slock;
130996c772fSJohn Dyson static struct simplelock spechash_slock;
131289bdf33SBruce Evans #endif
132f6b4c285SDoug Rabson struct nfs_public nfs_pub;	/* publicly exported FS */
1332d8acc0fSJohn Dyson static vm_zone_t vnode_zone;
134a8b1f9d2SPoul-Henning Kamp int	prtactive = 0;		/* 1 => print out reclaim of active vnodes */
135df8bae1dSRodney W. Grimes 
136b1897c19SJulian Elischer /*
137b1897c19SJulian Elischer  * The workitem queue.
138b1897c19SJulian Elischer  */
139b1897c19SJulian Elischer #define SYNCER_MAXDELAY		32
140db878ba4SEivind Eklund static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
141e4ab40bcSKirk McKusick time_t syncdelay = 30;		/* max time to delay syncing data */
142e4ab40bcSKirk McKusick time_t filedelay = 30;		/* time to delay syncing files */
143e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
14467812eacSKirk McKusick time_t dirdelay = 29;		/* time to delay syncing directories */
145e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
14667812eacSKirk McKusick time_t metadelay = 28;		/* time to delay syncing metadata */
147e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
148e4ab40bcSKirk McKusick static int rushjob;			/* number of slots to run ASAP */
149e4ab40bcSKirk McKusick static int stat_rush_requests;	/* number of times I/O speeded up */
150e4ab40bcSKirk McKusick SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
151b1897c19SJulian Elischer 
152b1897c19SJulian Elischer static int syncer_delayno = 0;
153b1897c19SJulian Elischer static long syncer_mask;
154e3975643SJake Burkholder LIST_HEAD(synclist, vnode);
155b1897c19SJulian Elischer static struct synclist *syncer_workitem_pending;
156b1897c19SJulian Elischer 
1570d94caffSDavid Greenman int desiredvnodes;
1583d177f46SBill Fumerola SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
1593d177f46SBill Fumerola     &desiredvnodes, 0, "Maximum number of vnodes");
1600d94caffSDavid Greenman 
16198d93822SBruce Evans static void	vfs_free_addrlist __P((struct netexport *nep));
16298d93822SBruce Evans static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
16398d93822SBruce Evans static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
16498d93822SBruce Evans 				       struct export_args *argp));
16598d93822SBruce Evans 
166df8bae1dSRodney W. Grimes /*
167df8bae1dSRodney W. Grimes  * Initialize the vnode management data structures.
168df8bae1dSRodney W. Grimes  */
16926f9a767SRodney W. Grimes void
170df8bae1dSRodney W. Grimes vntblinit()
171df8bae1dSRodney W. Grimes {
172df8bae1dSRodney W. Grimes 
1732be70f79SJohn Dyson 	desiredvnodes = maxproc + cnt.v_page_count / 4;
174996c772fSJohn Dyson 	simple_lock_init(&mntvnode_slock);
175996c772fSJohn Dyson 	simple_lock_init(&mntid_slock);
176996c772fSJohn Dyson 	simple_lock_init(&spechash_slock);
177df8bae1dSRodney W. Grimes 	TAILQ_INIT(&vnode_free_list);
178996c772fSJohn Dyson 	simple_lock_init(&vnode_free_list_slock);
1792d8acc0fSJohn Dyson 	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
180b1897c19SJulian Elischer 	/*
181b1897c19SJulian Elischer 	 * Initialize the filesystem syncer.
182b1897c19SJulian Elischer 	 */
183b1897c19SJulian Elischer 	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
184b1897c19SJulian Elischer 		&syncer_mask);
185b1897c19SJulian Elischer 	syncer_maxdelay = syncer_mask + 1;
186df8bae1dSRodney W. Grimes }
187df8bae1dSRodney W. Grimes 
188df8bae1dSRodney W. Grimes /*
189996c772fSJohn Dyson  * Mark a mount point as busy. Used to synchronize access and to delay
190996c772fSJohn Dyson  * unmounting. Interlock is not released on failure.
191df8bae1dSRodney W. Grimes  */
19226f9a767SRodney W. Grimes int
193996c772fSJohn Dyson vfs_busy(mp, flags, interlkp, p)
194996c772fSJohn Dyson 	struct mount *mp;
195996c772fSJohn Dyson 	int flags;
196996c772fSJohn Dyson 	struct simplelock *interlkp;
197996c772fSJohn Dyson 	struct proc *p;
198df8bae1dSRodney W. Grimes {
199996c772fSJohn Dyson 	int lkflags;
200df8bae1dSRodney W. Grimes 
201b1f4a44bSJulian Elischer 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
202996c772fSJohn Dyson 		if (flags & LK_NOWAIT)
203996c772fSJohn Dyson 			return (ENOENT);
204b1f4a44bSJulian Elischer 		mp->mnt_kern_flag |= MNTK_MWAIT;
205996c772fSJohn Dyson 		if (interlkp) {
206996c772fSJohn Dyson 			simple_unlock(interlkp);
207df8bae1dSRodney W. Grimes 		}
208df8bae1dSRodney W. Grimes 		/*
209996c772fSJohn Dyson 		 * Since all busy locks are shared except the exclusive
210996c772fSJohn Dyson 		 * lock granted when unmounting, the only place that a
211996c772fSJohn Dyson 		 * wakeup needs to be done is at the release of the
212996c772fSJohn Dyson 		 * exclusive lock at the end of dounmount.
213df8bae1dSRodney W. Grimes 		 */
214996c772fSJohn Dyson 		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
215996c772fSJohn Dyson 		if (interlkp) {
216996c772fSJohn Dyson 			simple_lock(interlkp);
217df8bae1dSRodney W. Grimes 		}
218996c772fSJohn Dyson 		return (ENOENT);
219df8bae1dSRodney W. Grimes 	}
2208f9110f6SJohn Dyson 	lkflags = LK_SHARED | LK_NOPAUSE;
221996c772fSJohn Dyson 	if (interlkp)
222996c772fSJohn Dyson 		lkflags |= LK_INTERLOCK;
223996c772fSJohn Dyson 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
224996c772fSJohn Dyson 		panic("vfs_busy: unexpected lock failure");
225df8bae1dSRodney W. Grimes 	return (0);
226df8bae1dSRodney W. Grimes }
227df8bae1dSRodney W. Grimes 
228df8bae1dSRodney W. Grimes /*
229df8bae1dSRodney W. Grimes  * Free a busy filesystem.
230df8bae1dSRodney W. Grimes  */
23126f9a767SRodney W. Grimes void
232996c772fSJohn Dyson vfs_unbusy(mp, p)
233996c772fSJohn Dyson 	struct mount *mp;
234996c772fSJohn Dyson 	struct proc *p;
235df8bae1dSRodney W. Grimes {
236df8bae1dSRodney W. Grimes 
237996c772fSJohn Dyson 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
238e0e9c421SDavid Greenman }
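
/*
 * Example (illustrative sketch, following the mountlist walk used by
 * callers such as sync(2)): each mount point is busied before use so
 * it cannot be unmounted underneath us, and unbusied afterwards.  On
 * success vfs_busy() releases the interlock passed to it; on failure
 * the interlock is still held, per the comment above vfs_busy().
 */
#if 0
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		/* ... use the busied mount point here ... */
		simple_lock(&mountlist_slock);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
#endif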
239e0e9c421SDavid Greenman 
240e0e9c421SDavid Greenman /*
241996c772fSJohn Dyson  * Lookup a filesystem type, and if found allocate and initialize
242996c772fSJohn Dyson  * a mount structure for it.
243996c772fSJohn Dyson  *
244996c772fSJohn Dyson  * Devname is usually updated by mount(8) after booting.
245e0e9c421SDavid Greenman  */
246996c772fSJohn Dyson int
247996c772fSJohn Dyson vfs_rootmountalloc(fstypename, devname, mpp)
248996c772fSJohn Dyson 	char *fstypename;
249996c772fSJohn Dyson 	char *devname;
250996c772fSJohn Dyson 	struct mount **mpp;
251e0e9c421SDavid Greenman {
252996c772fSJohn Dyson 	struct proc *p = curproc;	/* XXX */
253996c772fSJohn Dyson 	struct vfsconf *vfsp;
254996c772fSJohn Dyson 	struct mount *mp;
255996c772fSJohn Dyson 
256ecbb00a2SDoug Rabson 	if (fstypename == NULL)
257ecbb00a2SDoug Rabson 		return (ENODEV);
258996c772fSJohn Dyson 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
259996c772fSJohn Dyson 		if (!strcmp(vfsp->vfc_name, fstypename))
260996c772fSJohn Dyson 			break;
261996c772fSJohn Dyson 	if (vfsp == NULL)
262996c772fSJohn Dyson 		return (ENODEV);
263996c772fSJohn Dyson 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
264996c772fSJohn Dyson 	bzero((char *)mp, (u_long)sizeof(struct mount));
2658f9110f6SJohn Dyson 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
266996c772fSJohn Dyson 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
267996c772fSJohn Dyson 	LIST_INIT(&mp->mnt_vnodelist);
268996c772fSJohn Dyson 	mp->mnt_vfc = vfsp;
269996c772fSJohn Dyson 	mp->mnt_op = vfsp->vfc_vfsops;
270996c772fSJohn Dyson 	mp->mnt_flag = MNT_RDONLY;
271996c772fSJohn Dyson 	mp->mnt_vnodecovered = NULLVP;
272996c772fSJohn Dyson 	vfsp->vfc_refcount++;
2731b5464efSPoul-Henning Kamp 	mp->mnt_iosize_max = DFLTPHYS;
274996c772fSJohn Dyson 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
275996c772fSJohn Dyson 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
276996c772fSJohn Dyson 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
277996c772fSJohn Dyson 	mp->mnt_stat.f_mntonname[0] = '/';
278996c772fSJohn Dyson 	mp->mnt_stat.f_mntonname[1] = 0;
279996c772fSJohn Dyson 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
280996c772fSJohn Dyson 	*mpp = mp;
281996c772fSJohn Dyson 	return (0);
282996c772fSJohn Dyson }
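
/*
 * Example (illustrative sketch): the root mount code allocates its
 * read-only mount structure before the real device name is known,
 * leaving mount(8) to fix up f_mntfromname later:
 */
#if 0
	struct mount *mp;
	int error;

	error = vfs_rootmountalloc("ufs", "root_device", &mp);
	if (error)
		return (error);
	/* ... VFS_MOUNT() the filesystem, then vfs_unbusy(mp, p) ... */
#endif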
283996c772fSJohn Dyson 
284996c772fSJohn Dyson /*
285996c772fSJohn Dyson  * Find an appropriate filesystem to use for the root. If a filesystem
286996c772fSJohn Dyson  * has not been preselected, walk through the list of known filesystems
287996c772fSJohn Dyson  * trying those that have mountroot routines, until one
288996c772fSJohn Dyson  * works or we have tried them all.
289996c772fSJohn Dyson  */
290996c772fSJohn Dyson #ifdef notdef	/* XXX JH */
291996c772fSJohn Dyson int
292514ede09SBruce Evans lite2_vfs_mountroot()
293996c772fSJohn Dyson {
294996c772fSJohn Dyson 	struct vfsconf *vfsp;
295514ede09SBruce Evans 	extern int (*lite2_mountroot) __P((void));
296e0e9c421SDavid Greenman 	int error;
297e0e9c421SDavid Greenman 
298996c772fSJohn Dyson 	if (lite2_mountroot != NULL)
299996c772fSJohn Dyson 		return ((*lite2_mountroot)());
300996c772fSJohn Dyson 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
301996c772fSJohn Dyson 		if (vfsp->vfc_mountroot == NULL)
302e0e9c421SDavid Greenman 			continue;
303996c772fSJohn Dyson 		if ((error = (*vfsp->vfc_mountroot)()) == 0)
304996c772fSJohn Dyson 			return (0);
305996c772fSJohn Dyson 		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
306e0e9c421SDavid Greenman 	}
307996c772fSJohn Dyson 	return (ENODEV);
308e0e9c421SDavid Greenman }
309996c772fSJohn Dyson #endif
310e0e9c421SDavid Greenman 
311df8bae1dSRodney W. Grimes /*
312df8bae1dSRodney W. Grimes  * Lookup a mount point by filesystem identifier.
313df8bae1dSRodney W. Grimes  */
314df8bae1dSRodney W. Grimes struct mount *
315996c772fSJohn Dyson vfs_getvfs(fsid)
316df8bae1dSRodney W. Grimes 	fsid_t *fsid;
317df8bae1dSRodney W. Grimes {
318df8bae1dSRodney W. Grimes 	register struct mount *mp;
319df8bae1dSRodney W. Grimes 
320996c772fSJohn Dyson 	simple_lock(&mountlist_slock);
3210429e37aSPoul-Henning Kamp 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
322df8bae1dSRodney W. Grimes 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
323996c772fSJohn Dyson 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
324996c772fSJohn Dyson 			simple_unlock(&mountlist_slock);
325df8bae1dSRodney W. Grimes 			return (mp);
326df8bae1dSRodney W. Grimes 		}
327996c772fSJohn Dyson 	}
328996c772fSJohn Dyson 	simple_unlock(&mountlist_slock);
329df8bae1dSRodney W. Grimes 	return ((struct mount *) 0);
330df8bae1dSRodney W. Grimes }
331df8bae1dSRodney W. Grimes 
332df8bae1dSRodney W. Grimes /*
33305ecdd70SBruce Evans  * Get a new unique fsid.  Try to make its val[0] unique, since this value
33405ecdd70SBruce Evans  * will be used to create fake device numbers for stat().  Also try (but
33505ecdd70SBruce Evans  * not so hard) to make its val[0] unique mod 2^16, since some emulators only
33605ecdd70SBruce Evans  * support 16-bit device numbers.  We end up with unique val[0]'s for the
33705ecdd70SBruce Evans  * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls.
338e6f71111SMatthew Dillon  *
33961214975SBruce Evans  * Keep in mind that several mounts may be running in parallel.  Starting
34005ecdd70SBruce Evans  * the search one past where the previous search terminated is both a
34105ecdd70SBruce Evans  * micro-optimization and a defense against returning the same fsid to
34205ecdd70SBruce Evans  * different mounts.
343df8bae1dSRodney W. Grimes  */
344df8bae1dSRodney W. Grimes void
345996c772fSJohn Dyson vfs_getnewfsid(mp)
346df8bae1dSRodney W. Grimes 	struct mount *mp;
347df8bae1dSRodney W. Grimes {
34805ecdd70SBruce Evans 	static u_int16_t mntid_base;
349df8bae1dSRodney W. Grimes 	fsid_t tfsid;
35005ecdd70SBruce Evans 	int mtype;
351df8bae1dSRodney W. Grimes 
352996c772fSJohn Dyson 	simple_lock(&mntid_slock);
353996c772fSJohn Dyson 	mtype = mp->mnt_vfc->vfc_typenum;
354df8bae1dSRodney W. Grimes 	tfsid.val[1] = mtype;
3553660ebc2SBoris Popov 	mtype = (mtype & 0xFF) << 24;
35605ecdd70SBruce Evans 	for (;;) {
3573660ebc2SBoris Popov 		tfsid.val[0] = makeudev(255,
3583660ebc2SBoris Popov 		    mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF));
3593660ebc2SBoris Popov 		mntid_base++;
360e6f71111SMatthew Dillon 		if (vfs_getvfs(&tfsid) == NULL)
361e6f71111SMatthew Dillon 			break;
362df8bae1dSRodney W. Grimes 	}
363df8bae1dSRodney W. Grimes 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
364e6f71111SMatthew Dillon 	mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];
365996c772fSJohn Dyson 	simple_unlock(&mntid_slock);
366df8bae1dSRodney W. Grimes }
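
/*
 * Worked example for the packing above (values chosen for
 * illustration): with vfc_typenum 5 and mntid_base 0x1234,
 *
 *	mtype = (5 & 0xFF) << 24                       = 0x05000000
 *	minor = mtype | ((0x1234 & 0xFF00) << 8)
 *		      | (0x1234 & 0xFF)                = 0x05120034
 *	tfsid.val[0] = makeudev(255, 0x05120034)
 *
 * Only the low byte of mntid_base lands in the low 16 bits of the
 * minor number, which is why val[0] stays unique mod 2^16 for just
 * the first 2^8 calls.
 */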
367df8bae1dSRodney W. Grimes 
368df8bae1dSRodney W. Grimes /*
369a2801b77SJohn Polstra  * Knob to control the precision of file timestamps:
370a2801b77SJohn Polstra  *
371a2801b77SJohn Polstra  *   0 = seconds only; nanoseconds zeroed.
372a2801b77SJohn Polstra  *   1 = seconds and nanoseconds, accurate within 1/HZ.
373a2801b77SJohn Polstra  *   2 = seconds and nanoseconds, truncated to microseconds.
374a2801b77SJohn Polstra  * >=3 = seconds and nanoseconds, maximum precision.
375a2801b77SJohn Polstra  */
376a2801b77SJohn Polstra enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
377a2801b77SJohn Polstra 
378a2801b77SJohn Polstra static int timestamp_precision = TSP_SEC;
379a2801b77SJohn Polstra SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
380a2801b77SJohn Polstra     &timestamp_precision, 0, "");
381a2801b77SJohn Polstra 
382a2801b77SJohn Polstra /*
383a2801b77SJohn Polstra  * Get a current timestamp.
384a2801b77SJohn Polstra  */
385a2801b77SJohn Polstra void
386a2801b77SJohn Polstra vfs_timestamp(tsp)
387a2801b77SJohn Polstra 	struct timespec *tsp;
388a2801b77SJohn Polstra {
389a2801b77SJohn Polstra 	struct timeval tv;
390a2801b77SJohn Polstra 
391a2801b77SJohn Polstra 	switch (timestamp_precision) {
392a2801b77SJohn Polstra 	case TSP_SEC:
393a2801b77SJohn Polstra 		tsp->tv_sec = time_second;
394a2801b77SJohn Polstra 		tsp->tv_nsec = 0;
395a2801b77SJohn Polstra 		break;
396a2801b77SJohn Polstra 	case TSP_HZ:
397a2801b77SJohn Polstra 		getnanotime(tsp);
398a2801b77SJohn Polstra 		break;
399a2801b77SJohn Polstra 	case TSP_USEC:
400a2801b77SJohn Polstra 		microtime(&tv);
401a2801b77SJohn Polstra 		TIMEVAL_TO_TIMESPEC(&tv, tsp);
402a2801b77SJohn Polstra 		break;
403a2801b77SJohn Polstra 	case TSP_NSEC:
404a2801b77SJohn Polstra 	default:
405a2801b77SJohn Polstra 		nanotime(tsp);
406a2801b77SJohn Polstra 		break;
407a2801b77SJohn Polstra 	}
408a2801b77SJohn Polstra }
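
/*
 * Example (illustrative sketch): a filesystem stamping an inode's
 * modification time picks up the precision knob simply by calling
 * vfs_timestamp() instead of nanotime() ("ip" stands in for a
 * filesystem's private in-core inode):
 */
#if 0
	struct timespec ts;

	vfs_timestamp(&ts);
	ip->i_mtime = ts.tv_sec;
	ip->i_mtimensec = ts.tv_nsec;
#endif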
409a2801b77SJohn Polstra 
410a2801b77SJohn Polstra /*
411df8bae1dSRodney W. Grimes  * Set vnode attributes to VNOVAL
412df8bae1dSRodney W. Grimes  */
41326f9a767SRodney W. Grimes void
41426f9a767SRodney W. Grimes vattr_null(vap)
415df8bae1dSRodney W. Grimes 	register struct vattr *vap;
416df8bae1dSRodney W. Grimes {
417df8bae1dSRodney W. Grimes 
418df8bae1dSRodney W. Grimes 	vap->va_type = VNON;
41926f9a767SRodney W. Grimes 	vap->va_size = VNOVAL;
42026f9a767SRodney W. Grimes 	vap->va_bytes = VNOVAL;
4217a6c46b5SDoug Rabson 	vap->va_mode = VNOVAL;
4227a6c46b5SDoug Rabson 	vap->va_nlink = VNOVAL;
4237a6c46b5SDoug Rabson 	vap->va_uid = VNOVAL;
4247a6c46b5SDoug Rabson 	vap->va_gid = VNOVAL;
4257a6c46b5SDoug Rabson 	vap->va_fsid = VNOVAL;
4267a6c46b5SDoug Rabson 	vap->va_fileid = VNOVAL;
4277a6c46b5SDoug Rabson 	vap->va_blocksize = VNOVAL;
4287a6c46b5SDoug Rabson 	vap->va_rdev = VNOVAL;
4297a6c46b5SDoug Rabson 	vap->va_atime.tv_sec = VNOVAL;
4307a6c46b5SDoug Rabson 	vap->va_atime.tv_nsec = VNOVAL;
4317a6c46b5SDoug Rabson 	vap->va_mtime.tv_sec = VNOVAL;
4327a6c46b5SDoug Rabson 	vap->va_mtime.tv_nsec = VNOVAL;
4337a6c46b5SDoug Rabson 	vap->va_ctime.tv_sec = VNOVAL;
4347a6c46b5SDoug Rabson 	vap->va_ctime.tv_nsec = VNOVAL;
4357a6c46b5SDoug Rabson 	vap->va_flags = VNOVAL;
4367a6c46b5SDoug Rabson 	vap->va_gen = VNOVAL;
437df8bae1dSRodney W. Grimes 	vap->va_vaflags = 0;
438df8bae1dSRodney W. Grimes }
439df8bae1dSRodney W. Grimes 
440df8bae1dSRodney W. Grimes /*
441df8bae1dSRodney W. Grimes  * Routines having to do with the management of the vnode table.
442df8bae1dSRodney W. Grimes  */
443df8bae1dSRodney W. Grimes 
444df8bae1dSRodney W. Grimes /*
445df8bae1dSRodney W. Grimes  * Return the next vnode from the free list.
446df8bae1dSRodney W. Grimes  */
44726f9a767SRodney W. Grimes int
448df8bae1dSRodney W. Grimes getnewvnode(tag, mp, vops, vpp)
449df8bae1dSRodney W. Grimes 	enum vtagtype tag;
450df8bae1dSRodney W. Grimes 	struct mount *mp;
451f57e6547SBruce Evans 	vop_t **vops;
452df8bae1dSRodney W. Grimes 	struct vnode **vpp;
453df8bae1dSRodney W. Grimes {
454c904bbbdSKirk McKusick 	int s, count;
455996c772fSJohn Dyson 	struct proc *p = curproc;	/* XXX */
456c904bbbdSKirk McKusick 	struct vnode *vp = NULL;
457f2a2857bSKirk McKusick 	struct mount *vnmp;
45895e5e988SJohn Dyson 	vm_object_t object;
459df8bae1dSRodney W. Grimes 
460b15a966eSPoul-Henning Kamp 	/*
461b15a966eSPoul-Henning Kamp 	 * We take the least recently used vnode from the freelist
462b15a966eSPoul-Henning Kamp 	 * if we can get it and it has no cached pages, and no
463b15a966eSPoul-Henning Kamp 	 * namecache entries are relative to it.
464b15a966eSPoul-Henning Kamp 	 * Otherwise we allocate a new vnode.
465b15a966eSPoul-Henning Kamp 	 */
466b15a966eSPoul-Henning Kamp 
467925a3a41SJohn Dyson 	s = splbio();
468996c772fSJohn Dyson 	simple_lock(&vnode_free_list_slock);
469925a3a41SJohn Dyson 
47000544193SPoul-Henning Kamp 	if (wantfreevnodes && freevnodes < wantfreevnodes) {
47100544193SPoul-Henning Kamp 		vp = NULL;
472d047b580SPoul-Henning Kamp 	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
473d047b580SPoul-Henning Kamp 		/*
474d047b580SPoul-Henning Kamp 		 * XXX: this is only here to be backwards compatible
475d047b580SPoul-Henning Kamp 		 */
47600544193SPoul-Henning Kamp 		vp = NULL;
477c904bbbdSKirk McKusick 	} else for (count = 0; count < freevnodes; count++) {
478c904bbbdSKirk McKusick 		vp = TAILQ_FIRST(&vnode_free_list);
479c904bbbdSKirk McKusick 		if (vp == NULL || vp->v_usecount)
480c904bbbdSKirk McKusick 			panic("getnewvnode: free vnode isn't");
48195e5e988SJohn Dyson 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
482c904bbbdSKirk McKusick 		/*
483c904bbbdSKirk McKusick 		 * Don't recycle if active in the namecache or
484c904bbbdSKirk McKusick 		 * if it still has cached pages or we cannot get
485c904bbbdSKirk McKusick 		 * its interlock.
486c904bbbdSKirk McKusick 		 */
487c904bbbdSKirk McKusick 		if (LIST_FIRST(&vp->v_cache_src) != NULL ||
4889ff5ce6bSBoris Popov 		    (VOP_GETVOBJECT(vp, &object) == 0 &&
4899ff5ce6bSBoris Popov 		     (object->resident_page_count || object->ref_count)) ||
490c904bbbdSKirk McKusick 		    !simple_lock_try(&vp->v_interlock)) {
491c904bbbdSKirk McKusick 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
492c904bbbdSKirk McKusick 			vp = NULL;
493b15a966eSPoul-Henning Kamp 			continue;
494c904bbbdSKirk McKusick 		}
495f2a2857bSKirk McKusick 		/*
496f2a2857bSKirk McKusick 		 * Skip over it if its filesystem is being suspended.
497f2a2857bSKirk McKusick 		 */
498f2a2857bSKirk McKusick 		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
499b15a966eSPoul-Henning Kamp 			break;
500f2a2857bSKirk McKusick 		simple_unlock(&vp->v_interlock);
501f2a2857bSKirk McKusick 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
502f2a2857bSKirk McKusick 		vp = NULL;
503b15a966eSPoul-Henning Kamp 	}
504b15a966eSPoul-Henning Kamp 	if (vp) {
505a051452aSPoul-Henning Kamp 		vp->v_flag |= VDOOMED;
506b15a966eSPoul-Henning Kamp 		freevnodes--;
507996c772fSJohn Dyson 		simple_unlock(&vnode_free_list_slock);
508a051452aSPoul-Henning Kamp 		cache_purge(vp);
509df8bae1dSRodney W. Grimes 		vp->v_lease = NULL;
5102be70f79SJohn Dyson 		if (vp->v_type != VBAD) {
511996c772fSJohn Dyson 			vgonel(vp, p);
5122be70f79SJohn Dyson 		} else {
513996c772fSJohn Dyson 			simple_unlock(&vp->v_interlock);
514996c772fSJohn Dyson 		}
515f2a2857bSKirk McKusick 		vn_finished_write(vnmp);
516bd7e5f99SJohn Dyson 
5175526d2d9SEivind Eklund #ifdef INVARIANTS
518797f2d22SPoul-Henning Kamp 		{
519797f2d22SPoul-Henning Kamp 			int s;
5200d94caffSDavid Greenman 
521df8bae1dSRodney W. Grimes 			if (vp->v_data)
522df8bae1dSRodney W. Grimes 				panic("cleaned vnode isn't");
523df8bae1dSRodney W. Grimes 			s = splbio();
524df8bae1dSRodney W. Grimes 			if (vp->v_numoutput)
525df8bae1dSRodney W. Grimes 				panic("Clean vnode has pending I/O's");
526df8bae1dSRodney W. Grimes 			splx(s);
527f2a2857bSKirk McKusick 			if (vp->v_writecount != 0)
528f2a2857bSKirk McKusick 				panic("Non-zero write count");
529797f2d22SPoul-Henning Kamp 		}
530df8bae1dSRodney W. Grimes #endif
531df8bae1dSRodney W. Grimes 		vp->v_flag = 0;
532df8bae1dSRodney W. Grimes 		vp->v_lastw = 0;
533df8bae1dSRodney W. Grimes 		vp->v_lasta = 0;
534df8bae1dSRodney W. Grimes 		vp->v_cstart = 0;
535df8bae1dSRodney W. Grimes 		vp->v_clen = 0;
536df8bae1dSRodney W. Grimes 		vp->v_socket = 0;
537b15a966eSPoul-Henning Kamp 	} else {
538b15a966eSPoul-Henning Kamp 		simple_unlock(&vnode_free_list_slock);
5392d8acc0fSJohn Dyson 		vp = (struct vnode *) zalloc(vnode_zone);
540b15a966eSPoul-Henning Kamp 		bzero((char *) vp, sizeof *vp);
54195e5e988SJohn Dyson 		simple_lock_init(&vp->v_interlock);
542b15a966eSPoul-Henning Kamp 		vp->v_dd = vp;
543a051452aSPoul-Henning Kamp 		cache_purge(vp);
544b15a966eSPoul-Henning Kamp 		LIST_INIT(&vp->v_cache_src);
545b15a966eSPoul-Henning Kamp 		TAILQ_INIT(&vp->v_cache_dst);
546b15a966eSPoul-Henning Kamp 		numvnodes++;
547df8bae1dSRodney W. Grimes 	}
548b15a966eSPoul-Henning Kamp 
54916e9e530SPeter Wemm 	TAILQ_INIT(&vp->v_cleanblkhd);
55016e9e530SPeter Wemm 	TAILQ_INIT(&vp->v_dirtyblkhd);
551f9ceb7c7SDavid Greenman 	vp->v_type = VNON;
552df8bae1dSRodney W. Grimes 	vp->v_tag = tag;
553df8bae1dSRodney W. Grimes 	vp->v_op = vops;
554df8bae1dSRodney W. Grimes 	insmntque(vp, mp);
555df8bae1dSRodney W. Grimes 	*vpp = vp;
556df8bae1dSRodney W. Grimes 	vp->v_usecount = 1;
557df8bae1dSRodney W. Grimes 	vp->v_data = 0;
558925a3a41SJohn Dyson 	splx(s);
55964d3c7e3SJohn Dyson 
560fb116777SEivind Eklund 	vfs_object_create(vp, p, p->p_ucred);
561df8bae1dSRodney W. Grimes 	return (0);
562df8bae1dSRodney W. Grimes }
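
/*
 * Example (illustrative sketch, modeled loosely on ffs_vget()): a
 * filesystem obtains a fresh vnode for a new in-core inode and hangs
 * its private data off it ("ip" and "ffs_vnodeop_p" stand in for the
 * filesystem's inode and its vnode operations vector):
 */
#if 0
	struct vnode *vp;
	int error;

	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
	if (error)
		return (error);
	vp->v_data = ip;
	ip->i_vnode = vp;
#endif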
563df8bae1dSRodney W. Grimes 
564df8bae1dSRodney W. Grimes /*
565df8bae1dSRodney W. Grimes  * Move a vnode from one mount queue to another.
566df8bae1dSRodney W. Grimes  */
567cb451ebdSBruce Evans static void
568df8bae1dSRodney W. Grimes insmntque(vp, mp)
569df8bae1dSRodney W. Grimes 	register struct vnode *vp;
570df8bae1dSRodney W. Grimes 	register struct mount *mp;
571df8bae1dSRodney W. Grimes {
572df8bae1dSRodney W. Grimes 
573996c772fSJohn Dyson 	simple_lock(&mntvnode_slock);
574df8bae1dSRodney W. Grimes 	/*
575df8bae1dSRodney W. Grimes 	 * Delete from old mount point vnode list, if on one.
576df8bae1dSRodney W. Grimes 	 */
577df8bae1dSRodney W. Grimes 	if (vp->v_mount != NULL)
578df8bae1dSRodney W. Grimes 		LIST_REMOVE(vp, v_mntvnodes);
579df8bae1dSRodney W. Grimes 	/*
580df8bae1dSRodney W. Grimes 	 * Insert into list of vnodes for the new mount point, if available.
581df8bae1dSRodney W. Grimes 	 */
582996c772fSJohn Dyson 	if ((vp->v_mount = mp) == NULL) {
583996c772fSJohn Dyson 		simple_unlock(&mntvnode_slock);
584df8bae1dSRodney W. Grimes 		return;
585996c772fSJohn Dyson 	}
586df8bae1dSRodney W. Grimes 	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
587996c772fSJohn Dyson 	simple_unlock(&mntvnode_slock);
588df8bae1dSRodney W. Grimes }
589df8bae1dSRodney W. Grimes 
590df8bae1dSRodney W. Grimes /*
591df8bae1dSRodney W. Grimes  * Update outstanding I/O count and do wakeup if requested.
592df8bae1dSRodney W. Grimes  */
59326f9a767SRodney W. Grimes void
594df8bae1dSRodney W. Grimes vwakeup(bp)
595df8bae1dSRodney W. Grimes 	register struct buf *bp;
596df8bae1dSRodney W. Grimes {
597df8bae1dSRodney W. Grimes 	register struct vnode *vp;
598df8bae1dSRodney W. Grimes 
599df8bae1dSRodney W. Grimes 	bp->b_flags &= ~B_WRITEINPROG;
600bb56ec4aSPoul-Henning Kamp 	if ((vp = bp->b_vp)) {
601df8bae1dSRodney W. Grimes 		vp->v_numoutput--;
602df8bae1dSRodney W. Grimes 		if (vp->v_numoutput < 0)
603df8bae1dSRodney W. Grimes 			panic("vwakeup: neg numoutput");
604a3a8bb29SDavid Greenman 		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
605df8bae1dSRodney W. Grimes 			vp->v_flag &= ~VBWAIT;
606df8bae1dSRodney W. Grimes 			wakeup((caddr_t) &vp->v_numoutput);
607df8bae1dSRodney W. Grimes 		}
608df8bae1dSRodney W. Grimes 	}
609df8bae1dSRodney W. Grimes }
610df8bae1dSRodney W. Grimes 
611df8bae1dSRodney W. Grimes /*
612df8bae1dSRodney W. Grimes  * Flush out and invalidate all buffers associated with a vnode.
613df8bae1dSRodney W. Grimes  * Called with the underlying object locked.
614df8bae1dSRodney W. Grimes  */
615df8bae1dSRodney W. Grimes int
616df8bae1dSRodney W. Grimes vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
617df8bae1dSRodney W. Grimes 	register struct vnode *vp;
618df8bae1dSRodney W. Grimes 	int flags;
619df8bae1dSRodney W. Grimes 	struct ucred *cred;
620df8bae1dSRodney W. Grimes 	struct proc *p;
621df8bae1dSRodney W. Grimes 	int slpflag, slptimeo;
622df8bae1dSRodney W. Grimes {
623df8bae1dSRodney W. Grimes 	register struct buf *bp;
624df8bae1dSRodney W. Grimes 	struct buf *nbp, *blist;
625df8bae1dSRodney W. Grimes 	int s, error;
6261cdeb653SDavid Greenman 	vm_object_t object;
627df8bae1dSRodney W. Grimes 
62828913ebeSJulian Elischer 	if (flags & V_SAVE) {
62928913ebeSJulian Elischer 		s = splbio();
63028913ebeSJulian Elischer 		while (vp->v_numoutput) {
63128913ebeSJulian Elischer 			vp->v_flag |= VBWAIT;
63229c98cd8SEivind Eklund 			error = tsleep((caddr_t)&vp->v_numoutput,
63329c98cd8SEivind Eklund 			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
63429c98cd8SEivind Eklund 			if (error) {
63529c98cd8SEivind Eklund 				splx(s);
63629c98cd8SEivind Eklund 				return (error);
63729c98cd8SEivind Eklund 			}
63828913ebeSJulian Elischer 		}
63916e9e530SPeter Wemm 		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
64028913ebeSJulian Elischer 			splx(s);
64128913ebeSJulian Elischer 			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
642df8bae1dSRodney W. Grimes 				return (error);
64328913ebeSJulian Elischer 			s = splbio();
64428913ebeSJulian Elischer 			if (vp->v_numoutput > 0 ||
64516e9e530SPeter Wemm 			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
646df8bae1dSRodney W. Grimes 				panic("vinvalbuf: dirty bufs");
647df8bae1dSRodney W. Grimes 		}
64828913ebeSJulian Elischer 		splx(s);
64928913ebeSJulian Elischer 	}
6506476c0d2SJohn Dyson 	s = splbio();
651df8bae1dSRodney W. Grimes 	for (;;) {
65216e9e530SPeter Wemm 		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
65320f02ef5SPeter Wemm 		if (!blist)
65416e9e530SPeter Wemm 			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
655df8bae1dSRodney W. Grimes 		if (!blist)
656df8bae1dSRodney W. Grimes 			break;
657df8bae1dSRodney W. Grimes 
658df8bae1dSRodney W. Grimes 		for (bp = blist; bp; bp = nbp) {
65916e9e530SPeter Wemm 			nbp = TAILQ_NEXT(bp, b_vnbufs);
66067812eacSKirk McKusick 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
66167812eacSKirk McKusick 				error = BUF_TIMELOCK(bp,
66267812eacSKirk McKusick 				    LK_EXCLUSIVE | LK_SLEEPFAIL,
66367812eacSKirk McKusick 				    "vinvalbuf", slpflag, slptimeo);
66467812eacSKirk McKusick 				if (error == ENOLCK)
66567812eacSKirk McKusick 					break;
666df8bae1dSRodney W. Grimes 				splx(s);
667df8bae1dSRodney W. Grimes 				return (error);
6682f2160daSDavid Greenman 			}
669df8bae1dSRodney W. Grimes 			/*
6700d94caffSDavid Greenman 			 * XXX Since there are no node locks for NFS, I
6710d94caffSDavid Greenman 			 * believe there is a slight chance that a delayed
6720d94caffSDavid Greenman 			 * write will occur while sleeping just above, so
67352c64c95SJohn Dyson 			 * check for it.  Note that vfs_bio_awrite expects
67452c64c95SJohn Dyson 			 * buffers to reside on a queue, while VOP_BWRITE and
67552c64c95SJohn Dyson 			 * brelse do not.
676df8bae1dSRodney W. Grimes 			 */
67752c64c95SJohn Dyson 			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
67852c64c95SJohn Dyson 				(flags & V_SAVE)) {
67952c64c95SJohn Dyson 
68095e5e988SJohn Dyson 				if (bp->b_vp == vp) {
68195e5e988SJohn Dyson 					if (bp->b_flags & B_CLUSTEROK) {
68267812eacSKirk McKusick 						BUF_UNLOCK(bp);
68395e5e988SJohn Dyson 						vfs_bio_awrite(bp);
68495e5e988SJohn Dyson 					} else {
68552c64c95SJohn Dyson 						bremfree(bp);
68667812eacSKirk McKusick 						bp->b_flags |= B_ASYNC;
687b99c307aSPoul-Henning Kamp 						BUF_WRITE(bp);
68895e5e988SJohn Dyson 					}
68995e5e988SJohn Dyson 				} else {
69052c64c95SJohn Dyson 					bremfree(bp);
691b99c307aSPoul-Henning Kamp 					(void) BUF_WRITE(bp);
69295e5e988SJohn Dyson 				}
693df8bae1dSRodney W. Grimes 				break;
694df8bae1dSRodney W. Grimes 			}
69552c64c95SJohn Dyson 			bremfree(bp);
69667812eacSKirk McKusick 			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
697bef608bdSJohn Dyson 			bp->b_flags &= ~B_ASYNC;
698df8bae1dSRodney W. Grimes 			brelse(bp);
699df8bae1dSRodney W. Grimes 		}
700df8bae1dSRodney W. Grimes 	}
7011cdeb653SDavid Greenman 
7020d94caffSDavid Greenman 	while (vp->v_numoutput > 0) {
7030d94caffSDavid Greenman 		vp->v_flag |= VBWAIT;
7040d94caffSDavid Greenman 		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
7050d94caffSDavid Greenman 	}
7062f2160daSDavid Greenman 
7070d94caffSDavid Greenman 	splx(s);
7080d94caffSDavid Greenman 
709ff769afcSDavid Greenman 	/*
710ff769afcSDavid Greenman 	 * Destroy the copy in the VM cache, too.
711ff769afcSDavid Greenman 	 */
71295e5e988SJohn Dyson 	simple_lock(&vp->v_interlock);
7139ff5ce6bSBoris Popov 	if (VOP_GETVOBJECT(vp, &object) == 0) {
71495e5e988SJohn Dyson 		vm_object_page_remove(object, 0, 0,
71595e5e988SJohn Dyson 			(flags & V_SAVE) ? TRUE : FALSE);
7161cdeb653SDavid Greenman 	}
71795e5e988SJohn Dyson 	simple_unlock(&vp->v_interlock);
71895e5e988SJohn Dyson 
71916e9e530SPeter Wemm 	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
720df8bae1dSRodney W. Grimes 		panic("vinvalbuf: flush failed");
721df8bae1dSRodney W. Grimes 	return (0);
722df8bae1dSRodney W. Grimes }
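
/*
 * Example (illustrative sketch): a typical caller, such as a
 * filesystem about to reclaim or revoke a vnode, flushes everything
 * and preserves dirty data while holding the vnode lock:
 */
#if 0
	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
	if (error)
		return (error);
#endif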
723df8bae1dSRodney W. Grimes 
724df8bae1dSRodney W. Grimes /*
725bef608bdSJohn Dyson  * Truncate a file's buffers and pages to a specified length.  This
726bef608bdSJohn Dyson  * is in lieu of the old vinvalbuf mechanism, which performed unneeded
727bef608bdSJohn Dyson  * sync activity.
728bef608bdSJohn Dyson  */
729bef608bdSJohn Dyson int
730bef608bdSJohn Dyson vtruncbuf(vp, cred, p, length, blksize)
731bef608bdSJohn Dyson 	register struct vnode *vp;
732bef608bdSJohn Dyson 	struct ucred *cred;
733bef608bdSJohn Dyson 	struct proc *p;
734bef608bdSJohn Dyson 	off_t length;
735bef608bdSJohn Dyson 	int blksize;
736bef608bdSJohn Dyson {
737bef608bdSJohn Dyson 	register struct buf *bp;
738f5ef029eSPoul-Henning Kamp 	struct buf *nbp;
739f5ef029eSPoul-Henning Kamp 	int s, anyfreed;
740bef608bdSJohn Dyson 	int trunclbn;
741bef608bdSJohn Dyson 
742bef608bdSJohn Dyson 	/*
743bef608bdSJohn Dyson 	 * Round up to the *next* lbn.
744bef608bdSJohn Dyson 	 */
7451c77c6b7SJohn Dyson 	trunclbn = (length + blksize - 1) / blksize;
746bef608bdSJohn Dyson 
747bef608bdSJohn Dyson 	s = splbio();
748bef608bdSJohn Dyson restart:
749bef608bdSJohn Dyson 	anyfreed = 1;
750bef608bdSJohn Dyson 	for (;anyfreed;) {
751bef608bdSJohn Dyson 		anyfreed = 0;
75216e9e530SPeter Wemm 		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
75316e9e530SPeter Wemm 			nbp = TAILQ_NEXT(bp, b_vnbufs);
754bef608bdSJohn Dyson 			if (bp->b_lblkno >= trunclbn) {
75567812eacSKirk McKusick 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
75667812eacSKirk McKusick 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
75752c64c95SJohn Dyson 					goto restart;
758bef608bdSJohn Dyson 				} else {
759bef608bdSJohn Dyson 					bremfree(bp);
76067812eacSKirk McKusick 					bp->b_flags |= (B_INVAL | B_RELBUF);
761bef608bdSJohn Dyson 					bp->b_flags &= ~B_ASYNC;
762bef608bdSJohn Dyson 					brelse(bp);
763bef608bdSJohn Dyson 					anyfreed = 1;
764bef608bdSJohn Dyson 				}
76502b00854SKirk McKusick 				if (nbp &&
76602b00854SKirk McKusick 				    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
76752c64c95SJohn Dyson 				    (nbp->b_vp != vp) ||
768bef608bdSJohn Dyson 				    (nbp->b_flags & B_DELWRI))) {
769bef608bdSJohn Dyson 					goto restart;
770bef608bdSJohn Dyson 				}
771bef608bdSJohn Dyson 			}
772bef608bdSJohn Dyson 		}
773bef608bdSJohn Dyson 
77416e9e530SPeter Wemm 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
77516e9e530SPeter Wemm 			nbp = TAILQ_NEXT(bp, b_vnbufs);
776bef608bdSJohn Dyson 			if (bp->b_lblkno >= trunclbn) {
77767812eacSKirk McKusick 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
77867812eacSKirk McKusick 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
77952c64c95SJohn Dyson 					goto restart;
780bef608bdSJohn Dyson 				} else {
781bef608bdSJohn Dyson 					bremfree(bp);
78267812eacSKirk McKusick 					bp->b_flags |= (B_INVAL | B_RELBUF);
783bef608bdSJohn Dyson 					bp->b_flags &= ~B_ASYNC;
784bef608bdSJohn Dyson 					brelse(bp);
785bef608bdSJohn Dyson 					anyfreed = 1;
786bef608bdSJohn Dyson 				}
78702b00854SKirk McKusick 				if (nbp &&
78802b00854SKirk McKusick 				    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
78952c64c95SJohn Dyson 				    (nbp->b_vp != vp) ||
790bef608bdSJohn Dyson 				    (nbp->b_flags & B_DELWRI) == 0)) {
791bef608bdSJohn Dyson 					goto restart;
792bef608bdSJohn Dyson 				}
793bef608bdSJohn Dyson 			}
794bef608bdSJohn Dyson 		}
795bef608bdSJohn Dyson 	}
7962deb5d04SJohn Dyson 
79752c64c95SJohn Dyson 	if (length > 0) {
79852c64c95SJohn Dyson restartsync:
79916e9e530SPeter Wemm 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
80016e9e530SPeter Wemm 			nbp = TAILQ_NEXT(bp, b_vnbufs);
8012deb5d04SJohn Dyson 			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
80267812eacSKirk McKusick 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
80367812eacSKirk McKusick 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
80467812eacSKirk McKusick 					goto restart;
8052deb5d04SJohn Dyson 				} else {
8062deb5d04SJohn Dyson 					bremfree(bp);
80752c64c95SJohn Dyson 					if (bp->b_vp == vp) {
80852c64c95SJohn Dyson 						bp->b_flags |= B_ASYNC;
80952c64c95SJohn Dyson 					} else {
81052c64c95SJohn Dyson 						bp->b_flags &= ~B_ASYNC;
81152c64c95SJohn Dyson 					}
812b99c307aSPoul-Henning Kamp 					BUF_WRITE(bp);
8132deb5d04SJohn Dyson 				}
81452c64c95SJohn Dyson 				goto restartsync;
8152deb5d04SJohn Dyson 			}
81652c64c95SJohn Dyson 
8172deb5d04SJohn Dyson 		}
8182deb5d04SJohn Dyson 	}
8192deb5d04SJohn Dyson 
8202deb5d04SJohn Dyson 	while (vp->v_numoutput > 0) {
8212deb5d04SJohn Dyson 		vp->v_flag |= VBWAIT;
8222deb5d04SJohn Dyson 		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
8232deb5d04SJohn Dyson 	}
8242deb5d04SJohn Dyson 
825bef608bdSJohn Dyson 	splx(s);
826bef608bdSJohn Dyson 
827bef608bdSJohn Dyson 	vnode_pager_setsize(vp, length);
828bef608bdSJohn Dyson 
829bef608bdSJohn Dyson 	return (0);
830bef608bdSJohn Dyson }
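
/*
 * Example (illustrative sketch): a filesystem's truncate path throws
 * away only the buffers past the new end of file ("fs_bsize" stands
 * in for the filesystem's block size):
 */
#if 0
	error = vtruncbuf(vp, cred, p, length, fs_bsize);
	if (error)
		return (error);
#endif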
831bef608bdSJohn Dyson 
832bef608bdSJohn Dyson /*
833df8bae1dSRodney W. Grimes  * Associate a buffer with a vnode.
834df8bae1dSRodney W. Grimes  */
83526f9a767SRodney W. Grimes void
836df8bae1dSRodney W. Grimes bgetvp(vp, bp)
837df8bae1dSRodney W. Grimes 	register struct vnode *vp;
838df8bae1dSRodney W. Grimes 	register struct buf *bp;
839df8bae1dSRodney W. Grimes {
840602d2b48SDavid Greenman 	int s;
841df8bae1dSRodney W. Grimes 
8425526d2d9SEivind Eklund 	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
843219cbf59SEivind Eklund 
844a051452aSPoul-Henning Kamp 	vhold(vp);
845df8bae1dSRodney W. Grimes 	bp->b_vp = vp;
84641d2e3e0SPoul-Henning Kamp 	bp->b_dev = vn_todev(vp);
847df8bae1dSRodney W. Grimes 	/*
848df8bae1dSRodney W. Grimes 	 * Insert onto list for new vnode.
849df8bae1dSRodney W. Grimes 	 */
850602d2b48SDavid Greenman 	s = splbio();
85102b00854SKirk McKusick 	bp->b_xflags |= BX_VNCLEAN;
85202b00854SKirk McKusick 	bp->b_xflags &= ~BX_VNDIRTY;
85316e9e530SPeter Wemm 	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
854602d2b48SDavid Greenman 	splx(s);
855df8bae1dSRodney W. Grimes }
856df8bae1dSRodney W. Grimes 
857df8bae1dSRodney W. Grimes /*
858df8bae1dSRodney W. Grimes  * Disassociate a buffer from a vnode.
859df8bae1dSRodney W. Grimes  */
86026f9a767SRodney W. Grimes void
861df8bae1dSRodney W. Grimes brelvp(bp)
862df8bae1dSRodney W. Grimes 	register struct buf *bp;
863df8bae1dSRodney W. Grimes {
864df8bae1dSRodney W. Grimes 	struct vnode *vp;
86516e9e530SPeter Wemm 	struct buflists *listheadp;
866602d2b48SDavid Greenman 	int s;
867df8bae1dSRodney W. Grimes 
8685526d2d9SEivind Eklund 	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
8692be70f79SJohn Dyson 
870df8bae1dSRodney W. Grimes 	/*
871df8bae1dSRodney W. Grimes 	 * Delete from old vnode list, if on one.
872df8bae1dSRodney W. Grimes 	 */
873b1897c19SJulian Elischer 	vp = bp->b_vp;
874602d2b48SDavid Greenman 	s = splbio();
87502b00854SKirk McKusick 	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
87602b00854SKirk McKusick 		if (bp->b_xflags & BX_VNDIRTY)
87716e9e530SPeter Wemm 			listheadp = &vp->v_dirtyblkhd;
87816e9e530SPeter Wemm 		else
87916e9e530SPeter Wemm 			listheadp = &vp->v_cleanblkhd;
88016e9e530SPeter Wemm 		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
88102b00854SKirk McKusick 		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
88216e9e530SPeter Wemm 	}
88316e9e530SPeter Wemm 	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
884b1897c19SJulian Elischer 		vp->v_flag &= ~VONWORKLST;
885b1897c19SJulian Elischer 		LIST_REMOVE(vp, v_synclist);
886b1897c19SJulian Elischer 	}
887602d2b48SDavid Greenman 	splx(s);
888df8bae1dSRodney W. Grimes 	bp->b_vp = (struct vnode *) 0;
889a051452aSPoul-Henning Kamp 	vdrop(vp);
890df8bae1dSRodney W. Grimes }
891df8bae1dSRodney W. Grimes 
892df8bae1dSRodney W. Grimes /*
893b1897c19SJulian Elischer  * The workitem queue.
894b1897c19SJulian Elischer  *
895b1897c19SJulian Elischer  * It is useful to delay writes of file data and filesystem metadata
896b1897c19SJulian Elischer  * for tens of seconds so that quickly created and deleted files need
897b1897c19SJulian Elischer  * not waste disk bandwidth being created and removed. To realize this,
898b1897c19SJulian Elischer  * we append vnodes to a "workitem" queue. When running with a soft
899b1897c19SJulian Elischer  * updates implementation, most pending metadata dependencies should
900b1897c19SJulian Elischer  * not wait for more than a few seconds. Thus, metadata for filesystems
901b1897c19SJulian Elischer  * mounted on block devices is delayed only about half the time that
902b1897c19SJulian Elischer  * file data is delayed. Similarly, directory updates are more critical,
903b1897c19SJulian Elischer  * so they are delayed only about a third as long. Thus, there are
904b1897c19SJulian Elischer  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
905e7647e6cSKris Kennaway  * one each second (driven off the filesystem syncer process). The
906b1897c19SJulian Elischer  * syncer_delayno variable indicates the next queue that is to be processed.
907b1897c19SJulian Elischer  * Items that need to be processed soon are placed in this queue:
908b1897c19SJulian Elischer  *
909b1897c19SJulian Elischer  *	syncer_workitem_pending[syncer_delayno]
910b1897c19SJulian Elischer  *
911b1897c19SJulian Elischer  * A delay of fifteen seconds is done by placing the request fifteen
912b1897c19SJulian Elischer  * entries later in the queue:
913b1897c19SJulian Elischer  *
914b1897c19SJulian Elischer  *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
915b1897c19SJulian Elischer  *
916b1897c19SJulian Elischer  */
917b1897c19SJulian Elischer 
918b1897c19SJulian Elischer /*
919b1897c19SJulian Elischer  * Add an item to the syncer work queue.
920b1897c19SJulian Elischer  */
92142e26d47SMatthew Dillon static void
92242e26d47SMatthew Dillon vn_syncer_add_to_worklist(struct vnode *vp, int delay)
923b1897c19SJulian Elischer {
924b1897c19SJulian Elischer 	int s, slot;
925b1897c19SJulian Elischer 
926b1897c19SJulian Elischer 	s = splbio();
927b1897c19SJulian Elischer 
928b1897c19SJulian Elischer 	if (vp->v_flag & VONWORKLST) {
929b1897c19SJulian Elischer 		LIST_REMOVE(vp, v_synclist);
930b1897c19SJulian Elischer 	}
931b1897c19SJulian Elischer 
932b1897c19SJulian Elischer 	if (delay > syncer_maxdelay - 2)
933b1897c19SJulian Elischer 		delay = syncer_maxdelay - 2;
934b1897c19SJulian Elischer 	slot = (syncer_delayno + delay) & syncer_mask;
935b1897c19SJulian Elischer 
936b1897c19SJulian Elischer 	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
937b1897c19SJulian Elischer 	vp->v_flag |= VONWORKLST;
938b1897c19SJulian Elischer 	splx(s);
939b1897c19SJulian Elischer }
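
/*
 * Worked example for the slot arithmetic above: with the default
 * SYNCER_MAXDELAY of 32, syncer_mask is 31.  If syncer_delayno is
 * currently 20, a vnode added with delay == 15 lands in slot
 * (20 + 15) & 31 == 3, and the once-a-second sweep reaches it after
 * wrapping around the ring, i.e. fifteen seconds from now.
 */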
940b1897c19SJulian Elischer 
9414ef2094eSJulian Elischer struct  proc *updateproc;
942155f87daSMatthew Dillon static void sched_sync __P((void));
9439c8b8baaSPeter Wemm static struct kproc_desc up_kp = {
944b1897c19SJulian Elischer 	"syncer",
945b1897c19SJulian Elischer 	sched_sync,
946b1897c19SJulian Elischer 	&updateproc
947b1897c19SJulian Elischer };
9489c8b8baaSPeter Wemm SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
949b1897c19SJulian Elischer 
950b1897c19SJulian Elischer /*
951b1897c19SJulian Elischer  * System filesystem synchronizer daemon.
952b1897c19SJulian Elischer  */
953b1897c19SJulian Elischer void
954b1897c19SJulian Elischer sched_sync(void)
955b1897c19SJulian Elischer {
956b1897c19SJulian Elischer 	struct synclist *slp;
957b1897c19SJulian Elischer 	struct vnode *vp;
958f2a2857bSKirk McKusick 	struct mount *mp;
959b1897c19SJulian Elischer 	long starttime;
960b1897c19SJulian Elischer 	int s;
961b1897c19SJulian Elischer 	struct proc *p = updateproc;
962b1897c19SJulian Elischer 
9630384fff8SJason Evans 	mtx_enter(&Giant, MTX_DEF);
9640384fff8SJason Evans 
9655e950839SLuoqi Chen 	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
9665e950839SLuoqi Chen 	    SHUTDOWN_PRI_LAST);
9675e950839SLuoqi Chen 
968b1897c19SJulian Elischer 	for (;;) {
9695e950839SLuoqi Chen 		kproc_suspend_loop(p);
9705e950839SLuoqi Chen 
971227ee8a1SPoul-Henning Kamp 		starttime = time_second;
972b1897c19SJulian Elischer 
973b1897c19SJulian Elischer 		/*
97442e26d47SMatthew Dillon 		 * Push files whose dirty time has expired.  Be careful
97542e26d47SMatthew Dillon 		 * of interrupt race on slp queue.
976b1897c19SJulian Elischer 		 */
977b1897c19SJulian Elischer 		s = splbio();
978b1897c19SJulian Elischer 		slp = &syncer_workitem_pending[syncer_delayno];
979b1897c19SJulian Elischer 		syncer_delayno += 1;
980b1897c19SJulian Elischer 		if (syncer_delayno == syncer_maxdelay)
981b1897c19SJulian Elischer 			syncer_delayno = 0;
982b1897c19SJulian Elischer 		splx(s);
983b1897c19SJulian Elischer 
984b1897c19SJulian Elischer 		while ((vp = LIST_FIRST(slp)) != NULL) {
985f2a2857bSKirk McKusick 			if (VOP_ISLOCKED(vp, NULL) == 0 &&
986f2a2857bSKirk McKusick 			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
987b1897c19SJulian Elischer 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
988b1897c19SJulian Elischer 				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
989b1897c19SJulian Elischer 				VOP_UNLOCK(vp, 0, p);
990f2a2857bSKirk McKusick 				vn_finished_write(mp);
9914ef2094eSJulian Elischer 			}
99242e26d47SMatthew Dillon 			s = splbio();
993b1897c19SJulian Elischer 			if (LIST_FIRST(slp) == vp) {
9944ef2094eSJulian Elischer 				/*
9954ef2094eSJulian Elischer 				 * Note: v_tag VT_VFS vps can remain on the
9964ef2094eSJulian Elischer 				 * worklist too with no dirty blocks, but
9974ef2094eSJulian Elischer 				 * since sync_fsync() moves them to a different
9984ef2094eSJulian Elischer 				 * slot, we are safe.
9994ef2094eSJulian Elischer 				 */
100016e9e530SPeter Wemm 				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
1001ba4ad1fcSPoul-Henning Kamp 				    !vn_isdisk(vp, NULL))
100242e26d47SMatthew Dillon 					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
1003b1897c19SJulian Elischer 				/*
100442e26d47SMatthew Dillon 				 * Put us back on the worklist.  The worklist
100542e26d47SMatthew Dillon 				 * routine will remove us from our current
100642e26d47SMatthew Dillon 				 * position and then add us back in at a later
100742e26d47SMatthew Dillon 				 * position.
1008b1897c19SJulian Elischer 				 */
1009b1897c19SJulian Elischer 				vn_syncer_add_to_worklist(vp, syncdelay);
1010b1897c19SJulian Elischer 			}
101142e26d47SMatthew Dillon 			splx(s);
1012b1897c19SJulian Elischer 		}
1013b1897c19SJulian Elischer 
1014b1897c19SJulian Elischer 		/*
1015b1897c19SJulian Elischer 		 * Do soft update processing.
1016b1897c19SJulian Elischer 		 */
10173275cf73SPoul-Henning Kamp #ifdef SOFTUPDATES
1018a2e7a027SPoul-Henning Kamp 		softdep_process_worklist(NULL);
10193275cf73SPoul-Henning Kamp #endif
1020b1897c19SJulian Elischer 
1021b1897c19SJulian Elischer 		/*
1022b1897c19SJulian Elischer 		 * The variable rushjob allows the kernel to speed up the
1023b1897c19SJulian Elischer 		 * processing of the filesystem syncer process. A rushjob
1024b1897c19SJulian Elischer 		 * value of N tells the filesystem syncer to process the next
1025b1897c19SJulian Elischer 		 * N seconds worth of work on its queue ASAP. Currently rushjob
1026b1897c19SJulian Elischer 		 * is used by the soft update code to speed up the filesystem
1027b1897c19SJulian Elischer 		 * syncer process when the incore state is getting so far
1028b1897c19SJulian Elischer 		 * ahead of the disk that the kernel memory pool is being
1029b1897c19SJulian Elischer 		 * threatened with exhaustion.
1030b1897c19SJulian Elischer 		 */
1031b1897c19SJulian Elischer 		if (rushjob > 0) {
1032b1897c19SJulian Elischer 			rushjob -= 1;
1033b1897c19SJulian Elischer 			continue;
1034b1897c19SJulian Elischer 		}
1035b1897c19SJulian Elischer 		/*
1036b1897c19SJulian Elischer 		 * If it has taken us less than a second to process the
1037b1897c19SJulian Elischer 		 * current work, then wait. Otherwise start right over
1038b1897c19SJulian Elischer 		 * again. We can still lose time if any single round
1039b1897c19SJulian Elischer 		 * takes more than two seconds, but it does not really
1040b1897c19SJulian Elischer 		 * matter as we are just trying to generally pace the
1041b1897c19SJulian Elischer 		 * filesystem activity.
1042b1897c19SJulian Elischer 		 */
1043227ee8a1SPoul-Henning Kamp 		if (time_second == starttime)
1044b1897c19SJulian Elischer 			tsleep(&lbolt, PPAUSE, "syncer", 0);
1045b1897c19SJulian Elischer 	}
1046b1897c19SJulian Elischer }
1047b1897c19SJulian Elischer 
1048b1897c19SJulian Elischer /*
1049e4ab40bcSKirk McKusick  * Request the syncer daemon to speed up its work.
1050e4ab40bcSKirk McKusick  * We never push it to speed up more than half of its
1051e4ab40bcSKirk McKusick  * normal turn time; otherwise it could take over the cpu.
1052e4ab40bcSKirk McKusick  */
1053e4ab40bcSKirk McKusick int
1054e4ab40bcSKirk McKusick speedup_syncer()
1055e4ab40bcSKirk McKusick {
1056e4ab40bcSKirk McKusick 	int s;
1057e4ab40bcSKirk McKusick 
1058e4ab40bcSKirk McKusick 	s = splhigh();
1059e4ab40bcSKirk McKusick 	if (updateproc->p_wchan == &lbolt)
1060e4ab40bcSKirk McKusick 		setrunnable(updateproc);
1061e4ab40bcSKirk McKusick 	splx(s);
1062e4ab40bcSKirk McKusick 	if (rushjob < syncdelay / 2) {
1063e4ab40bcSKirk McKusick 		rushjob += 1;
1064e4ab40bcSKirk McKusick 		stat_rush_requests += 1;
1065e4ab40bcSKirk McKusick 		return (1);
1066e4ab40bcSKirk McKusick 	}
1067e4ab40bcSKirk McKusick 	return(0);
1068e4ab40bcSKirk McKusick }
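
/*
 * Example (illustrative sketch): the soft updates code is the main
 * caller; when its dependency worklist gets long it nudges the syncer
 * (the threshold shown here is hypothetical):
 */
#if 0
	if (num_on_worklist > max_softdeps / 2)
		(void) speedup_syncer();
#endif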
1069e4ab40bcSKirk McKusick 
1070e4ab40bcSKirk McKusick /*
10710d94caffSDavid Greenman  * Associate a p-buffer with a vnode.
10721c7c3c6aSMatthew Dillon  *
10731c7c3c6aSMatthew Dillon  * Also sets B_PAGING flag to indicate that vnode is not fully associated
10741c7c3c6aSMatthew Dillon  * with the buffer, i.e. the bp has not been linked into the vnode or
10751c7c3c6aSMatthew Dillon  * ref-counted.
10760d94caffSDavid Greenman  */
10770d94caffSDavid Greenman void
10780d94caffSDavid Greenman pbgetvp(vp, bp)
10790d94caffSDavid Greenman 	register struct vnode *vp;
10800d94caffSDavid Greenman 	register struct buf *bp;
10810d94caffSDavid Greenman {
1082219cbf59SEivind Eklund 
10835526d2d9SEivind Eklund 	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
10845526d2d9SEivind Eklund 
10850d94caffSDavid Greenman 	bp->b_vp = vp;
10861c7c3c6aSMatthew Dillon 	bp->b_flags |= B_PAGING;
108741d2e3e0SPoul-Henning Kamp 	bp->b_dev = vn_todev(vp);
10880d94caffSDavid Greenman }
10890d94caffSDavid Greenman 
10900d94caffSDavid Greenman /*
10910d94caffSDavid Greenman  * Disassociate a p-buffer from a vnode.
10920d94caffSDavid Greenman  */
10930d94caffSDavid Greenman void
10940d94caffSDavid Greenman pbrelvp(bp)
10950d94caffSDavid Greenman 	register struct buf *bp;
10960d94caffSDavid Greenman {
10970d94caffSDavid Greenman 
10985526d2d9SEivind Eklund 	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
10990d94caffSDavid Greenman 
11001c7c3c6aSMatthew Dillon 	/* XXX REMOVE ME */
11011c7c3c6aSMatthew Dillon 	if (bp->b_vnbufs.tqe_next != NULL) {
11021c7c3c6aSMatthew Dillon 		panic("relpbuf(): b_vp was probably reassignbuf()d %p %x",
11031c7c3c6aSMatthew Dillon 		    bp, (int)bp->b_flags);
11071c7c3c6aSMatthew Dillon 	}
11080d94caffSDavid Greenman 	bp->b_vp = (struct vnode *) 0;
11091c7c3c6aSMatthew Dillon 	bp->b_flags &= ~B_PAGING;
11101c7c3c6aSMatthew Dillon }
11111c7c3c6aSMatthew Dillon 
11121c7c3c6aSMatthew Dillon void
11131c7c3c6aSMatthew Dillon pbreassignbuf(bp, newvp)
11141c7c3c6aSMatthew Dillon 	struct buf *bp;
11151c7c3c6aSMatthew Dillon 	struct vnode *newvp;
11161c7c3c6aSMatthew Dillon {
11171c7c3c6aSMatthew Dillon 	if ((bp->b_flags & B_PAGING) == 0) {
11181c7c3c6aSMatthew Dillon 		panic("pbreassignbuf() on non phys bp %p", bp);
11221c7c3c6aSMatthew Dillon 	}
11231c7c3c6aSMatthew Dillon 	bp->b_vp = newvp;
11240d94caffSDavid Greenman }
11250d94caffSDavid Greenman 
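/*
 * Illustrative sketch, not part of the original file: the pairing
 * discipline pager-style code follows around a physical I/O on a
 * p-buffer.  The strategy call itself is elided.
 */
static void
example_paging_io(struct vnode *vp, struct buf *bp)
{

	pbgetvp(vp, bp);	/* loose association; sets B_PAGING */
	/* ... issue the I/O (e.g. VOP_STRATEGY) and wait for it ... */
	pbrelvp(bp);		/* drop association; clears B_PAGING */
}
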
11260d94caffSDavid Greenman /*
1127df8bae1dSRodney W. Grimes  * Reassign a buffer from one vnode to another.
1128df8bae1dSRodney W. Grimes  * Used to assign file specific control information
1129df8bae1dSRodney W. Grimes  * (indirect blocks) to the vnode to which they belong.
1130df8bae1dSRodney W. Grimes  */
113126f9a767SRodney W. Grimes void
1132df8bae1dSRodney W. Grimes reassignbuf(bp, newvp)
1133df8bae1dSRodney W. Grimes 	register struct buf *bp;
1134df8bae1dSRodney W. Grimes 	register struct vnode *newvp;
1135df8bae1dSRodney W. Grimes {
1136b1897c19SJulian Elischer 	struct buflists *listheadp;
1137b1897c19SJulian Elischer 	int delay;
1138619594e8SJohn Dyson 	int s;
1139df8bae1dSRodney W. Grimes 
1140df8bae1dSRodney W. Grimes 	if (newvp == NULL) {
1141df8bae1dSRodney W. Grimes 		printf("reassignbuf: NULL\n");
1142df8bae1dSRodney W. Grimes 		return;
1143df8bae1dSRodney W. Grimes 	}
1144e929c00dSKirk McKusick 	++reassignbufcalls;
1145619594e8SJohn Dyson 
11461c7c3c6aSMatthew Dillon 	/*
11471c7c3c6aSMatthew Dillon 	 * B_PAGING flagged buffers cannot be reassigned because their vp
11481c7c3c6aSMatthew Dillon 	 * is not fully linked in.
11491c7c3c6aSMatthew Dillon 	 */
11501c7c3c6aSMatthew Dillon 	if (bp->b_flags & B_PAGING)
11511c7c3c6aSMatthew Dillon 		panic("cannot reassign paging buffer");
11521c7c3c6aSMatthew Dillon 
1153619594e8SJohn Dyson 	s = splbio();
1154df8bae1dSRodney W. Grimes 	/*
1155df8bae1dSRodney W. Grimes 	 * Delete from old vnode list, if on one.
1156df8bae1dSRodney W. Grimes 	 */
115702b00854SKirk McKusick 	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
115802b00854SKirk McKusick 		if (bp->b_xflags & BX_VNDIRTY)
11594ef2094eSJulian Elischer 			listheadp = &bp->b_vp->v_dirtyblkhd;
116016e9e530SPeter Wemm 		else
11614ef2094eSJulian Elischer 			listheadp = &bp->b_vp->v_cleanblkhd;
116216e9e530SPeter Wemm 		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
116302b00854SKirk McKusick 		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
11644ef2094eSJulian Elischer 		if (bp->b_vp != newvp) {
11654ef2094eSJulian Elischer 			vdrop(bp->b_vp);
11664ef2094eSJulian Elischer 			bp->b_vp = NULL;	/* for clarification */
11674ef2094eSJulian Elischer 		}
1168a051452aSPoul-Henning Kamp 	}
1169df8bae1dSRodney W. Grimes 	/*
11700d94caffSDavid Greenman 	 * If dirty, put on list of dirty buffers; otherwise insert onto list
11710d94caffSDavid Greenman 	 * of clean buffers.
1172df8bae1dSRodney W. Grimes 	 */
11730d94caffSDavid Greenman 	if (bp->b_flags & B_DELWRI) {
11740d94caffSDavid Greenman 		struct buf *tbp;
11750d94caffSDavid Greenman 
1176b1897c19SJulian Elischer 		listheadp = &newvp->v_dirtyblkhd;
1177b1897c19SJulian Elischer 		if ((newvp->v_flag & VONWORKLST) == 0) {
1178b1897c19SJulian Elischer 			switch (newvp->v_type) {
1179b1897c19SJulian Elischer 			case VDIR:
1180e4ab40bcSKirk McKusick 				delay = dirdelay;
1181b1897c19SJulian Elischer 				break;
118238224dcdSPoul-Henning Kamp 			case VCHR:
1183b1897c19SJulian Elischer 			case VBLK:
1184b1897c19SJulian Elischer 				if (newvp->v_specmountpoint != NULL) {
1185e4ab40bcSKirk McKusick 					delay = metadelay;
1186b1897c19SJulian Elischer 					break;
1187b1897c19SJulian Elischer 				}
1188b1897c19SJulian Elischer 				/* fall through */
1189b1897c19SJulian Elischer 			default:
1190e4ab40bcSKirk McKusick 				delay = filedelay;
1191b1897c19SJulian Elischer 			}
1192b1897c19SJulian Elischer 			vn_syncer_add_to_worklist(newvp, delay);
1193b1897c19SJulian Elischer 		}
119402b00854SKirk McKusick 		bp->b_xflags |= BX_VNDIRTY;
119516e9e530SPeter Wemm 		tbp = TAILQ_FIRST(listheadp);
119616e9e530SPeter Wemm 		if (tbp == NULL ||
1197e929c00dSKirk McKusick 		    bp->b_lblkno == 0 ||
1198c37c9620SMatthew Dillon 		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
1199e929c00dSKirk McKusick 		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
120016e9e530SPeter Wemm 			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
1201e929c00dSKirk McKusick 			++reassignbufsortgood;
1202e929c00dSKirk McKusick 		} else if (bp->b_lblkno < 0) {
1203e929c00dSKirk McKusick 			TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
1204e929c00dSKirk McKusick 			++reassignbufsortgood;
1205e929c00dSKirk McKusick 		} else if (reassignbufmethod == 1) {
1206e929c00dSKirk McKusick 			/*
1207e929c00dSKirk McKusick 			 * New sorting algorithm: handle only the sequential case,
1208c37c9620SMatthew Dillon 			 * otherwise append to the end (but before metadata).
1209e929c00dSKirk McKusick 			 */
1210e929c00dSKirk McKusick 			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
121102b00854SKirk McKusick 			    (tbp->b_xflags & BX_VNDIRTY)) {
1212c37c9620SMatthew Dillon 				/*
1213c37c9620SMatthew Dillon 				 * Found the best place to insert the buffer
1214c37c9620SMatthew Dillon 				 */
1215e929c00dSKirk McKusick 				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
1216e929c00dSKirk McKusick 				++reassignbufsortgood;
12170d94caffSDavid Greenman 			} else {
1218c37c9620SMatthew Dillon 				/*
1219c37c9620SMatthew Dillon 				 * Missed, append to end, but before meta-data.
1220c37c9620SMatthew Dillon 				 * We know that the head buffer in the list is
1221c37c9620SMatthew Dillon 				 * not meta-data due to prior conditionals.
1222c37c9620SMatthew Dillon 				 *
1223c37c9620SMatthew Dillon 				 * Indirect effects:  NFS second stage write
1224c37c9620SMatthew Dillon 				 * tends to wind up here, giving maximum
1225c37c9620SMatthew Dillon 				 * distance between the unstable write and the
1226c37c9620SMatthew Dillon 				 * commit rpc.
1227c37c9620SMatthew Dillon 				 */
1228c37c9620SMatthew Dillon 				tbp = TAILQ_LAST(listheadp, buflists);
1229c37c9620SMatthew Dillon 				while (tbp && tbp->b_lblkno < 0)
1230c37c9620SMatthew Dillon 					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
1231c37c9620SMatthew Dillon 				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
1232e929c00dSKirk McKusick 				++reassignbufsortbad;
1233e929c00dSKirk McKusick 			}
1234e929c00dSKirk McKusick 		} else {
1235e929c00dSKirk McKusick 			/*
1236e929c00dSKirk McKusick 			 * Old sorting algorithm, scan queue and insert
1237e929c00dSKirk McKusick 			 */
123816e9e530SPeter Wemm 			struct buf *ttbp;
123916e9e530SPeter Wemm 			while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
124016e9e530SPeter Wemm 			    (ttbp->b_lblkno < bp->b_lblkno)) {
1241e929c00dSKirk McKusick 				++reassignbufloops;
124216e9e530SPeter Wemm 				tbp = ttbp;
12430d94caffSDavid Greenman 			}
124416e9e530SPeter Wemm 			TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
12450d94caffSDavid Greenman 		}
12460d94caffSDavid Greenman 	} else {
124702b00854SKirk McKusick 		bp->b_xflags |= BX_VNCLEAN;
124816e9e530SPeter Wemm 		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
1249b1897c19SJulian Elischer 		if ((newvp->v_flag & VONWORKLST) &&
125016e9e530SPeter Wemm 		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
1251b1897c19SJulian Elischer 			newvp->v_flag &= ~VONWORKLST;
1252b1897c19SJulian Elischer 			LIST_REMOVE(newvp, v_synclist);
1253b1897c19SJulian Elischer 		}
1254df8bae1dSRodney W. Grimes 	}
12554ef2094eSJulian Elischer 	if (bp->b_vp != newvp) {
1256a051452aSPoul-Henning Kamp 		bp->b_vp = newvp;
1257a051452aSPoul-Henning Kamp 		vhold(bp->b_vp);
12584ef2094eSJulian Elischer 	}
1259619594e8SJohn Dyson 	splx(s);
12600d94caffSDavid Greenman }
1261df8bae1dSRodney W. Grimes 
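/*
 * Illustrative sketch, not part of the original file: the pattern by
 * which bdirty()-style code uses reassignbuf() to move a buffer onto
 * its vnode's dirty list when it becomes delayed-write.
 */
static void
example_mark_dirty(struct buf *bp)
{

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);	/* re-sort onto v_dirtyblkhd */
	}
}
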
1262df8bae1dSRodney W. Grimes /*
1263df8bae1dSRodney W. Grimes  * Create a vnode for a block device.
126441fadeebSBruce Evans  * Used for mounting the root file system.
126501f76720SJeroen Ruigrok van der Werven  * XXX: This now returns a VCHR vnode due to the block/char merging.
1266df8bae1dSRodney W. Grimes  */
126726f9a767SRodney W. Grimes int
1268df8bae1dSRodney W. Grimes bdevvp(dev, vpp)
1269df8bae1dSRodney W. Grimes 	dev_t dev;
1270df8bae1dSRodney W. Grimes 	struct vnode **vpp;
1271df8bae1dSRodney W. Grimes {
1272df8bae1dSRodney W. Grimes 	register struct vnode *vp;
1273df8bae1dSRodney W. Grimes 	struct vnode *nvp;
1274df8bae1dSRodney W. Grimes 	int error;
1275df8bae1dSRodney W. Grimes 
12762447bec8SPoul-Henning Kamp 	if (dev == NODEV) {
127737906c68SBruce Evans 		*vpp = NULLVP;
127837906c68SBruce Evans 		return (ENXIO);
127937906c68SBruce Evans 	}
1280df8bae1dSRodney W. Grimes 	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
1281df8bae1dSRodney W. Grimes 	if (error) {
128237906c68SBruce Evans 		*vpp = NULLVP;
1283df8bae1dSRodney W. Grimes 		return (error);
1284df8bae1dSRodney W. Grimes 	}
1285df8bae1dSRodney W. Grimes 	vp = nvp;
128601f76720SJeroen Ruigrok van der Werven 	vp->v_type = VCHR;
1287dbafb366SPoul-Henning Kamp 	addalias(vp, dev);
1288df8bae1dSRodney W. Grimes 	*vpp = vp;
1289df8bae1dSRodney W. Grimes 	return (0);
1290df8bae1dSRodney W. Grimes }
1291df8bae1dSRodney W. Grimes 
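/*
 * Illustrative sketch, not part of the original file: the boot-time
 * use of bdevvp(), assuming the usual rootdev and rootvp globals.
 */
static void
example_root_vnode(void)
{

	if (bdevvp(rootdev, &rootvp))
		panic("example_root_vnode: cannot get root device vnode");
}
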
1292df8bae1dSRodney W. Grimes /*
1293dbafb366SPoul-Henning Kamp  * Add vnode to the alias list hung off the dev_t.
1294dbafb366SPoul-Henning Kamp  *
1295dbafb366SPoul-Henning Kamp  * The reason for this gunk is that multiple vnodes can reference
1296dbafb366SPoul-Henning Kamp  * the same physical device, so checking vp->v_usecount to see
1297dbafb366SPoul-Henning Kamp  * how many users there are is inadequate; the v_usecount values of
1298dbafb366SPoul-Henning Kamp  * all the vnodes need to be accumulated.  vcount() does that.
1299df8bae1dSRodney W. Grimes  */
13009b971133SKirk McKusick struct vnode *
1301dbafb366SPoul-Henning Kamp addaliasu(nvp, nvp_rdev)
1302dbafb366SPoul-Henning Kamp 	struct vnode *nvp;
1303bfbb9ce6SPoul-Henning Kamp 	udev_t nvp_rdev;
1304df8bae1dSRodney W. Grimes {
13059b971133SKirk McKusick 	struct vnode *ovp;
13069b971133SKirk McKusick 	vop_t **ops;
13079b971133SKirk McKusick 	dev_t dev;
1308df8bae1dSRodney W. Grimes 
1309df8bae1dSRodney W. Grimes 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
1310dbafb366SPoul-Henning Kamp 		panic("addaliasu on non-special vnode");
13119b971133SKirk McKusick 	dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0);
13129b971133SKirk McKusick 	/*
13139b971133SKirk McKusick 	 * Check to see if we have a bdevvp vnode with no associated
13149b971133SKirk McKusick 	 * filesystem. If so, we want to associate the filesystem of
13159b971133SKirk McKusick 	 * the new newly instigated vnode with the bdevvp vnode and
13169b971133SKirk McKusick 	 * the newly instantiated vnode with the bdevvp vnode and
13179b971133SKirk McKusick 	 * bdevvp vnode lying around with no associated filesystem.
13189b971133SKirk McKusick 	 */
13199b971133SKirk McKusick 	if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) {
13209b971133SKirk McKusick 		addalias(nvp, dev);
13219b971133SKirk McKusick 		return (nvp);
13229b971133SKirk McKusick 	}
13239b971133SKirk McKusick 	/*
13249b971133SKirk McKusick 	 * Discard unneeded vnode, but save its node specific data.
13259b971133SKirk McKusick 	 * Note that if there is a lock, it is carried over in the
13269b971133SKirk McKusick 	 * node specific data to the replacement vnode.
13279b971133SKirk McKusick 	 */
13289b971133SKirk McKusick 	vref(ovp);
13299b971133SKirk McKusick 	ovp->v_data = nvp->v_data;
13309b971133SKirk McKusick 	ovp->v_tag = nvp->v_tag;
13319b971133SKirk McKusick 	nvp->v_data = NULL;
13329b971133SKirk McKusick 	ops = nvp->v_op;
13339b971133SKirk McKusick 	nvp->v_op = ovp->v_op;
13349b971133SKirk McKusick 	ovp->v_op = ops;
13359b971133SKirk McKusick 	insmntque(ovp, nvp->v_mount);
13369b971133SKirk McKusick 	vrele(nvp);
13379b971133SKirk McKusick 	vgone(nvp);
13389b971133SKirk McKusick 	return (ovp);
1339df8bae1dSRodney W. Grimes }
1340155f87daSMatthew Dillon 
1341dbafb366SPoul-Henning Kamp void
1342dbafb366SPoul-Henning Kamp addalias(nvp, dev)
1343dbafb366SPoul-Henning Kamp 	struct vnode *nvp;
1344dbafb366SPoul-Henning Kamp 	dev_t dev;
1345dbafb366SPoul-Henning Kamp {
1346155f87daSMatthew Dillon 
1347dbafb366SPoul-Henning Kamp 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
1348dbafb366SPoul-Henning Kamp 		panic("addalias on non-special vnode");
1349dbafb366SPoul-Henning Kamp 
1350dbafb366SPoul-Henning Kamp 	nvp->v_rdev = dev;
1351dbafb366SPoul-Henning Kamp 	simple_lock(&spechash_slock);
1352dbafb366SPoul-Henning Kamp 	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
1353996c772fSJohn Dyson 	simple_unlock(&spechash_slock);
1354df8bae1dSRodney W. Grimes }
1355df8bae1dSRodney W. Grimes 
1356df8bae1dSRodney W. Grimes /*
1357df8bae1dSRodney W. Grimes  * Grab a particular vnode from the free list, increment its
1358e7647e6cSKris Kennaway  * reference count and lock it. The vnode lock bit is set if the
1359df8bae1dSRodney W. Grimes  * vnode is being eliminated in vgone. The process is awakened
1360df8bae1dSRodney W. Grimes  * when the transition is completed, and an error returned to
1361df8bae1dSRodney W. Grimes  * indicate that the vnode is no longer usable (possibly having
1362df8bae1dSRodney W. Grimes  * been changed to a new file system type).
1363df8bae1dSRodney W. Grimes  */
136426f9a767SRodney W. Grimes int
1365996c772fSJohn Dyson vget(vp, flags, p)
1366df8bae1dSRodney W. Grimes 	register struct vnode *vp;
1367996c772fSJohn Dyson 	int flags;
1368996c772fSJohn Dyson 	struct proc *p;
1369df8bae1dSRodney W. Grimes {
1370996c772fSJohn Dyson 	int error;
1371df8bae1dSRodney W. Grimes 
1372df8bae1dSRodney W. Grimes 	/*
1373996c772fSJohn Dyson 	 * If the vnode is in the process of being cleaned out for
1374996c772fSJohn Dyson 	 * another use, we wait for the cleaning to finish and then
1375996c772fSJohn Dyson 	 * return failure. Cleaning is determined by checking that
1376996c772fSJohn Dyson 	 * the VXLOCK flag is set.
1377df8bae1dSRodney W. Grimes 	 */
1378996c772fSJohn Dyson 	if ((flags & LK_INTERLOCK) == 0) {
1379996c772fSJohn Dyson 		simple_lock(&vp->v_interlock);
1380996c772fSJohn Dyson 	}
1381996c772fSJohn Dyson 	if (vp->v_flag & VXLOCK) {
1382df8bae1dSRodney W. Grimes 		vp->v_flag |= VXWANT;
1383996c772fSJohn Dyson 		simple_unlock(&vp->v_interlock);
1384996c772fSJohn Dyson 		tsleep((caddr_t)vp, PINOD, "vget", 0);
1385996c772fSJohn Dyson 		return (ENOENT);
1386df8bae1dSRodney W. Grimes 	}
13872be70f79SJohn Dyson 
1388df8bae1dSRodney W. Grimes 	vp->v_usecount++;
13892be70f79SJohn Dyson 
1390a051452aSPoul-Henning Kamp 	if (VSHOULDBUSY(vp))
1391a051452aSPoul-Henning Kamp 		vbusy(vp);
1392996c772fSJohn Dyson 	if (flags & LK_TYPE_MASK) {
139364d3c7e3SJohn Dyson 		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
139464d3c7e3SJohn Dyson 			/*
139564d3c7e3SJohn Dyson 			 * We must open-code vrele here because we do not
139664d3c7e3SJohn Dyson 			 * want to call VOP_INACTIVE if the reference count
139764d3c7e3SJohn Dyson 			 * drops back to zero, since it was never really
139864d3c7e3SJohn Dyson 			 * active. We must remove it from the free list
139964d3c7e3SJohn Dyson 			 * before sleeping so that multiple processes do
140064d3c7e3SJohn Dyson 			 * not try to recycle it.
140164d3c7e3SJohn Dyson 			 */
140264d3c7e3SJohn Dyson 			simple_lock(&vp->v_interlock);
140364d3c7e3SJohn Dyson 			vp->v_usecount--;
140464d3c7e3SJohn Dyson 			if (VSHOULDFREE(vp))
140564d3c7e3SJohn Dyson 				vfree(vp);
140664d3c7e3SJohn Dyson 			simple_unlock(&vp->v_interlock);
140764d3c7e3SJohn Dyson 		}
1408996c772fSJohn Dyson 		return (error);
1409996c772fSJohn Dyson 	}
1410996c772fSJohn Dyson 	simple_unlock(&vp->v_interlock);
1411df8bae1dSRodney W. Grimes 	return (0);
1412df8bae1dSRodney W. Grimes }
1413df8bae1dSRodney W. Grimes 
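/*
 * Illustrative sketch, not part of the original file: a typical
 * cache-lookup caller of vget().  An ENOENT return means the vnode
 * raced with vgone() and the caller should restart its lookup.
 */
static int
example_use_vnode(struct vnode *vp, struct proc *p)
{
	int error;

	if ((error = vget(vp, LK_EXCLUSIVE, p)) != 0)
		return (error);
	/* ... operate on the referenced, exclusively locked vnode ... */
	vput(vp);
	return (0);
}
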
1414483140eaSJohn Dyson void
1415483140eaSJohn Dyson vref(struct vnode *vp)
1416483140eaSJohn Dyson {
1417483140eaSJohn Dyson 	simple_lock(&vp->v_interlock);
1418483140eaSJohn Dyson 	vp->v_usecount++;
1419483140eaSJohn Dyson 	simple_unlock(&vp->v_interlock);
1420483140eaSJohn Dyson }
1421483140eaSJohn Dyson 
1422df8bae1dSRodney W. Grimes /*
14230d955f71SJohn Dyson  * Vnode put/release.
1424df8bae1dSRodney W. Grimes  * If count drops to zero, call inactive routine and return to freelist.
1425df8bae1dSRodney W. Grimes  */
14262be70f79SJohn Dyson void
14272be70f79SJohn Dyson vrele(vp)
1428996c772fSJohn Dyson 	struct vnode *vp;
1429df8bae1dSRodney W. Grimes {
1430996c772fSJohn Dyson 	struct proc *p = curproc;	/* XXX */
1431df8bae1dSRodney W. Grimes 
1432219cbf59SEivind Eklund 	KASSERT(vp != NULL, ("vrele: null vp"));
1433f2a2857bSKirk McKusick 	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
14345526d2d9SEivind Eklund 
1435996c772fSJohn Dyson 	simple_lock(&vp->v_interlock);
14366476c0d2SJohn Dyson 
1437a051452aSPoul-Henning Kamp 	if (vp->v_usecount > 1) {
14382be70f79SJohn Dyson 
1439a051452aSPoul-Henning Kamp 		vp->v_usecount--;
1440fd7f690fSJohn Dyson 		simple_unlock(&vp->v_interlock);
14416476c0d2SJohn Dyson 
144295e5e988SJohn Dyson 		return;
144395e5e988SJohn Dyson 	}
144495e5e988SJohn Dyson 
144595e5e988SJohn Dyson 	if (vp->v_usecount == 1) {
1446a051452aSPoul-Henning Kamp 
14477cb22688SPoul-Henning Kamp 		vp->v_usecount--;
1448fd9d9ff1SPoul-Henning Kamp 		if (VSHOULDFREE(vp))
1449fd9d9ff1SPoul-Henning Kamp 			vfree(vp);
14500d955f71SJohn Dyson 	/*
14510d955f71SJohn Dyson 	 * VOP_INACTIVE must be called with the node locked.  vput() comes
14520d955f71SJohn Dyson 	 * in with the node already locked, so here in vrele we must lock
14530d955f71SJohn Dyson 	 * the vnode explicitly before calling VOP_INACTIVE.
14540d955f71SJohn Dyson 	 */
14552be70f79SJohn Dyson 		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1456996c772fSJohn Dyson 			VOP_INACTIVE(vp, p);
14572be70f79SJohn Dyson 		}
14582be70f79SJohn Dyson 
14592be70f79SJohn Dyson 	} else {
14602be70f79SJohn Dyson #ifdef DIAGNOSTIC
14612be70f79SJohn Dyson 		vprint("vrele: negative ref count", vp);
146295e5e988SJohn Dyson 		simple_unlock(&vp->v_interlock);
14632be70f79SJohn Dyson #endif
14642be70f79SJohn Dyson 		panic("vrele: negative ref cnt");
1465fd7f690fSJohn Dyson 	}
1466df8bae1dSRodney W. Grimes }
1467df8bae1dSRodney W. Grimes 
14680d955f71SJohn Dyson void
14690d955f71SJohn Dyson vput(vp)
14700d955f71SJohn Dyson 	struct vnode *vp;
14710d955f71SJohn Dyson {
14722be70f79SJohn Dyson 	struct proc *p = curproc;	/* XXX */
14730d955f71SJohn Dyson 
14745526d2d9SEivind Eklund 	KASSERT(vp != NULL, ("vput: null vp"));
1475f2a2857bSKirk McKusick 	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
147695e5e988SJohn Dyson 
14772be70f79SJohn Dyson 	simple_lock(&vp->v_interlock);
14782be70f79SJohn Dyson 
14792be70f79SJohn Dyson 	if (vp->v_usecount > 1) {
14802be70f79SJohn Dyson 
14812be70f79SJohn Dyson 		vp->v_usecount--;
14822be70f79SJohn Dyson 		VOP_UNLOCK(vp, LK_INTERLOCK, p);
148395e5e988SJohn Dyson 		return;
14842be70f79SJohn Dyson 
148595e5e988SJohn Dyson 	}
148695e5e988SJohn Dyson 
148795e5e988SJohn Dyson 	if (vp->v_usecount == 1) {
14882be70f79SJohn Dyson 
14892be70f79SJohn Dyson 		vp->v_usecount--;
14902be70f79SJohn Dyson 		if (VSHOULDFREE(vp))
14912be70f79SJohn Dyson 			vfree(vp);
14922be70f79SJohn Dyson 	/*
14932be70f79SJohn Dyson 	 * We are doing a vput: the node is already locked, and we must
14942be70f79SJohn Dyson 	 * call VOP_INACTIVE with the node locked.  (vrele, by contrast,
14952be70f79SJohn Dyson 	 * must lock the vnode explicitly before calling VOP_INACTIVE.)
14962be70f79SJohn Dyson 	 */
14972be70f79SJohn Dyson 		simple_unlock(&vp->v_interlock);
14982be70f79SJohn Dyson 		VOP_INACTIVE(vp, p);
14992be70f79SJohn Dyson 
15002be70f79SJohn Dyson 	} else {
15012be70f79SJohn Dyson #ifdef DIAGNOSTIC
15022be70f79SJohn Dyson 		vprint("vput: negative ref count", vp);
15032be70f79SJohn Dyson #endif
15042be70f79SJohn Dyson 		panic("vput: negative ref cnt");
15052be70f79SJohn Dyson 	}
15060d955f71SJohn Dyson }
15070d955f71SJohn Dyson 
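/*
 * Illustrative sketch, not part of the original file: choosing
 * between the two release primitives above.  After either call the
 * caller's reference is gone and the vnode is unlocked.
 */
static void
example_release(struct vnode *vp, int still_locked)
{

	if (still_locked)
		vput(vp);	/* release and drop the vn_lock() lock */
	else
		vrele(vp);	/* release; vp must already be unlocked */
}
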
1508df8bae1dSRodney W. Grimes /*
1509a051452aSPoul-Henning Kamp  * Somebody doesn't want the vnode recycled.
1510df8bae1dSRodney W. Grimes  */
151126f9a767SRodney W. Grimes void
151226f9a767SRodney W. Grimes vhold(vp)
1513df8bae1dSRodney W. Grimes 	register struct vnode *vp;
1514df8bae1dSRodney W. Grimes {
15158293f20aSTor Egge 	int s;
1516df8bae1dSRodney W. Grimes 
15178293f20aSTor Egge   	s = splbio();
1518df8bae1dSRodney W. Grimes 	vp->v_holdcnt++;
1519a051452aSPoul-Henning Kamp 	if (VSHOULDBUSY(vp))
1520a051452aSPoul-Henning Kamp 		vbusy(vp);
15218293f20aSTor Egge 	splx(s);
1522df8bae1dSRodney W. Grimes }
1523df8bae1dSRodney W. Grimes 
1524df8bae1dSRodney W. Grimes /*
1525a051452aSPoul-Henning Kamp  * One less who cares about this vnode.
1526df8bae1dSRodney W. Grimes  */
152726f9a767SRodney W. Grimes void
1528a051452aSPoul-Henning Kamp vdrop(vp)
1529df8bae1dSRodney W. Grimes 	register struct vnode *vp;
1530df8bae1dSRodney W. Grimes {
15318293f20aSTor Egge 	int s;
1532df8bae1dSRodney W. Grimes 
15338293f20aSTor Egge 	s = splbio();
1534df8bae1dSRodney W. Grimes 	if (vp->v_holdcnt <= 0)
1535b1897c19SJulian Elischer 		panic("vdrop: holdcnt");
1536df8bae1dSRodney W. Grimes 	vp->v_holdcnt--;
1537a051452aSPoul-Henning Kamp 	if (VSHOULDFREE(vp))
1538a051452aSPoul-Henning Kamp 		vfree(vp);
15398293f20aSTor Egge 	splx(s);
1540df8bae1dSRodney W. Grimes }
1541df8bae1dSRodney W. Grimes 
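/*
 * Illustrative sketch, not part of the original file: vhold()/vdrop()
 * bracket any period in which code keeps a bare pointer to a vnode
 * without a v_usecount reference, as the buffer code above does.
 * struct example_ref is hypothetical.
 */
struct example_ref {
	struct vnode *er_vp;
};

static void
example_pin(struct example_ref *er, struct vnode *vp)
{

	er->er_vp = vp;
	vhold(vp);		/* keep vp from being recycled */
}

static void
example_unpin(struct example_ref *er)
{

	vdrop(er->er_vp);	/* may return vp to the free list */
	er->er_vp = NULL;
}
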
1542df8bae1dSRodney W. Grimes /*
1543df8bae1dSRodney W. Grimes  * Remove any vnodes in the vnode table belonging to mount point mp.
1544df8bae1dSRodney W. Grimes  *
1545df8bae1dSRodney W. Grimes  * If MNT_NOFORCE is specified, there should not be any active ones;
1546df8bae1dSRodney W. Grimes  * return an error if any are found (nb: this is a user error, not a
1547df8bae1dSRodney W. Grimes  * system error). If MNT_FORCE is specified, detach any active vnodes
1548df8bae1dSRodney W. Grimes  * that are found.
1549df8bae1dSRodney W. Grimes  */
1550df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
155127a0b398SPoul-Henning Kamp static int busyprt = 0;		/* print out busy vnodes */
15520f1adf65SBruce Evans SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1553df8bae1dSRodney W. Grimes #endif
1554df8bae1dSRodney W. Grimes 
155526f9a767SRodney W. Grimes int
1556df8bae1dSRodney W. Grimes vflush(mp, skipvp, flags)
1557df8bae1dSRodney W. Grimes 	struct mount *mp;
1558df8bae1dSRodney W. Grimes 	struct vnode *skipvp;
1559df8bae1dSRodney W. Grimes 	int flags;
1560df8bae1dSRodney W. Grimes {
1561996c772fSJohn Dyson 	struct proc *p = curproc;	/* XXX */
1562996c772fSJohn Dyson 	struct vnode *vp, *nvp;
1563df8bae1dSRodney W. Grimes 	int busy = 0;
1564df8bae1dSRodney W. Grimes 
1565996c772fSJohn Dyson 	simple_lock(&mntvnode_slock);
1566df8bae1dSRodney W. Grimes loop:
15671b727751SPoul-Henning Kamp 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
15683d2a8cf3SDavid Greenman 		/*
15693d2a8cf3SDavid Greenman 		 * Make sure this vnode wasn't reclaimed in getnewvnode().
15703d2a8cf3SDavid Greenman 		 * Start over if it was (it won't be on the list anymore).
15713d2a8cf3SDavid Greenman 		 */
1572df8bae1dSRodney W. Grimes 		if (vp->v_mount != mp)
1573df8bae1dSRodney W. Grimes 			goto loop;
15741b727751SPoul-Henning Kamp 		nvp = LIST_NEXT(vp, v_mntvnodes);
1575df8bae1dSRodney W. Grimes 		/*
1576df8bae1dSRodney W. Grimes 		 * Skip over a selected vnode.
1577df8bae1dSRodney W. Grimes 		 */
1578df8bae1dSRodney W. Grimes 		if (vp == skipvp)
1579df8bae1dSRodney W. Grimes 			continue;
1580996c772fSJohn Dyson 
1581996c772fSJohn Dyson 		simple_lock(&vp->v_interlock);
1582df8bae1dSRodney W. Grimes 		/*
1583df8bae1dSRodney W. Grimes 		 * Skip over vnodes marked VSYSTEM.
1584df8bae1dSRodney W. Grimes 		 */
1585996c772fSJohn Dyson 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1586996c772fSJohn Dyson 			simple_unlock(&vp->v_interlock);
1587df8bae1dSRodney W. Grimes 			continue;
1588996c772fSJohn Dyson 		}
1589df8bae1dSRodney W. Grimes 		/*
15900d94caffSDavid Greenman 		 * If WRITECLOSE is set, only flush out regular file vnodes
15910d94caffSDavid Greenman 		 * open for writing.
1592df8bae1dSRodney W. Grimes 		 */
1593df8bae1dSRodney W. Grimes 		if ((flags & WRITECLOSE) &&
1594996c772fSJohn Dyson 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1595996c772fSJohn Dyson 			simple_unlock(&vp->v_interlock);
1596df8bae1dSRodney W. Grimes 			continue;
1597996c772fSJohn Dyson 		}
15986476c0d2SJohn Dyson 
1599df8bae1dSRodney W. Grimes 		/*
16000d94caffSDavid Greenman 		 * With v_usecount == 0, all we need to do is clear out the
16010d94caffSDavid Greenman 		 * vnode data structures and we are done.
1602df8bae1dSRodney W. Grimes 		 */
1603df8bae1dSRodney W. Grimes 		if (vp->v_usecount == 0) {
1604996c772fSJohn Dyson 			simple_unlock(&mntvnode_slock);
1605996c772fSJohn Dyson 			vgonel(vp, p);
1606996c772fSJohn Dyson 			simple_lock(&mntvnode_slock);
1607df8bae1dSRodney W. Grimes 			continue;
1608df8bae1dSRodney W. Grimes 		}
1609ad980522SJohn Dyson 
1610df8bae1dSRodney W. Grimes 		/*
16110d94caffSDavid Greenman 		 * If FORCECLOSE is set, forcibly close the vnode. For block
16120d94caffSDavid Greenman 		 * or character devices, revert to an anonymous device. For
16130d94caffSDavid Greenman 		 * all other files, just kill them.
1614df8bae1dSRodney W. Grimes 		 */
1615df8bae1dSRodney W. Grimes 		if (flags & FORCECLOSE) {
1616996c772fSJohn Dyson 			simple_unlock(&mntvnode_slock);
1617df8bae1dSRodney W. Grimes 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1618996c772fSJohn Dyson 				vgonel(vp, p);
1619df8bae1dSRodney W. Grimes 			} else {
1620996c772fSJohn Dyson 				vclean(vp, 0, p);
1621df8bae1dSRodney W. Grimes 				vp->v_op = spec_vnodeop_p;
1622df8bae1dSRodney W. Grimes 				insmntque(vp, (struct mount *) 0);
1623df8bae1dSRodney W. Grimes 			}
1624996c772fSJohn Dyson 			simple_lock(&mntvnode_slock);
1625df8bae1dSRodney W. Grimes 			continue;
1626df8bae1dSRodney W. Grimes 		}
1627df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC
1628df8bae1dSRodney W. Grimes 		if (busyprt)
1629df8bae1dSRodney W. Grimes 			vprint("vflush: busy vnode", vp);
1630df8bae1dSRodney W. Grimes #endif
1631996c772fSJohn Dyson 		simple_unlock(&vp->v_interlock);
1632df8bae1dSRodney W. Grimes 		busy++;
1633df8bae1dSRodney W. Grimes 	}
1634996c772fSJohn Dyson 	simple_unlock(&mntvnode_slock);
1635df8bae1dSRodney W. Grimes 	if (busy)
1636df8bae1dSRodney W. Grimes 		return (EBUSY);
1637df8bae1dSRodney W. Grimes 	return (0);
1638df8bae1dSRodney W. Grimes }
1639df8bae1dSRodney W. Grimes 
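/*
 * Illustrative sketch, not part of the original file: how a
 * filesystem unmount path typically drives vflush(); loosely modeled
 * on ffs_flushfiles(), with quota handling and error recovery elided.
 */
static int
example_flushfiles(struct mount *mp, int mntflags)
{
	int flags;

	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
	return (vflush(mp, NULLVP, flags));
}
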
1640df8bae1dSRodney W. Grimes /*
1641df8bae1dSRodney W. Grimes  * Disassociate the underlying file system from a vnode.
1642df8bae1dSRodney W. Grimes  */
1643996c772fSJohn Dyson static void
1644514ede09SBruce Evans vclean(vp, flags, p)
1645514ede09SBruce Evans 	struct vnode *vp;
1646514ede09SBruce Evans 	int flags;
1647514ede09SBruce Evans 	struct proc *p;
1648df8bae1dSRodney W. Grimes {
164995e5e988SJohn Dyson 	int active;
1650df8bae1dSRodney W. Grimes 
1651df8bae1dSRodney W. Grimes 	/*
16520d94caffSDavid Greenman 	 * Check to see if the vnode is in use. If so we have to reference it
16530d94caffSDavid Greenman 	 * before we clean it out so that its count cannot fall to zero and
16540d94caffSDavid Greenman 	 * generate a race against ourselves to recycle it.
1655df8bae1dSRodney W. Grimes 	 */
1656bb56ec4aSPoul-Henning Kamp 	if ((active = vp->v_usecount))
1657996c772fSJohn Dyson 		vp->v_usecount++;
165895e5e988SJohn Dyson 
1659df8bae1dSRodney W. Grimes 	/*
16600d94caffSDavid Greenman 	 * Prevent the vnode from being recycled or brought into use while we
16610d94caffSDavid Greenman 	 * clean it out.
1662df8bae1dSRodney W. Grimes 	 */
1663df8bae1dSRodney W. Grimes 	if (vp->v_flag & VXLOCK)
1664df8bae1dSRodney W. Grimes 		panic("vclean: deadlock");
1665df8bae1dSRodney W. Grimes 	vp->v_flag |= VXLOCK;
1666df8bae1dSRodney W. Grimes 	/*
1667996c772fSJohn Dyson 	 * Even if the count is zero, the VOP_INACTIVE routine may still
1668996c772fSJohn Dyson 	 * have the object locked while it cleans it out. The VOP_LOCK
1669996c772fSJohn Dyson 	 * ensures that the VOP_INACTIVE routine is done with its work.
1670996c772fSJohn Dyson 	 * For active vnodes, it ensures that no other activity can
1671996c772fSJohn Dyson 	 * occur while the underlying object is being cleaned out.
1672996c772fSJohn Dyson 	 */
1673996c772fSJohn Dyson 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
16743c631446SJohn Dyson 
1675996c772fSJohn Dyson 	/*
1676df8bae1dSRodney W. Grimes 	 * Clean out any buffers associated with the vnode.
167737642196SKirk McKusick 	 * If the flush fails, just toss the buffers.
1678df8bae1dSRodney W. Grimes 	 */
167937642196SKirk McKusick 	if (flags & DOCLOSE) {
1680f2a2857bSKirk McKusick 		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
16819b971133SKirk McKusick 			(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
168237642196SKirk McKusick 		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
168337642196SKirk McKusick 			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
168437642196SKirk McKusick 	}
168537642196SKirk McKusick 
16869ff5ce6bSBoris Popov 	VOP_DESTROYVOBJECT(vp);
16873c631446SJohn Dyson 
1688df8bae1dSRodney W. Grimes 	/*
1689996c772fSJohn Dyson 	 * If purging an active vnode, it must be closed and
1690996c772fSJohn Dyson 	 * deactivated before being reclaimed. Note that the
1691996c772fSJohn Dyson 	 * VOP_INACTIVE will unlock the vnode.
1692df8bae1dSRodney W. Grimes 	 */
1693df8bae1dSRodney W. Grimes 	if (active) {
1694df8bae1dSRodney W. Grimes 		if (flags & DOCLOSE)
16954d948813SBruce Evans 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
1696996c772fSJohn Dyson 		VOP_INACTIVE(vp, p);
1697996c772fSJohn Dyson 	} else {
1698996c772fSJohn Dyson 		/*
1699996c772fSJohn Dyson 		 * Any other processes trying to obtain this lock must first
1700996c772fSJohn Dyson 		 * wait for VXLOCK to clear, then call the new lock operation.
1701996c772fSJohn Dyson 		 */
1702996c772fSJohn Dyson 		VOP_UNLOCK(vp, 0, p);
1703df8bae1dSRodney W. Grimes 	}
1704df8bae1dSRodney W. Grimes 	/*
1705df8bae1dSRodney W. Grimes 	 * Reclaim the vnode.
1706df8bae1dSRodney W. Grimes 	 */
1707996c772fSJohn Dyson 	if (VOP_RECLAIM(vp, p))
1708df8bae1dSRodney W. Grimes 		panic("vclean: cannot reclaim");
170964d3c7e3SJohn Dyson 
17109a2b8fcaSRobert Watson 	if (active) {
17119a2b8fcaSRobert Watson 		/*
17129a2b8fcaSRobert Watson 		 * Inline copy of vrele() since VOP_INACTIVE
17139a2b8fcaSRobert Watson 		 * has already been called.
17149a2b8fcaSRobert Watson 		 */
17159a2b8fcaSRobert Watson 		simple_lock(&vp->v_interlock);
17169a2b8fcaSRobert Watson 		if (--vp->v_usecount <= 0) {
17179a2b8fcaSRobert Watson #ifdef DIAGNOSTIC
17189a2b8fcaSRobert Watson 			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
17199a2b8fcaSRobert Watson 				vprint("vclean: bad ref count", vp);
17209a2b8fcaSRobert Watson 				panic("vclean: ref cnt");
17219a2b8fcaSRobert Watson 			}
17229a2b8fcaSRobert Watson #endif
17239a2b8fcaSRobert Watson 			vfree(vp);
17249a2b8fcaSRobert Watson 		}
17259a2b8fcaSRobert Watson 		simple_unlock(&vp->v_interlock);
17269a2b8fcaSRobert Watson 	}
172764d3c7e3SJohn Dyson 
1728996c772fSJohn Dyson 	cache_purge(vp);
1729996c772fSJohn Dyson 	if (vp->v_vnlock) {
1730996c772fSJohn Dyson 		FREE(vp->v_vnlock, M_VNODE);
1731996c772fSJohn Dyson 		vp->v_vnlock = NULL;
1732996c772fSJohn Dyson 	}
1733df8bae1dSRodney W. Grimes 
173464d3c7e3SJohn Dyson 	if (VSHOULDFREE(vp))
173564d3c7e3SJohn Dyson 		vfree(vp);
173664d3c7e3SJohn Dyson 
1737df8bae1dSRodney W. Grimes 	/*
1738df8bae1dSRodney W. Grimes 	 * Done with purge, notify sleepers of the grim news.
1739df8bae1dSRodney W. Grimes 	 */
1740df8bae1dSRodney W. Grimes 	vp->v_op = dead_vnodeop_p;
17411cbbd625SGarrett Wollman 	vn_pollgone(vp);
1742df8bae1dSRodney W. Grimes 	vp->v_tag = VT_NON;
1743df8bae1dSRodney W. Grimes 	vp->v_flag &= ~VXLOCK;
1744df8bae1dSRodney W. Grimes 	if (vp->v_flag & VXWANT) {
1745df8bae1dSRodney W. Grimes 		vp->v_flag &= ~VXWANT;
1746df8bae1dSRodney W. Grimes 		wakeup((caddr_t) vp);
1747df8bae1dSRodney W. Grimes 	}
1748df8bae1dSRodney W. Grimes }
1749df8bae1dSRodney W. Grimes 
1750df8bae1dSRodney W. Grimes /*
1751df8bae1dSRodney W. Grimes  * Eliminate all activity associated with the requested vnode
1752df8bae1dSRodney W. Grimes  * and with all vnodes aliased to the requested vnode.
1753df8bae1dSRodney W. Grimes  */
1754996c772fSJohn Dyson int
1755996c772fSJohn Dyson vop_revoke(ap)
1756996c772fSJohn Dyson 	struct vop_revoke_args /* {
1757996c772fSJohn Dyson 		struct vnode *a_vp;
1758996c772fSJohn Dyson 		int a_flags;
1759996c772fSJohn Dyson 	} */ *ap;
1760df8bae1dSRodney W. Grimes {
1761996c772fSJohn Dyson 	struct vnode *vp, *vq;
1762dbafb366SPoul-Henning Kamp 	dev_t dev;
1763996c772fSJohn Dyson 
17645526d2d9SEivind Eklund 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
1765996c772fSJohn Dyson 
1766996c772fSJohn Dyson 	vp = ap->a_vp;
1767df8bae1dSRodney W. Grimes 	/*
1768996c772fSJohn Dyson 	 * If a vgone (or vclean) is already in progress,
1769996c772fSJohn Dyson 	 * wait until it is done and return.
1770df8bae1dSRodney W. Grimes 	 */
1771df8bae1dSRodney W. Grimes 	if (vp->v_flag & VXLOCK) {
1772df8bae1dSRodney W. Grimes 		vp->v_flag |= VXWANT;
1773996c772fSJohn Dyson 		simple_unlock(&vp->v_interlock);
1774996c772fSJohn Dyson 		tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1775996c772fSJohn Dyson 		return (0);
1776df8bae1dSRodney W. Grimes 	}
1777dbafb366SPoul-Henning Kamp 	dev = vp->v_rdev;
1778dbafb366SPoul-Henning Kamp 	for (;;) {
1779996c772fSJohn Dyson 		simple_lock(&spechash_slock);
1780dbafb366SPoul-Henning Kamp 		vq = SLIST_FIRST(&dev->si_hlist);
1781996c772fSJohn Dyson 		simple_unlock(&spechash_slock);
1782dbafb366SPoul-Henning Kamp 		if (!vq)
1783df8bae1dSRodney W. Grimes 			break;
1784dbafb366SPoul-Henning Kamp 		vgone(vq);
1785df8bae1dSRodney W. Grimes 	}
1786996c772fSJohn Dyson 	return (0);
1787996c772fSJohn Dyson }
1788996c772fSJohn Dyson 
1789996c772fSJohn Dyson /*
1790996c772fSJohn Dyson  * Recycle an unused vnode to the front of the free list.
1791996c772fSJohn Dyson  * Release the passed interlock if the vnode will be recycled.
1792996c772fSJohn Dyson  */
1793996c772fSJohn Dyson int
1794996c772fSJohn Dyson vrecycle(vp, inter_lkp, p)
1795996c772fSJohn Dyson 	struct vnode *vp;
1796996c772fSJohn Dyson 	struct simplelock *inter_lkp;
1797996c772fSJohn Dyson 	struct proc *p;
1798996c772fSJohn Dyson {
1799996c772fSJohn Dyson 
1800996c772fSJohn Dyson 	simple_lock(&vp->v_interlock);
1801996c772fSJohn Dyson 	if (vp->v_usecount == 0) {
1802996c772fSJohn Dyson 		if (inter_lkp) {
1803996c772fSJohn Dyson 			simple_unlock(inter_lkp);
1804996c772fSJohn Dyson 		}
1805996c772fSJohn Dyson 		vgonel(vp, p);
1806996c772fSJohn Dyson 		return (1);
1807996c772fSJohn Dyson 	}
1808996c772fSJohn Dyson 	simple_unlock(&vp->v_interlock);
1809996c772fSJohn Dyson 	return (0);
1810df8bae1dSRodney W. Grimes }
1811df8bae1dSRodney W. Grimes 
1812df8bae1dSRodney W. Grimes /*
1813df8bae1dSRodney W. Grimes  * Eliminate all activity associated with a vnode
1814df8bae1dSRodney W. Grimes  * in preparation for reuse.
1815df8bae1dSRodney W. Grimes  */
181626f9a767SRodney W. Grimes void
181726f9a767SRodney W. Grimes vgone(vp)
1818df8bae1dSRodney W. Grimes 	register struct vnode *vp;
1819df8bae1dSRodney W. Grimes {
1820996c772fSJohn Dyson 	struct proc *p = curproc;	/* XXX */
1821996c772fSJohn Dyson 
1822996c772fSJohn Dyson 	simple_lock(&vp->v_interlock);
1823996c772fSJohn Dyson 	vgonel(vp, p);
1824996c772fSJohn Dyson }
1825996c772fSJohn Dyson 
1826996c772fSJohn Dyson /*
1827996c772fSJohn Dyson  * vgone, with the vp interlock held.
1828996c772fSJohn Dyson  */
1829b7a5f3caSRobert Watson void
1830996c772fSJohn Dyson vgonel(vp, p)
1831996c772fSJohn Dyson 	struct vnode *vp;
1832996c772fSJohn Dyson 	struct proc *p;
1833996c772fSJohn Dyson {
1834925a3a41SJohn Dyson 	int s;
1835df8bae1dSRodney W. Grimes 
1836df8bae1dSRodney W. Grimes 	/*
1837996c772fSJohn Dyson 	 * If a vgone (or vclean) is already in progress,
1838996c772fSJohn Dyson 	 * wait until it is done and return.
1839df8bae1dSRodney W. Grimes 	 */
1840df8bae1dSRodney W. Grimes 	if (vp->v_flag & VXLOCK) {
1841df8bae1dSRodney W. Grimes 		vp->v_flag |= VXWANT;
1842996c772fSJohn Dyson 		simple_unlock(&vp->v_interlock);
1843996c772fSJohn Dyson 		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1844df8bae1dSRodney W. Grimes 		return;
1845df8bae1dSRodney W. Grimes 	}
1846ad980522SJohn Dyson 
1847df8bae1dSRodney W. Grimes 	/*
1848df8bae1dSRodney W. Grimes 	 * Clean out the filesystem specific data.
1849df8bae1dSRodney W. Grimes 	 */
1850996c772fSJohn Dyson 	vclean(vp, DOCLOSE, p);
18512d8acc0fSJohn Dyson 	simple_lock(&vp->v_interlock);
18522be70f79SJohn Dyson 
1853df8bae1dSRodney W. Grimes 	/*
1854df8bae1dSRodney W. Grimes 	 * Delete from old mount point vnode list, if on one.
1855df8bae1dSRodney W. Grimes 	 */
1856996c772fSJohn Dyson 	if (vp->v_mount != NULL)
1857996c772fSJohn Dyson 		insmntque(vp, (struct mount *)0);
1858df8bae1dSRodney W. Grimes 	/*
1859996c772fSJohn Dyson 	 * If it is a special device, remove it from the special device
1860996c772fSJohn Dyson 	 * alias list if it is on one.
1861df8bae1dSRodney W. Grimes 	 */
1862dbafb366SPoul-Henning Kamp 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
1863996c772fSJohn Dyson 		simple_lock(&spechash_slock);
1864b0d17ba6SPoul-Henning Kamp 		SLIST_REMOVE(&vp->v_rdev->si_hlist, vp, vnode, v_specnext);
1865d137acccSPoul-Henning Kamp 		freedev(vp->v_rdev);
1866996c772fSJohn Dyson 		simple_unlock(&spechash_slock);
18674d4f9323SPoul-Henning Kamp 		vp->v_rdev = NULL;
1868df8bae1dSRodney W. Grimes 	}
1869996c772fSJohn Dyson 
1870df8bae1dSRodney W. Grimes 	/*
1871996c772fSJohn Dyson 	 * If it is on the freelist and not already at the head,
1872c904bbbdSKirk McKusick 	 * move it to the head of the list. We test the VDOOMED
1873c904bbbdSKirk McKusick 	 * flag and a reference count of zero because the vnode
1874996c772fSJohn Dyson 	 * will be removed from the free list by getnewvnode,
1875996c772fSJohn Dyson 	 * but will not have its reference count incremented until
1876996c772fSJohn Dyson 	 * after calling vgone. If the reference count were
1877996c772fSJohn Dyson 	 * incremented first, vgone would (incorrectly) try to
1878996c772fSJohn Dyson 	 * close the previous instance of the underlying object.
1879df8bae1dSRodney W. Grimes 	 */
1880a051452aSPoul-Henning Kamp 	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1881925a3a41SJohn Dyson 		s = splbio();
1882996c772fSJohn Dyson 		simple_lock(&vnode_free_list_slock);
1883c904bbbdSKirk McKusick 		if (vp->v_flag & VFREE)
1884df8bae1dSRodney W. Grimes 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1885c904bbbdSKirk McKusick 		else
1886d09a16d8STor Egge 			freevnodes++;
1887925a3a41SJohn Dyson 		vp->v_flag |= VFREE;
1888df8bae1dSRodney W. Grimes 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1889996c772fSJohn Dyson 		simple_unlock(&vnode_free_list_slock);
1890925a3a41SJohn Dyson 		splx(s);
18910082fb46SJordan K. Hubbard 	}
1892996c772fSJohn Dyson 
1893df8bae1dSRodney W. Grimes 	vp->v_type = VBAD;
189495e5e988SJohn Dyson 	simple_unlock(&vp->v_interlock);
1895df8bae1dSRodney W. Grimes }
1896df8bae1dSRodney W. Grimes 
1897df8bae1dSRodney W. Grimes /*
1898df8bae1dSRodney W. Grimes  * Lookup a vnode by device number.
1899df8bae1dSRodney W. Grimes  */
190026f9a767SRodney W. Grimes int
1901df8bae1dSRodney W. Grimes vfinddev(dev, type, vpp)
1902df8bae1dSRodney W. Grimes 	dev_t dev;
1903df8bae1dSRodney W. Grimes 	enum vtype type;
1904df8bae1dSRodney W. Grimes 	struct vnode **vpp;
1905df8bae1dSRodney W. Grimes {
1906dbafb366SPoul-Henning Kamp 	struct vnode *vp;
1907df8bae1dSRodney W. Grimes 
1908b98afd0dSBruce Evans 	simple_lock(&spechash_slock);
1909dbafb366SPoul-Henning Kamp 	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
1910dbafb366SPoul-Henning Kamp 		if (type == vp->v_type) {
1911df8bae1dSRodney W. Grimes 			*vpp = vp;
1912dbafb366SPoul-Henning Kamp 			simple_unlock(&spechash_slock);
1913dbafb366SPoul-Henning Kamp 			return (1);
1914dbafb366SPoul-Henning Kamp 		}
1915df8bae1dSRodney W. Grimes 	}
1916b98afd0dSBruce Evans 	simple_unlock(&spechash_slock);
1917dbafb366SPoul-Henning Kamp 	return (0);
1918df8bae1dSRodney W. Grimes }
1919df8bae1dSRodney W. Grimes 
1920df8bae1dSRodney W. Grimes /*
1921df8bae1dSRodney W. Grimes  * Calculate the total number of references to a special device.
1922df8bae1dSRodney W. Grimes  */
192326f9a767SRodney W. Grimes int
1924df8bae1dSRodney W. Grimes vcount(vp)
1925dbafb366SPoul-Henning Kamp 	struct vnode *vp;
1926df8bae1dSRodney W. Grimes {
192796267288SPoul-Henning Kamp 	struct vnode *vq;
1928df8bae1dSRodney W. Grimes 	int count;
1929df8bae1dSRodney W. Grimes 
1930dbafb366SPoul-Henning Kamp 	count = 0;
1931b98afd0dSBruce Evans 	simple_lock(&spechash_slock);
1932b0d17ba6SPoul-Henning Kamp 	SLIST_FOREACH(vq, &vp->v_rdev->si_hlist, v_specnext)
1933df8bae1dSRodney W. Grimes 		count += vq->v_usecount;
1934b98afd0dSBruce Evans 	simple_unlock(&spechash_slock);
1935df8bae1dSRodney W. Grimes 	return (count);
1936df8bae1dSRodney W. Grimes }
1937dbafb366SPoul-Henning Kamp 
19387fab7799SPeter Wemm /*
1939e8359a57SSøren Schmidt  * Same as vcount(), but taking the dev_t as argument.
1940e8359a57SSøren Schmidt  */
1942e8359a57SSøren Schmidt int
1943e8359a57SSøren Schmidt count_dev(dev)
1944e8359a57SSøren Schmidt 	dev_t dev;
1945e8359a57SSøren Schmidt {
1946e8359a57SSøren Schmidt 	struct vnode *vp;
1947e8359a57SSøren Schmidt 
1948e8359a57SSøren Schmidt 	vp = SLIST_FIRST(&dev->si_hlist);
1949e8359a57SSøren Schmidt 	if (vp == NULL)
1950e8359a57SSøren Schmidt 		return (0);
1951e8359a57SSøren Schmidt 	return(vcount(vp));
1952e8359a57SSøren Schmidt }
1953e8359a57SSøren Schmidt 
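/*
 * Illustrative sketch, not part of the original file: a device close
 * routine deciding whether this is the last close across all aliases
 * of the device, which v_usecount alone cannot tell it.
 */
static int
example_is_last_close(struct vnode *vp)
{

	return (vcount(vp) <= 1);
}
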
1954e8359a57SSøren Schmidt /*
1955df8bae1dSRodney W. Grimes  * Print out a description of a vnode.
1956df8bae1dSRodney W. Grimes  */
1957df8bae1dSRodney W. Grimes static char *typename[] =
1958df8bae1dSRodney W. Grimes {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1959df8bae1dSRodney W. Grimes 
196026f9a767SRodney W. Grimes void
1961df8bae1dSRodney W. Grimes vprint(label, vp)
1962df8bae1dSRodney W. Grimes 	char *label;
1963dbafb366SPoul-Henning Kamp 	struct vnode *vp;
1964df8bae1dSRodney W. Grimes {
19652127f260SArchie Cobbs 	char buf[96];
1966df8bae1dSRodney W. Grimes 
1967df8bae1dSRodney W. Grimes 	if (label != NULL)
1968ac1e407bSBruce Evans 		printf("%s: %p: ", label, (void *)vp);
1969de15ef6aSDoug Rabson 	else
1970ac1e407bSBruce Evans 		printf("%p: ", (void *)vp);
1971ac1e407bSBruce Evans 	printf("type %s, usecount %d, writecount %d, refcount %d,",
1972df8bae1dSRodney W. Grimes 	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1973df8bae1dSRodney W. Grimes 	    vp->v_holdcnt);
1974df8bae1dSRodney W. Grimes 	buf[0] = '\0';
1975df8bae1dSRodney W. Grimes 	if (vp->v_flag & VROOT)
1976df8bae1dSRodney W. Grimes 		strcat(buf, "|VROOT");
1977df8bae1dSRodney W. Grimes 	if (vp->v_flag & VTEXT)
1978df8bae1dSRodney W. Grimes 		strcat(buf, "|VTEXT");
1979df8bae1dSRodney W. Grimes 	if (vp->v_flag & VSYSTEM)
1980df8bae1dSRodney W. Grimes 		strcat(buf, "|VSYSTEM");
1981df8bae1dSRodney W. Grimes 	if (vp->v_flag & VXLOCK)
1982df8bae1dSRodney W. Grimes 		strcat(buf, "|VXLOCK");
1983df8bae1dSRodney W. Grimes 	if (vp->v_flag & VXWANT)
1984df8bae1dSRodney W. Grimes 		strcat(buf, "|VXWANT");
1985df8bae1dSRodney W. Grimes 	if (vp->v_flag & VBWAIT)
1986df8bae1dSRodney W. Grimes 		strcat(buf, "|VBWAIT");
1987a051452aSPoul-Henning Kamp 	if (vp->v_flag & VDOOMED)
1988a051452aSPoul-Henning Kamp 		strcat(buf, "|VDOOMED");
1989a051452aSPoul-Henning Kamp 	if (vp->v_flag & VFREE)
1990a051452aSPoul-Henning Kamp 		strcat(buf, "|VFREE");
199195e5e988SJohn Dyson 	if (vp->v_flag & VOBJBUF)
199295e5e988SJohn Dyson 		strcat(buf, "|VOBJBUF");
1993df8bae1dSRodney W. Grimes 	if (buf[0] != '\0')
1994df8bae1dSRodney W. Grimes 		printf(" flags (%s)", &buf[1]);
1995df8bae1dSRodney W. Grimes 	if (vp->v_data == NULL) {
1996df8bae1dSRodney W. Grimes 		printf("\n");
1997df8bae1dSRodney W. Grimes 	} else {
1998df8bae1dSRodney W. Grimes 		printf("\n\t");
1999df8bae1dSRodney W. Grimes 		VOP_PRINT(vp);
2000df8bae1dSRodney W. Grimes 	}
2001df8bae1dSRodney W. Grimes }
2002df8bae1dSRodney W. Grimes 
20031a477b0cSDavid Greenman #ifdef DDB
2004f5ef029eSPoul-Henning Kamp #include <ddb/ddb.h>
2005df8bae1dSRodney W. Grimes /*
2006df8bae1dSRodney W. Grimes  * List all of the locked vnodes in the system.
2007df8bae1dSRodney W. Grimes  * Called when debugging the kernel.
2008df8bae1dSRodney W. Grimes  */
2009f5ef029eSPoul-Henning Kamp DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
2010df8bae1dSRodney W. Grimes {
2011c35e283aSBruce Evans 	struct proc *p = curproc;	/* XXX */
2012c35e283aSBruce Evans 	struct mount *mp, *nmp;
2013c35e283aSBruce Evans 	struct vnode *vp;
2014df8bae1dSRodney W. Grimes 
2015df8bae1dSRodney W. Grimes 	printf("Locked vnodes\n");
2016c35e283aSBruce Evans 	simple_lock(&mountlist_slock);
20170429e37aSPoul-Henning Kamp 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
2018c35e283aSBruce Evans 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
20190429e37aSPoul-Henning Kamp 			nmp = TAILQ_NEXT(mp, mnt_list);
2020c35e283aSBruce Evans 			continue;
2021c35e283aSBruce Evans 		}
20221b727751SPoul-Henning Kamp 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
20236bdfe06aSEivind Eklund 			if (VOP_ISLOCKED(vp, NULL))
2024df8bae1dSRodney W. Grimes 				vprint((char *)0, vp);
2025df8bae1dSRodney W. Grimes 		}
2026c35e283aSBruce Evans 		simple_lock(&mountlist_slock);
20270429e37aSPoul-Henning Kamp 		nmp = TAILQ_NEXT(mp, mnt_list);
2028c35e283aSBruce Evans 		vfs_unbusy(mp, p);
2029c35e283aSBruce Evans 	}
2030c35e283aSBruce Evans 	simple_unlock(&mountlist_slock);
2031df8bae1dSRodney W. Grimes }
2032df8bae1dSRodney W. Grimes #endif
2033df8bae1dSRodney W. Grimes 
20343a76a594SBruce Evans /*
20353a76a594SBruce Evans  * Top level filesystem related information gathering.
20363a76a594SBruce Evans  */
203782d9ae4eSPoul-Henning Kamp static int	sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS));
20383a76a594SBruce Evans 
20394a8b9660SBruce Evans static int
204082d9ae4eSPoul-Henning Kamp vfs_sysctl(SYSCTL_HANDLER_ARGS)
2041a896f025SBruce Evans {
20424a8b9660SBruce Evans 	int *name = (int *)arg1 - 1;	/* XXX */
20434a8b9660SBruce Evans 	u_int namelen = arg2 + 1;	/* XXX */
2044a896f025SBruce Evans 	struct vfsconf *vfsp;
2045a896f025SBruce Evans 
2046f5ce6752SBruce Evans #if 1 || defined(COMPAT_PRELITE2)
20473a76a594SBruce Evans 	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
20484a8b9660SBruce Evans 	if (namelen == 1)
20493a76a594SBruce Evans 		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
2050dc91a89eSBruce Evans #endif
2051a896f025SBruce Evans 
20524a8b9660SBruce Evans #ifdef notyet
20533a76a594SBruce Evans 	/* all sysctl names at this level are at least name and field */
20543a76a594SBruce Evans 	if (namelen < 2)
20553a76a594SBruce Evans 		return (ENOTDIR);		/* overloaded */
20563a76a594SBruce Evans 	if (name[0] != VFS_GENERIC) {
20573a76a594SBruce Evans 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
20583a76a594SBruce Evans 			if (vfsp->vfc_typenum == name[0])
20593a76a594SBruce Evans 				break;
20603a76a594SBruce Evans 		if (vfsp == NULL)
20613a76a594SBruce Evans 			return (EOPNOTSUPP);
20623a76a594SBruce Evans 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
20633a76a594SBruce Evans 		    oldp, oldlenp, newp, newlen, p));
20643a76a594SBruce Evans 	}
20654a8b9660SBruce Evans #endif
20663a76a594SBruce Evans 	switch (name[1]) {
20673a76a594SBruce Evans 	case VFS_MAXTYPENUM:
20683a76a594SBruce Evans 		if (namelen != 2)
20693a76a594SBruce Evans 			return (ENOTDIR);
20703a76a594SBruce Evans 		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
20713a76a594SBruce Evans 	case VFS_CONF:
20723a76a594SBruce Evans 		if (namelen != 3)
20733a76a594SBruce Evans 			return (ENOTDIR);	/* overloaded */
20743a76a594SBruce Evans 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
20753a76a594SBruce Evans 			if (vfsp->vfc_typenum == name[2])
20763a76a594SBruce Evans 				break;
20773a76a594SBruce Evans 		if (vfsp == NULL)
20783a76a594SBruce Evans 			return (EOPNOTSUPP);
20793a76a594SBruce Evans 		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
20803a76a594SBruce Evans 	}
20813a76a594SBruce Evans 	return (EOPNOTSUPP);
20823a76a594SBruce Evans }
20833a76a594SBruce Evans 
20844a8b9660SBruce Evans SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
20854a8b9660SBruce Evans 	"Generic filesystem");
20864a8b9660SBruce Evans 
2087f5ce6752SBruce Evans #if 1 || defined(COMPAT_PRELITE2)
2088a896f025SBruce Evans 
2089a896f025SBruce Evans static int
209082d9ae4eSPoul-Henning Kamp sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
2091a896f025SBruce Evans {
2092a896f025SBruce Evans 	int error;
2093a896f025SBruce Evans 	struct vfsconf *vfsp;
2094a896f025SBruce Evans 	struct ovfsconf ovfs;
20953a76a594SBruce Evans 
20963a76a594SBruce Evans 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
2097a896f025SBruce Evans 		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
2098a896f025SBruce Evans 		strcpy(ovfs.vfc_name, vfsp->vfc_name);
2099a896f025SBruce Evans 		ovfs.vfc_index = vfsp->vfc_typenum;
2100a896f025SBruce Evans 		ovfs.vfc_refcount = vfsp->vfc_refcount;
2101a896f025SBruce Evans 		ovfs.vfc_flags = vfsp->vfc_flags;
2102a896f025SBruce Evans 		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
2103a896f025SBruce Evans 		if (error)
2104a896f025SBruce Evans 			return error;
2105a896f025SBruce Evans 	}
2106a896f025SBruce Evans 	return 0;
2107a896f025SBruce Evans }
2108a896f025SBruce Evans 
2109f5ce6752SBruce Evans #endif /* 1 || COMPAT_PRELITE2 */
2110a896f025SBruce Evans 
21114a11ca4eSPoul-Henning Kamp #if 0
2112df8bae1dSRodney W. Grimes #define KINFO_VNODESLOP	10
2113df8bae1dSRodney W. Grimes /*
2114df8bae1dSRodney W. Grimes  * Dump vnode list (via sysctl).
2115df8bae1dSRodney W. Grimes  * Copyout address of vnode followed by vnode.
2116df8bae1dSRodney W. Grimes  */
2117df8bae1dSRodney W. Grimes /* ARGSUSED */
21184b2af45fSPoul-Henning Kamp static int
211982d9ae4eSPoul-Henning Kamp sysctl_vnode(SYSCTL_HANDLER_ARGS)
2120df8bae1dSRodney W. Grimes {
2121996c772fSJohn Dyson 	struct proc *p = curproc;	/* XXX */
2122c35e283aSBruce Evans 	struct mount *mp, *nmp;
2123c35e283aSBruce Evans 	struct vnode *nvp, *vp;
2124df8bae1dSRodney W. Grimes 	int error;
2125df8bae1dSRodney W. Grimes 
2126df8bae1dSRodney W. Grimes #define VPTRSZ	sizeof (struct vnode *)
2127df8bae1dSRodney W. Grimes #define VNODESZ	sizeof (struct vnode)
21284b2af45fSPoul-Henning Kamp 
21294b2af45fSPoul-Henning Kamp 	req->lock = 0;
21302d0b1d70SPoul-Henning Kamp 	if (!req->oldptr) /* Make an estimate */
21314b2af45fSPoul-Henning Kamp 		return (SYSCTL_OUT(req, 0,
21324b2af45fSPoul-Henning Kamp 			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
2133df8bae1dSRodney W. Grimes 
2134c35e283aSBruce Evans 	simple_lock(&mountlist_slock);
21350429e37aSPoul-Henning Kamp 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
2136c35e283aSBruce Evans 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
21370429e37aSPoul-Henning Kamp 			nmp = TAILQ_NEXT(mp, mnt_list);
2138df8bae1dSRodney W. Grimes 			continue;
2139c35e283aSBruce Evans 		}
2140df8bae1dSRodney W. Grimes again:
2141c35e283aSBruce Evans 		simple_lock(&mntvnode_slock);
21421b727751SPoul-Henning Kamp 		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
2143df8bae1dSRodney W. Grimes 		     vp != NULL;
2144c35e283aSBruce Evans 		     vp = nvp) {
2145df8bae1dSRodney W. Grimes 			/*
2146c35e283aSBruce Evans 			 * Check that the vp is still associated with
2147c35e283aSBruce Evans 			 * this filesystem.  RACE: could have been
2148c35e283aSBruce Evans 			 * recycled onto the same filesystem.
2149df8bae1dSRodney W. Grimes 			 */
2150df8bae1dSRodney W. Grimes 			if (vp->v_mount != mp) {
2151c35e283aSBruce Evans 				simple_unlock(&mntvnode_slock);
2152df8bae1dSRodney W. Grimes 				goto again;
2153df8bae1dSRodney W. Grimes 			}
21541b727751SPoul-Henning Kamp 			nvp = LIST_NEXT(vp, v_mntvnodes);
2155c35e283aSBruce Evans 			simple_unlock(&mntvnode_slock);
21564b2af45fSPoul-Henning Kamp 			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
2157c35e283aSBruce Evans 			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
2158df8bae1dSRodney W. Grimes 				return (error);
2159c35e283aSBruce Evans 			simple_lock(&mntvnode_slock);
2160e887950aSBruce Evans 		}
2161c35e283aSBruce Evans 		simple_unlock(&mntvnode_slock);
2162c35e283aSBruce Evans 		simple_lock(&mountlist_slock);
21630429e37aSPoul-Henning Kamp 		nmp = TAILQ_NEXT(mp, mnt_list);
2164996c772fSJohn Dyson 		vfs_unbusy(mp, p);
2165df8bae1dSRodney W. Grimes 	}
2166c35e283aSBruce Evans 	simple_unlock(&mountlist_slock);
2167df8bae1dSRodney W. Grimes 
2168df8bae1dSRodney W. Grimes 	return (0);
2169df8bae1dSRodney W. Grimes }
21704a11ca4eSPoul-Henning Kamp #endif
2171df8bae1dSRodney W. Grimes 
21722e58c0f8SDavid Greenman /*
21732e58c0f8SDavid Greenman  * XXX
21742e58c0f8SDavid Greenman  * Exporting the vnode list on large systems causes them to crash.
21752e58c0f8SDavid Greenman  * Exporting the vnode list on medium systems causes sysctl to coredump.
21762e58c0f8SDavid Greenman  */
21772e58c0f8SDavid Greenman #if 0
217865d0bc13SPoul-Henning Kamp SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
217965d0bc13SPoul-Henning Kamp 	0, 0, sysctl_vnode, "S,vnode", "");
21802e58c0f8SDavid Greenman #endif
21814b2af45fSPoul-Henning Kamp 
2182df8bae1dSRodney W. Grimes /*
2183df8bae1dSRodney W. Grimes  * Check to see if a filesystem is mounted on a block device.
2184df8bae1dSRodney W. Grimes  */
2185df8bae1dSRodney W. Grimes int
2186df8bae1dSRodney W. Grimes vfs_mountedon(vp)
2187996c772fSJohn Dyson 	struct vnode *vp;
2188df8bae1dSRodney W. Grimes {
2189df8bae1dSRodney W. Grimes 
2190b1897c19SJulian Elischer 	if (vp->v_specmountpoint != NULL)
2191df8bae1dSRodney W. Grimes 		return (EBUSY);
2192dbafb366SPoul-Henning Kamp 	return (0);
2193996c772fSJohn Dyson }
2194996c772fSJohn Dyson 
2195996c772fSJohn Dyson /*
2196996c772fSJohn Dyson  * Unmount all filesystems. The list is traversed in reverse order
21977c1557c4SBruce Evans  * of mounting to avoid dependencies.
2198996c772fSJohn Dyson  */
2199996c772fSJohn Dyson void
2200996c772fSJohn Dyson vfs_unmountall()
2201996c772fSJohn Dyson {
22020429e37aSPoul-Henning Kamp 	struct mount *mp;
2203cb87a87cSTor Egge 	struct proc *p;
2204996c772fSJohn Dyson 	int error;
2205996c772fSJohn Dyson 
2206cb87a87cSTor Egge 	if (curproc != NULL)
2207cb87a87cSTor Egge 		p = curproc;
2208cb87a87cSTor Egge 	else
2209cb87a87cSTor Egge 		p = initproc;	/* XXX XXX should this be proc0? */
22107c1557c4SBruce Evans 	/*
22117c1557c4SBruce Evans 	 * Since this only runs when rebooting, it is not interlocked.
22127c1557c4SBruce Evans 	 */
22130429e37aSPoul-Henning Kamp 	while(!TAILQ_EMPTY(&mountlist)) {
22140429e37aSPoul-Henning Kamp 		mp = TAILQ_LAST(&mountlist, mntlist);
22157c1557c4SBruce Evans 		error = dounmount(mp, MNT_FORCE, p);
2216996c772fSJohn Dyson 		if (error) {
22170429e37aSPoul-Henning Kamp 			TAILQ_REMOVE(&mountlist, mp, mnt_list);
22187c1557c4SBruce Evans 			printf("unmount of %s failed (",
22197c1557c4SBruce Evans 			    mp->mnt_stat.f_mntonname);
2220996c772fSJohn Dyson 			if (error == EBUSY)
2221996c772fSJohn Dyson 				printf("BUSY)\n");
2222996c772fSJohn Dyson 			else
2223996c772fSJohn Dyson 				printf("%d)\n", error);
22240429e37aSPoul-Henning Kamp 		} else {
22250429e37aSPoul-Henning Kamp 			/* The unmount has removed mp from the mountlist */
2226996c772fSJohn Dyson 		}
2227996c772fSJohn Dyson 	}
2228df8bae1dSRodney W. Grimes }
2229df8bae1dSRodney W. Grimes 
2230df8bae1dSRodney W. Grimes /*
2231df8bae1dSRodney W. Grimes  * Build hash lists of net addresses and hang them off the mount point.
2232df8bae1dSRodney W. Grimes  * Called by ufs_mount() to set up the lists of export addresses.
2233df8bae1dSRodney W. Grimes  */
2234df8bae1dSRodney W. Grimes static int
2235514ede09SBruce Evans vfs_hang_addrlist(mp, nep, argp)
2236514ede09SBruce Evans 	struct mount *mp;
2237514ede09SBruce Evans 	struct netexport *nep;
2238514ede09SBruce Evans 	struct export_args *argp;
2239df8bae1dSRodney W. Grimes {
2240df8bae1dSRodney W. Grimes 	register struct netcred *np;
2241df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
2242df8bae1dSRodney W. Grimes 	register int i;
2243df8bae1dSRodney W. Grimes 	struct radix_node *rn;
2244df8bae1dSRodney W. Grimes 	struct sockaddr *saddr, *smask = 0;
2245df8bae1dSRodney W. Grimes 	struct domain *dom;
2246df8bae1dSRodney W. Grimes 	int error;
2247df8bae1dSRodney W. Grimes 
2248df8bae1dSRodney W. Grimes 	if (argp->ex_addrlen == 0) {
2249df8bae1dSRodney W. Grimes 		if (mp->mnt_flag & MNT_DEFEXPORTED)
2250df8bae1dSRodney W. Grimes 			return (EPERM);
2251df8bae1dSRodney W. Grimes 		np = &nep->ne_defexported;
2252df8bae1dSRodney W. Grimes 		np->netc_exflags = argp->ex_flags;
2253df8bae1dSRodney W. Grimes 		np->netc_anon = argp->ex_anon;
2254df8bae1dSRodney W. Grimes 		np->netc_anon.cr_ref = 1;
2255df8bae1dSRodney W. Grimes 		mp->mnt_flag |= MNT_DEFEXPORTED;
2256df8bae1dSRodney W. Grimes 		return (0);
2257df8bae1dSRodney W. Grimes 	}
2258df8bae1dSRodney W. Grimes 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2259df8bae1dSRodney W. Grimes 	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
2260df8bae1dSRodney W. Grimes 	bzero((caddr_t) np, i);
2261df8bae1dSRodney W. Grimes 	saddr = (struct sockaddr *) (np + 1);
2262bb56ec4aSPoul-Henning Kamp 	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
2263df8bae1dSRodney W. Grimes 		goto out;
2264df8bae1dSRodney W. Grimes 	if (saddr->sa_len > argp->ex_addrlen)
2265df8bae1dSRodney W. Grimes 		saddr->sa_len = argp->ex_addrlen;
2266df8bae1dSRodney W. Grimes 	if (argp->ex_masklen) {
2267df8bae1dSRodney W. Grimes 		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
22685f61c81dSPeter Wemm 		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
2269df8bae1dSRodney W. Grimes 		if (error)
2270df8bae1dSRodney W. Grimes 			goto out;
2271df8bae1dSRodney W. Grimes 		if (smask->sa_len > argp->ex_masklen)
2272df8bae1dSRodney W. Grimes 			smask->sa_len = argp->ex_masklen;
2273df8bae1dSRodney W. Grimes 	}
2274df8bae1dSRodney W. Grimes 	i = saddr->sa_family;
2275df8bae1dSRodney W. Grimes 	if ((rnh = nep->ne_rtable[i]) == 0) {
2276df8bae1dSRodney W. Grimes 		/*
22770d94caffSDavid Greenman 		 * Seems silly to initialize every AF when most are not used;
22780d94caffSDavid Greenman 		 * do so on demand here.
2279df8bae1dSRodney W. Grimes 		 */
2280df8bae1dSRodney W. Grimes 		for (dom = domains; dom; dom = dom->dom_next)
2281df8bae1dSRodney W. Grimes 			if (dom->dom_family == i && dom->dom_rtattach) {
2282df8bae1dSRodney W. Grimes 				dom->dom_rtattach((void **) &nep->ne_rtable[i],
2283df8bae1dSRodney W. Grimes 				    dom->dom_rtoffset);
2284df8bae1dSRodney W. Grimes 				break;
2285df8bae1dSRodney W. Grimes 			}
2286df8bae1dSRodney W. Grimes 		if ((rnh = nep->ne_rtable[i]) == 0) {
2287df8bae1dSRodney W. Grimes 			error = ENOBUFS;
2288df8bae1dSRodney W. Grimes 			goto out;
2289df8bae1dSRodney W. Grimes 		}
2290df8bae1dSRodney W. Grimes 	}
2291df8bae1dSRodney W. Grimes 	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
2292df8bae1dSRodney W. Grimes 	    np->netc_rnodes);
2293df8bae1dSRodney W. Grimes 	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
2294df8bae1dSRodney W. Grimes 		error = EPERM;
2295df8bae1dSRodney W. Grimes 		goto out;
2296df8bae1dSRodney W. Grimes 	}
2297df8bae1dSRodney W. Grimes 	np->netc_exflags = argp->ex_flags;
2298df8bae1dSRodney W. Grimes 	np->netc_anon = argp->ex_anon;
2299df8bae1dSRodney W. Grimes 	np->netc_anon.cr_ref = 1;
2300df8bae1dSRodney W. Grimes 	return (0);
2301df8bae1dSRodney W. Grimes out:
2302df8bae1dSRodney W. Grimes 	free(np, M_NETADDR);
2303df8bae1dSRodney W. Grimes 	return (error);
2304df8bae1dSRodney W. Grimes }
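
/*
 * Hedged illustration of the userland-visible export entry consumed
 * above; the field values are assumptions for the example, the
 * authoritative layout is struct export_args in sys/mount.h.
 */
#if 0
	struct sockaddr_in sin, mask;	/* filled in with client net/mask */
	struct export_args ea;

	bzero(&ea, sizeof(ea));
	ea.ex_flags = MNT_EXPORTED | MNT_EXRDONLY;	/* read-only export */
	ea.ex_addr = (struct sockaddr *)&sin;	/* client host or network */
	ea.ex_addrlen = sizeof(sin);
	ea.ex_mask = (struct sockaddr *)&mask;	/* netmask; len 0 for a host */
	ea.ex_masklen = sizeof(mask);
	/* ea.ex_anon is the credential substituted for anonymous users */
#endif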
2305df8bae1dSRodney W. Grimes 
2306df8bae1dSRodney W. Grimes /* ARGSUSED */
2307df8bae1dSRodney W. Grimes static int
2308514ede09SBruce Evans vfs_free_netcred(rn, w)
2309514ede09SBruce Evans 	struct radix_node *rn;
2310514ede09SBruce Evans 	void *w;
2311df8bae1dSRodney W. Grimes {
2312df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh = (struct radix_node_head *) w;
2313df8bae1dSRodney W. Grimes 
2314df8bae1dSRodney W. Grimes 	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2315df8bae1dSRodney W. Grimes 	free((caddr_t) rn, M_NETADDR);
2316df8bae1dSRodney W. Grimes 	return (0);
2317df8bae1dSRodney W. Grimes }
2318df8bae1dSRodney W. Grimes 
2319df8bae1dSRodney W. Grimes /*
2320df8bae1dSRodney W. Grimes  * Free the net address hash lists that are hanging off the mount points.
2321df8bae1dSRodney W. Grimes  */
2322df8bae1dSRodney W. Grimes static void
2323514ede09SBruce Evans vfs_free_addrlist(nep)
2324514ede09SBruce Evans 	struct netexport *nep;
2325df8bae1dSRodney W. Grimes {
2326df8bae1dSRodney W. Grimes 	register int i;
2327df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
2328df8bae1dSRodney W. Grimes 
2329df8bae1dSRodney W. Grimes 	for (i = 0; i <= AF_MAX; i++)
2330bb56ec4aSPoul-Henning Kamp 		if ((rnh = nep->ne_rtable[i])) {
2331df8bae1dSRodney W. Grimes 			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2332df8bae1dSRodney W. Grimes 			    (caddr_t) rnh);
2333df8bae1dSRodney W. Grimes 			free((caddr_t) rnh, M_RTABLE);
2334df8bae1dSRodney W. Grimes 			nep->ne_rtable[i] = 0;
2335df8bae1dSRodney W. Grimes 		}
2336df8bae1dSRodney W. Grimes }
2337df8bae1dSRodney W. Grimes 
233821a90397SAlfred Perlstein /*
233921a90397SAlfred Perlstein  * High level function to manipulate export options on a mount point
234021a90397SAlfred Perlstein  * and the passed in netexport.
234221a90397SAlfred Perlstein  * Struct export_args *argp is the variable used to twiddle options;
234321a90397SAlfred Perlstein  * the structure is described in sys/mount.h.
234321a90397SAlfred Perlstein  */
2344df8bae1dSRodney W. Grimes int
2345df8bae1dSRodney W. Grimes vfs_export(mp, nep, argp)
2346df8bae1dSRodney W. Grimes 	struct mount *mp;
2347df8bae1dSRodney W. Grimes 	struct netexport *nep;
2348df8bae1dSRodney W. Grimes 	struct export_args *argp;
2349df8bae1dSRodney W. Grimes {
2350df8bae1dSRodney W. Grimes 	int error;
2351df8bae1dSRodney W. Grimes 
2352df8bae1dSRodney W. Grimes 	if (argp->ex_flags & MNT_DELEXPORT) {
2353f6b4c285SDoug Rabson 		if (mp->mnt_flag & MNT_EXPUBLIC) {
2354f6b4c285SDoug Rabson 			vfs_setpublicfs(NULL, NULL, NULL);
2355f6b4c285SDoug Rabson 			mp->mnt_flag &= ~MNT_EXPUBLIC;
2356f6b4c285SDoug Rabson 		}
2357df8bae1dSRodney W. Grimes 		vfs_free_addrlist(nep);
2358df8bae1dSRodney W. Grimes 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2359df8bae1dSRodney W. Grimes 	}
2360df8bae1dSRodney W. Grimes 	if (argp->ex_flags & MNT_EXPORTED) {
2361f6b4c285SDoug Rabson 		if (argp->ex_flags & MNT_EXPUBLIC) {
2362f6b4c285SDoug Rabson 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2363f6b4c285SDoug Rabson 				return (error);
2364f6b4c285SDoug Rabson 			mp->mnt_flag |= MNT_EXPUBLIC;
2365f6b4c285SDoug Rabson 		}
2366bb56ec4aSPoul-Henning Kamp 		if ((error = vfs_hang_addrlist(mp, nep, argp)))
2367df8bae1dSRodney W. Grimes 			return (error);
2368df8bae1dSRodney W. Grimes 		mp->mnt_flag |= MNT_EXPORTED;
2369df8bae1dSRodney W. Grimes 	}
2370df8bae1dSRodney W. Grimes 	return (0);
2371df8bae1dSRodney W. Grimes }
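
/*
 * Hedged usage sketch: a filesystem's VFS_MOUNT update path normally
 * forwards export requests here.  'xmp' and 'args' are hypothetical
 * per-fs mount data and mount arguments (compare ffs_mount()).
 */
#if 0
	if (mp->mnt_flag & MNT_UPDATE)
		return (vfs_export(mp, &xmp->netexport, &args.export));
#endif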
2372df8bae1dSRodney W. Grimes 
2373f6b4c285SDoug Rabson /*
2374f6b4c285SDoug Rabson  * Set the publicly exported filesystem (WebNFS).  Currently, the
2375f6b4c285SDoug Rabson  * spec (RFC 2054 and RFC 2055) allows only one public filesystem.
2376f6b4c285SDoug Rabson  */
2377f6b4c285SDoug Rabson int
2378f6b4c285SDoug Rabson vfs_setpublicfs(mp, nep, argp)
2379f6b4c285SDoug Rabson 	struct mount *mp;
2380f6b4c285SDoug Rabson 	struct netexport *nep;
2381f6b4c285SDoug Rabson 	struct export_args *argp;
2382f6b4c285SDoug Rabson {
2383f6b4c285SDoug Rabson 	int error;
2384f6b4c285SDoug Rabson 	struct vnode *rvp;
2385f6b4c285SDoug Rabson 	char *cp;
2386f6b4c285SDoug Rabson 
2387f6b4c285SDoug Rabson 	/*
2388f6b4c285SDoug Rabson 	 * mp == NULL -> invalidate the current info, the FS is
2389f6b4c285SDoug Rabson 	 * no longer exported. May be called from either vfs_export
2390f6b4c285SDoug Rabson 	 * or unmount, so check if it hasn't already been done.
2391f6b4c285SDoug Rabson 	 */
2392f6b4c285SDoug Rabson 	if (mp == NULL) {
2393f6b4c285SDoug Rabson 		if (nfs_pub.np_valid) {
2394f6b4c285SDoug Rabson 			nfs_pub.np_valid = 0;
2395f6b4c285SDoug Rabson 			if (nfs_pub.np_index != NULL) {
2396f6b4c285SDoug Rabson 				FREE(nfs_pub.np_index, M_TEMP);
2397f6b4c285SDoug Rabson 				nfs_pub.np_index = NULL;
2398f6b4c285SDoug Rabson 			}
2399f6b4c285SDoug Rabson 		}
2400f6b4c285SDoug Rabson 		return (0);
2401f6b4c285SDoug Rabson 	}
2402f6b4c285SDoug Rabson 
2403f6b4c285SDoug Rabson 	/*
2404f6b4c285SDoug Rabson 	 * Only one allowed at a time.
2405f6b4c285SDoug Rabson 	 */
2406f6b4c285SDoug Rabson 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2407f6b4c285SDoug Rabson 		return (EBUSY);
2408f6b4c285SDoug Rabson 
2409f6b4c285SDoug Rabson 	/*
2410f6b4c285SDoug Rabson 	 * Get real filehandle for root of exported FS.
2411f6b4c285SDoug Rabson 	 */
2412f6b4c285SDoug Rabson 	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2413f6b4c285SDoug Rabson 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2414f6b4c285SDoug Rabson 
2415f6b4c285SDoug Rabson 	if ((error = VFS_ROOT(mp, &rvp)))
2416f6b4c285SDoug Rabson 		return (error);
2417f6b4c285SDoug Rabson 
2418f6b4c285SDoug Rabson 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2419f6b4c285SDoug Rabson 		return (error);
2420f6b4c285SDoug Rabson 
2421f6b4c285SDoug Rabson 	vput(rvp);
2422f6b4c285SDoug Rabson 
2423f6b4c285SDoug Rabson 	/*
2424f6b4c285SDoug Rabson 	 * If an indexfile was specified, pull it in.
2425f6b4c285SDoug Rabson 	 */
2426f6b4c285SDoug Rabson 	if (argp->ex_indexfile != NULL) {
2427f6b4c285SDoug Rabson 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2428f6b4c285SDoug Rabson 		    M_WAITOK);
2429f6b4c285SDoug Rabson 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2430f6b4c285SDoug Rabson 		    MAXNAMLEN, (size_t *)0);
2431f6b4c285SDoug Rabson 		if (!error) {
2432f6b4c285SDoug Rabson 			/*
2433f6b4c285SDoug Rabson 			 * Check for illegal filenames.
2434f6b4c285SDoug Rabson 			 */
2435f6b4c285SDoug Rabson 			for (cp = nfs_pub.np_index; *cp; cp++) {
2436f6b4c285SDoug Rabson 				if (*cp == '/') {
2437f6b4c285SDoug Rabson 					error = EINVAL;
2438f6b4c285SDoug Rabson 					break;
2439f6b4c285SDoug Rabson 				}
2440f6b4c285SDoug Rabson 			}
2441f6b4c285SDoug Rabson 		}
2442f6b4c285SDoug Rabson 		if (error) {
2443f6b4c285SDoug Rabson 			FREE(nfs_pub.np_index, M_TEMP);
2444f6b4c285SDoug Rabson 			return (error);
2445f6b4c285SDoug Rabson 		}
2446f6b4c285SDoug Rabson 	}
2447f6b4c285SDoug Rabson 
2448f6b4c285SDoug Rabson 	nfs_pub.np_mount = mp;
2449f6b4c285SDoug Rabson 	nfs_pub.np_valid = 1;
2450f6b4c285SDoug Rabson 	return (0);
2451f6b4c285SDoug Rabson }
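
/*
 * Hedged illustration of the userland side: a filesystem becomes the
 * WebNFS public root via vfs_export() with MNT_EXPUBLIC set in the
 * export flags; the index file is optional.  Values are examples.
 */
#if 0
	ea.ex_flags |= MNT_EXPORTED | MNT_EXPUBLIC;
	ea.ex_indexfile = "index.html";	/* copied in by copyinstr() above */
#endif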
2452f6b4c285SDoug Rabson 
245321a90397SAlfred Perlstein /*
245421a90397SAlfred Perlstein  * Used by the filesystems to determine if a given network address
245521a90397SAlfred Perlstein  * (passed in 'nam') is present in their export list; returns a pointer
245621a90397SAlfred Perlstein  * to struct netcred so that the filesystem can examine it for
245721a90397SAlfred Perlstein  * access rights (read/write/etc).
245821a90397SAlfred Perlstein  */
2459df8bae1dSRodney W. Grimes struct netcred *
2460df8bae1dSRodney W. Grimes vfs_export_lookup(mp, nep, nam)
2461df8bae1dSRodney W. Grimes 	register struct mount *mp;
2462df8bae1dSRodney W. Grimes 	struct netexport *nep;
246357bf258eSGarrett Wollman 	struct sockaddr *nam;
2464df8bae1dSRodney W. Grimes {
2465df8bae1dSRodney W. Grimes 	register struct netcred *np;
2466df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
2467df8bae1dSRodney W. Grimes 	struct sockaddr *saddr;
2468df8bae1dSRodney W. Grimes 
2469df8bae1dSRodney W. Grimes 	np = NULL;
2470df8bae1dSRodney W. Grimes 	if (mp->mnt_flag & MNT_EXPORTED) {
2471df8bae1dSRodney W. Grimes 		/*
2472df8bae1dSRodney W. Grimes 		 * Lookup in the export list first.
2473df8bae1dSRodney W. Grimes 		 */
2474df8bae1dSRodney W. Grimes 		if (nam != NULL) {
247557bf258eSGarrett Wollman 			saddr = nam;
2476df8bae1dSRodney W. Grimes 			rnh = nep->ne_rtable[saddr->sa_family];
2477df8bae1dSRodney W. Grimes 			if (rnh != NULL) {
2478df8bae1dSRodney W. Grimes 				np = (struct netcred *)
2479df8bae1dSRodney W. Grimes 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2480df8bae1dSRodney W. Grimes 							      rnh);
2481df8bae1dSRodney W. Grimes 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2482df8bae1dSRodney W. Grimes 					np = NULL;
2483df8bae1dSRodney W. Grimes 			}
2484df8bae1dSRodney W. Grimes 		}
2485df8bae1dSRodney W. Grimes 		/*
2486df8bae1dSRodney W. Grimes 		 * If no address match, use the default if it exists.
2487df8bae1dSRodney W. Grimes 		 */
2488df8bae1dSRodney W. Grimes 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2489df8bae1dSRodney W. Grimes 			np = &nep->ne_defexported;
2490df8bae1dSRodney W. Grimes 	}
2491df8bae1dSRodney W. Grimes 	return (np);
2492df8bae1dSRodney W. Grimes }
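
/*
 * Hedged usage sketch: an NFS-style server resolves a client address
 * to its export credentials roughly as follows.  'wantswrite' is a
 * hypothetical flag; the error values mirror common practice.
 */
#if 0
	struct netcred *np;

	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);	/* address not in the export list */
	if ((np->netc_exflags & MNT_EXRDONLY) && wantswrite)
		return (EROFS);		/* export is read-only */
	cred = &np->netc_anon;		/* e.g. credential for remote root */
#endif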
249361f5d510SDavid Greenman 
249461f5d510SDavid Greenman /*
249561f5d510SDavid Greenman  * Perform msync on all vnodes under a mount point.
249661f5d510SDavid Greenman  * The mount point must be locked.
249761f5d510SDavid Greenman  */
249861f5d510SDavid Greenman void
249961f5d510SDavid Greenman vfs_msync(struct mount *mp, int flags) {
2500a316d390SJohn Dyson 	struct vnode *vp, *nvp;
250137b8ccd3SPeter Wemm 	struct vm_object *obj;
250295e5e988SJohn Dyson 	int anyio, tries;
250395e5e988SJohn Dyson 
250495e5e988SJohn Dyson 	tries = 5;
250561f5d510SDavid Greenman loop:
250695e5e988SJohn Dyson 	anyio = 0;
25071b727751SPoul-Henning Kamp 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
250861f5d510SDavid Greenman 
25091b727751SPoul-Henning Kamp 		nvp = LIST_NEXT(vp, v_mntvnodes);
251095e5e988SJohn Dyson 
251195e5e988SJohn Dyson 		if (vp->v_mount != mp) {
251295e5e988SJohn Dyson 			goto loop;
251395e5e988SJohn Dyson 		}
251495e5e988SJohn Dyson 
251537b8ccd3SPeter Wemm 		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
251637b8ccd3SPeter Wemm 			continue;
251737b8ccd3SPeter Wemm 
251837b8ccd3SPeter Wemm 		if (flags != MNT_WAIT) {
25199ff5ce6bSBoris Popov 			if (VOP_GETVOBJECT(vp, &obj) != 0 ||
25209ff5ce6bSBoris Popov 			    (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
252137b8ccd3SPeter Wemm 				continue;
25226bdfe06aSEivind Eklund 			if (VOP_ISLOCKED(vp, NULL))
252361f5d510SDavid Greenman 				continue;
252495e5e988SJohn Dyson 		}
252595e5e988SJohn Dyson 
252695e5e988SJohn Dyson 		simple_lock(&vp->v_interlock);
25279ff5ce6bSBoris Popov 		if (VOP_GETVOBJECT(vp, &obj) == 0 &&
25289ff5ce6bSBoris Popov 		    (obj->flags & OBJ_MIGHTBEDIRTY)) {
252995e5e988SJohn Dyson 			if (!vget(vp,
253095e5e988SJohn Dyson 				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
25319ff5ce6bSBoris Popov 				if (VOP_GETVOBJECT(vp, &obj) == 0) {
25329ff5ce6bSBoris Popov 					vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
253395e5e988SJohn Dyson 					anyio = 1;
253495e5e988SJohn Dyson 				}
253595e5e988SJohn Dyson 				vput(vp);
253695e5e988SJohn Dyson 			}
253795e5e988SJohn Dyson 		} else {
253895e5e988SJohn Dyson 			simple_unlock(&vp->v_interlock);
253961f5d510SDavid Greenman 		}
254061f5d510SDavid Greenman 	}
254195e5e988SJohn Dyson 	if (anyio && (--tries > 0))
254295e5e988SJohn Dyson 		goto loop;
254361f5d510SDavid Greenman }
25446476c0d2SJohn Dyson 
25456476c0d2SJohn Dyson /*
25466476c0d2SJohn Dyson  * Create the VM object needed for VMIO and mmap support.  This
25476476c0d2SJohn Dyson  * is done for all VREG files in the system.  Some filesystems might
25486476c0d2SJohn Dyson  * also take advantage of the additional metadata buffering of the
25496476c0d2SJohn Dyson  * VMIO code by putting the device node into VMIO mode.
255095e5e988SJohn Dyson  *
2551fb116777SEivind Eklund  * vp must be locked when vfs_object_create is called.
25526476c0d2SJohn Dyson  */
25536476c0d2SJohn Dyson int
2554fb116777SEivind Eklund vfs_object_create(vp, p, cred)
25556476c0d2SJohn Dyson 	struct vnode *vp;
25566476c0d2SJohn Dyson 	struct proc *p;
25576476c0d2SJohn Dyson 	struct ucred *cred;
25586476c0d2SJohn Dyson {
25599ff5ce6bSBoris Popov 	return (VOP_CREATEVOBJECT(vp, cred, p));
25606476c0d2SJohn Dyson }
2561b15a966eSPoul-Henning Kamp 
2562c904bbbdSKirk McKusick void
2563a051452aSPoul-Henning Kamp vfree(vp)
2564b15a966eSPoul-Henning Kamp 	struct vnode *vp;
2565b15a966eSPoul-Henning Kamp {
2566925a3a41SJohn Dyson 	int s;
2567925a3a41SJohn Dyson 
2568925a3a41SJohn Dyson 	s = splbio();
2569a051452aSPoul-Henning Kamp 	simple_lock(&vnode_free_list_slock);
2570c904bbbdSKirk McKusick 	KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free"));
2571a051452aSPoul-Henning Kamp 	if (vp->v_flag & VAGE) {
2572a051452aSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2573a051452aSPoul-Henning Kamp 	} else {
2574b15a966eSPoul-Henning Kamp 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
25758670684aSPoul-Henning Kamp 	}
2576a051452aSPoul-Henning Kamp 	freevnodes++;
2577b15a966eSPoul-Henning Kamp 	simple_unlock(&vnode_free_list_slock);
2578a051452aSPoul-Henning Kamp 	vp->v_flag &= ~VAGE;
2579a051452aSPoul-Henning Kamp 	vp->v_flag |= VFREE;
2580925a3a41SJohn Dyson 	splx(s);
2581b15a966eSPoul-Henning Kamp }
2582a051452aSPoul-Henning Kamp 
258347221757SJohn Dyson void
2584a051452aSPoul-Henning Kamp vbusy(vp)
2585a051452aSPoul-Henning Kamp 	struct vnode *vp;
2586a051452aSPoul-Henning Kamp {
2587925a3a41SJohn Dyson 	int s;
2588925a3a41SJohn Dyson 
2589925a3a41SJohn Dyson 	s = splbio();
2590a051452aSPoul-Henning Kamp 	simple_lock(&vnode_free_list_slock);
2591c904bbbdSKirk McKusick 	KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free"));
2592a051452aSPoul-Henning Kamp 	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2593a051452aSPoul-Henning Kamp 	freevnodes--;
2594a051452aSPoul-Henning Kamp 	simple_unlock(&vnode_free_list_slock);
259564d3c7e3SJohn Dyson 	vp->v_flag &= ~(VFREE|VAGE);
2596925a3a41SJohn Dyson 	splx(s);
2597b15a966eSPoul-Henning Kamp }
25981cbbd625SGarrett Wollman 
25991cbbd625SGarrett Wollman /*
26001cbbd625SGarrett Wollman  * Record a process's interest in events which might happen to
26011cbbd625SGarrett Wollman  * a vnode.  Because poll uses the historic select-style interface
26021cbbd625SGarrett Wollman  * internally, this routine serves as both the ``check for any
26031cbbd625SGarrett Wollman  * pending events'' and the ``record my interest in future events''
26041cbbd625SGarrett Wollman  * functions.  (These are done together, while the lock is held,
26051cbbd625SGarrett Wollman  * to avoid race conditions.)
26061cbbd625SGarrett Wollman  */
26071cbbd625SGarrett Wollman int
26081cbbd625SGarrett Wollman vn_pollrecord(vp, p, events)
26091cbbd625SGarrett Wollman 	struct vnode *vp;
26101cbbd625SGarrett Wollman 	struct proc *p;
26111cbbd625SGarrett Wollman 	short events;
26121cbbd625SGarrett Wollman {
26131cbbd625SGarrett Wollman 	simple_lock(&vp->v_pollinfo.vpi_lock);
26141cbbd625SGarrett Wollman 	if (vp->v_pollinfo.vpi_revents & events) {
26151cbbd625SGarrett Wollman 		/*
26161cbbd625SGarrett Wollman 		 * This leaves events we are not interested
26171cbbd625SGarrett Wollman 		 * in available for the other process which
26181cbbd625SGarrett Wollman 		 * presumably had requested them
26191cbbd625SGarrett Wollman 		 * (otherwise they would never have been
26201cbbd625SGarrett Wollman 		 * recorded).
26211cbbd625SGarrett Wollman 		 */
26221cbbd625SGarrett Wollman 		events &= vp->v_pollinfo.vpi_revents;
26231cbbd625SGarrett Wollman 		vp->v_pollinfo.vpi_revents &= ~events;
26241cbbd625SGarrett Wollman 
26251cbbd625SGarrett Wollman 		simple_unlock(&vp->v_pollinfo.vpi_lock);
26261cbbd625SGarrett Wollman 		return events;
26271cbbd625SGarrett Wollman 	}
26281cbbd625SGarrett Wollman 	vp->v_pollinfo.vpi_events |= events;
26291cbbd625SGarrett Wollman 	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
26301cbbd625SGarrett Wollman 	simple_unlock(&vp->v_pollinfo.vpi_lock);
26311cbbd625SGarrett Wollman 	return 0;
26321cbbd625SGarrett Wollman }
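
/*
 * Hedged sketch of a filesystem's VOP_POLL built on the helper above;
 * 'myfs_poll' is illustrative, not an existing routine.
 */
#if 0
static int
myfs_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{

	/* Report any pending events and record interest in the rest. */
	return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
}
#endif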
26331cbbd625SGarrett Wollman 
26341cbbd625SGarrett Wollman /*
26351cbbd625SGarrett Wollman  * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
26361cbbd625SGarrett Wollman  * it is possible for us to miss an event due to race conditions, but
26371cbbd625SGarrett Wollman  * that condition is expected to be rare, so for the moment it is the
26381cbbd625SGarrett Wollman  * preferred interface.
26391cbbd625SGarrett Wollman  */
26401cbbd625SGarrett Wollman void
26411cbbd625SGarrett Wollman vn_pollevent(vp, events)
26421cbbd625SGarrett Wollman 	struct vnode *vp;
26431cbbd625SGarrett Wollman 	short events;
26441cbbd625SGarrett Wollman {
26451cbbd625SGarrett Wollman 	simple_lock(&vp->v_pollinfo.vpi_lock);
26461cbbd625SGarrett Wollman 	if (vp->v_pollinfo.vpi_events & events) {
26471cbbd625SGarrett Wollman 		/*
26481cbbd625SGarrett Wollman 		 * We clear vpi_events so that we don't
26491cbbd625SGarrett Wollman 		 * call selwakeup() twice if two events are
26501cbbd625SGarrett Wollman 		 * posted before the polling process(es) is
26511cbbd625SGarrett Wollman 		 * awakened.  This also ensures that we take at
26521cbbd625SGarrett Wollman 		 * most one selwakeup() if the polling process
26531cbbd625SGarrett Wollman 		 * is no longer interested.  However, it does
26541cbbd625SGarrett Wollman 		 * mean that only one event can be noticed at
26551cbbd625SGarrett Wollman 		 * a time.  (Perhaps we should only clear those
26561cbbd625SGarrett Wollman 		 * event bits which we note?) XXX
26571cbbd625SGarrett Wollman 		 */
26581cbbd625SGarrett Wollman 		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
26591cbbd625SGarrett Wollman 		vp->v_pollinfo.vpi_revents |= events;
26601cbbd625SGarrett Wollman 		selwakeup(&vp->v_pollinfo.vpi_selinfo);
26611cbbd625SGarrett Wollman 	}
26621cbbd625SGarrett Wollman 	simple_unlock(&vp->v_pollinfo.vpi_lock);
26631cbbd625SGarrett Wollman }
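
/*
 * Hedged usage sketch: code that modifies a vnode posts the event
 * through the VN_POLLEVENT macro (sys/vnode.h), which avoids the
 * function call when no interest has been recorded.
 */
#if 0
	/* After new data has been appended to the file: */
	VN_POLLEVENT(vp, POLLIN | POLLRDNORM);
#endif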
26641cbbd625SGarrett Wollman 
26651cbbd625SGarrett Wollman /*
26661cbbd625SGarrett Wollman  * Wake up anyone polling on vp because it is being revoked.
26671cbbd625SGarrett Wollman  * This depends on dead_poll() returning POLLHUP for correct
26681cbbd625SGarrett Wollman  * behavior.
26691cbbd625SGarrett Wollman  */
26701cbbd625SGarrett Wollman void
26711cbbd625SGarrett Wollman vn_pollgone(vp)
26721cbbd625SGarrett Wollman 	struct vnode *vp;
26731cbbd625SGarrett Wollman {
26741cbbd625SGarrett Wollman 	simple_lock(&vp->v_pollinfo.vpi_lock);
26751cbbd625SGarrett Wollman 	if (vp->v_pollinfo.vpi_events) {
26761cbbd625SGarrett Wollman 		vp->v_pollinfo.vpi_events = 0;
26771cbbd625SGarrett Wollman 		selwakeup(&vp->v_pollinfo.vpi_selinfo);
26781cbbd625SGarrett Wollman 	}
26791cbbd625SGarrett Wollman 	simple_unlock(&vp->v_pollinfo.vpi_lock);
26801cbbd625SGarrett Wollman }
2681b1897c19SJulian Elischer 
2682b1897c19SJulian Elischer 
2683b1897c19SJulian Elischer 
2684b1897c19SJulian Elischer /*
2685b1897c19SJulian Elischer  * Routine to create and manage a filesystem syncer vnode.
2686b1897c19SJulian Elischer  */
2687b1897c19SJulian Elischer #define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
26880df45b5aSEivind Eklund static int	sync_fsync __P((struct  vop_fsync_args *));
26890df45b5aSEivind Eklund static int	sync_inactive __P((struct  vop_inactive_args *));
26900df45b5aSEivind Eklund static int	sync_reclaim  __P((struct  vop_reclaim_args *));
2691b1897c19SJulian Elischer #define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
2692b1897c19SJulian Elischer #define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
26930df45b5aSEivind Eklund static int	sync_print __P((struct vop_print_args *));
2694b1897c19SJulian Elischer #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
2695b1897c19SJulian Elischer 
2696db878ba4SEivind Eklund static vop_t **sync_vnodeop_p;
2697db878ba4SEivind Eklund static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
2698b1897c19SJulian Elischer 	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
2699b1897c19SJulian Elischer 	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
2700b1897c19SJulian Elischer 	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
2701b1897c19SJulian Elischer 	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
2702b1897c19SJulian Elischer 	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
2703b1897c19SJulian Elischer 	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
2704b1897c19SJulian Elischer 	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
2705b1897c19SJulian Elischer 	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
2706b1897c19SJulian Elischer 	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
2707b1897c19SJulian Elischer 	{ NULL, NULL }
2708b1897c19SJulian Elischer };
2709db878ba4SEivind Eklund static struct vnodeopv_desc sync_vnodeop_opv_desc =
2710b1897c19SJulian Elischer 	{ &sync_vnodeop_p, sync_vnodeop_entries };
2711b1897c19SJulian Elischer 
2712b1897c19SJulian Elischer VNODEOP_SET(sync_vnodeop_opv_desc);
2713b1897c19SJulian Elischer 
2714b1897c19SJulian Elischer /*
2715b1897c19SJulian Elischer  * Create a new filesystem syncer vnode for the specified mount point.
2716b1897c19SJulian Elischer  */
2717b1897c19SJulian Elischer int
2718b1897c19SJulian Elischer vfs_allocate_syncvnode(mp)
2719b1897c19SJulian Elischer 	struct mount *mp;
2720b1897c19SJulian Elischer {
2721b1897c19SJulian Elischer 	struct vnode *vp;
2722b1897c19SJulian Elischer 	static long start, incr, next;
2723b1897c19SJulian Elischer 	int error;
2724b1897c19SJulian Elischer 
2725b1897c19SJulian Elischer 	/* Allocate a new vnode */
2726b1897c19SJulian Elischer 	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
2727b1897c19SJulian Elischer 		mp->mnt_syncer = NULL;
2728b1897c19SJulian Elischer 		return (error);
2729b1897c19SJulian Elischer 	}
2730b1897c19SJulian Elischer 	vp->v_type = VNON;
2731b1897c19SJulian Elischer 	/*
2732b1897c19SJulian Elischer 	 * Place the vnode onto the syncer worklist. We attempt to
2733b1897c19SJulian Elischer 	 * scatter them about on the list so that they will go off
2734b1897c19SJulian Elischer 	 * at evenly distributed times even if all the filesystems
2735b1897c19SJulian Elischer 	 * are mounted at once.
2736b1897c19SJulian Elischer 	 */
2737b1897c19SJulian Elischer 	next += incr;
2738b1897c19SJulian Elischer 	if (next == 0 || next > syncer_maxdelay) {
2739b1897c19SJulian Elischer 		start /= 2;
2740b1897c19SJulian Elischer 		incr /= 2;
2741b1897c19SJulian Elischer 		if (start == 0) {
2742b1897c19SJulian Elischer 			start = syncer_maxdelay / 2;
2743b1897c19SJulian Elischer 			incr = syncer_maxdelay;
2744b1897c19SJulian Elischer 		}
2745b1897c19SJulian Elischer 		next = start;
2746b1897c19SJulian Elischer 	}
2747b1897c19SJulian Elischer 	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
2748b1897c19SJulian Elischer 	mp->mnt_syncer = vp;
2749b1897c19SJulian Elischer 	return (0);
2750b1897c19SJulian Elischer }
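
/*
 * Hedged worked example of the scattering above: assuming
 * syncer_maxdelay == 32 and syncdelay == 30, successive calls assign
 * next = 16, 8, 24, 4, 12, 20, 28, 2, ...; each pass subdivides the
 * delay range more finely, so filesystems mounted at the same time
 * still land in well-separated worklist slots (taken mod syncdelay).
 */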
2751b1897c19SJulian Elischer 
2752b1897c19SJulian Elischer /*
2753b1897c19SJulian Elischer  * Do a lazy sync of the filesystem.
2754b1897c19SJulian Elischer  */
2755db878ba4SEivind Eklund static int
2756b1897c19SJulian Elischer sync_fsync(ap)
2757b1897c19SJulian Elischer 	struct vop_fsync_args /* {
2758b1897c19SJulian Elischer 		struct vnode *a_vp;
2759b1897c19SJulian Elischer 		struct ucred *a_cred;
2760b1897c19SJulian Elischer 		int a_waitfor;
2761b1897c19SJulian Elischer 		struct proc *a_p;
2762b1897c19SJulian Elischer 	} */ *ap;
2763b1897c19SJulian Elischer {
2764b1897c19SJulian Elischer 	struct vnode *syncvp = ap->a_vp;
2765b1897c19SJulian Elischer 	struct mount *mp = syncvp->v_mount;
2766b1897c19SJulian Elischer 	struct proc *p = ap->a_p;
2767b1897c19SJulian Elischer 	int asyncflag;
2768b1897c19SJulian Elischer 
2769b1897c19SJulian Elischer 	/*
2770b1897c19SJulian Elischer 	 * We only need to do something if this is a lazy evaluation.
2771b1897c19SJulian Elischer 	 */
2772b1897c19SJulian Elischer 	if (ap->a_waitfor != MNT_LAZY)
2773b1897c19SJulian Elischer 		return (0);
2774b1897c19SJulian Elischer 
2775b1897c19SJulian Elischer 	/*
2776b1897c19SJulian Elischer 	 * Move ourselves to the back of the sync list.
2777b1897c19SJulian Elischer 	 */
2778b1897c19SJulian Elischer 	vn_syncer_add_to_worklist(syncvp, syncdelay);
2779b1897c19SJulian Elischer 
2780b1897c19SJulian Elischer 	/*
2781b1897c19SJulian Elischer 	 * Walk the list of vnodes pushing all that are dirty and
2782b1897c19SJulian Elischer 	 * not already on the sync list.
2783b1897c19SJulian Elischer 	 */
2784b1897c19SJulian Elischer 	simple_lock(&mountlist_slock);
278571033a8cSTor Egge 	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
278671033a8cSTor Egge 		simple_unlock(&mountlist_slock);
2787b1897c19SJulian Elischer 		return (0);
278871033a8cSTor Egge 	}
2789f2a2857bSKirk McKusick 	if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
2790f2a2857bSKirk McKusick 		vfs_unbusy(mp, p);
2791f2a2857bSKirk McKusick 		simple_unlock(&mountlist_slock);
2792f2a2857bSKirk McKusick 		return (0);
2793f2a2857bSKirk McKusick 	}
2794b1897c19SJulian Elischer 	asyncflag = mp->mnt_flag & MNT_ASYNC;
2795b1897c19SJulian Elischer 	mp->mnt_flag &= ~MNT_ASYNC;
2796efdc5523SPeter Wemm 	vfs_msync(mp, MNT_NOWAIT);
2797b1897c19SJulian Elischer 	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
2798b1897c19SJulian Elischer 	if (asyncflag)
2799b1897c19SJulian Elischer 		mp->mnt_flag |= MNT_ASYNC;
2800f2a2857bSKirk McKusick 	vn_finished_write(mp);
2801b1897c19SJulian Elischer 	vfs_unbusy(mp, p);
2802b1897c19SJulian Elischer 	return (0);
2803b1897c19SJulian Elischer }
2804b1897c19SJulian Elischer 
2805b1897c19SJulian Elischer /*
2806b1897c19SJulian Elischer  * The syncer vnode is no longer referenced.
2807b1897c19SJulian Elischer  */
2808db878ba4SEivind Eklund static int
2809b1897c19SJulian Elischer sync_inactive(ap)
2810b1897c19SJulian Elischer 	struct vop_inactive_args /* {
2811b1897c19SJulian Elischer 		struct vnode *a_vp;
2812b1897c19SJulian Elischer 		struct proc *a_p;
2813b1897c19SJulian Elischer 	} */ *ap;
2814b1897c19SJulian Elischer {
2815b1897c19SJulian Elischer 
2816b1897c19SJulian Elischer 	vgone(ap->a_vp);
2817b1897c19SJulian Elischer 	return (0);
2818b1897c19SJulian Elischer }
2819b1897c19SJulian Elischer 
2820b1897c19SJulian Elischer /*
2821b1897c19SJulian Elischer  * The syncer vnode is no longer needed and is being decommissioned.
282242e26d47SMatthew Dillon  *
282342e26d47SMatthew Dillon  * Modifications to the worklist must be protected at splbio().
2824b1897c19SJulian Elischer  */
2825db878ba4SEivind Eklund static int
2826b1897c19SJulian Elischer sync_reclaim(ap)
2827b1897c19SJulian Elischer 	struct vop_reclaim_args /* {
2828b1897c19SJulian Elischer 		struct vnode *a_vp;
2829b1897c19SJulian Elischer 	} */ *ap;
2830b1897c19SJulian Elischer {
2831b1897c19SJulian Elischer 	struct vnode *vp = ap->a_vp;
283242e26d47SMatthew Dillon 	int s;
2833b1897c19SJulian Elischer 
283442e26d47SMatthew Dillon 	s = splbio();
2835b1897c19SJulian Elischer 	vp->v_mount->mnt_syncer = NULL;
2836b1897c19SJulian Elischer 	if (vp->v_flag & VONWORKLST) {
2837b1897c19SJulian Elischer 		LIST_REMOVE(vp, v_synclist);
2838b1897c19SJulian Elischer 		vp->v_flag &= ~VONWORKLST;
2839b1897c19SJulian Elischer 	}
284042e26d47SMatthew Dillon 	splx(s);
2841b1897c19SJulian Elischer 
2842b1897c19SJulian Elischer 	return (0);
2843b1897c19SJulian Elischer }
2844b1897c19SJulian Elischer 
2845b1897c19SJulian Elischer /*
2846b1897c19SJulian Elischer  * Print out a syncer vnode.
2847b1897c19SJulian Elischer  */
2848db878ba4SEivind Eklund static int
2849b1897c19SJulian Elischer sync_print(ap)
2850b1897c19SJulian Elischer 	struct vop_print_args /* {
2851b1897c19SJulian Elischer 		struct vnode *a_vp;
2852b1897c19SJulian Elischer 	} */ *ap;
2853b1897c19SJulian Elischer {
2854b1897c19SJulian Elischer 	struct vnode *vp = ap->a_vp;
2855b1897c19SJulian Elischer 
2856b1897c19SJulian Elischer 	printf("syncer vnode");
2857b1897c19SJulian Elischer 	if (vp->v_vnlock != NULL)
2858b1897c19SJulian Elischer 		lockmgr_printinfo(vp->v_vnlock);
2859b1897c19SJulian Elischer 	printf("\n");
2860b1897c19SJulian Elischer 	return (0);
2861b1897c19SJulian Elischer }
28626ca54864SPoul-Henning Kamp 
28636ca54864SPoul-Henning Kamp /*
28646ca54864SPoul-Henning Kamp  * Extract the dev_t from a VBLK or VCHR vnode.
28656ca54864SPoul-Henning Kamp  */
28666ca54864SPoul-Henning Kamp dev_t
28676ca54864SPoul-Henning Kamp vn_todev(vp)
28686ca54864SPoul-Henning Kamp 	struct vnode *vp;
28696ca54864SPoul-Henning Kamp {
28706ca54864SPoul-Henning Kamp 	if (vp->v_type != VBLK && vp->v_type != VCHR)
28716ca54864SPoul-Henning Kamp 		return (NODEV);
28726ca54864SPoul-Henning Kamp 	return (vp->v_rdev);
28736ca54864SPoul-Henning Kamp }
287441d2e3e0SPoul-Henning Kamp 
287541d2e3e0SPoul-Henning Kamp /*
287641d2e3e0SPoul-Henning Kamp  * Check if the vnode represents a disk device.
287741d2e3e0SPoul-Henning Kamp  */
287841d2e3e0SPoul-Henning Kamp int
2879ba4ad1fcSPoul-Henning Kamp vn_isdisk(vp, errp)
288041d2e3e0SPoul-Henning Kamp 	struct vnode *vp;
2881ba4ad1fcSPoul-Henning Kamp 	int *errp;
288241d2e3e0SPoul-Henning Kamp {
288364dc16dfSPoul-Henning Kamp 	struct cdevsw *cdevsw;
288464dc16dfSPoul-Henning Kamp 
2885ba4ad1fcSPoul-Henning Kamp 	if (vp->v_type != VBLK && vp->v_type != VCHR) {
2886ba4ad1fcSPoul-Henning Kamp 		if (errp != NULL)
2887ba4ad1fcSPoul-Henning Kamp 			*errp = ENOTBLK;
288841d2e3e0SPoul-Henning Kamp 		return (0);
2889ba4ad1fcSPoul-Henning Kamp 	}
2890b081a64aSChris Costello 	if (vp->v_rdev == NULL) {
2891b081a64aSChris Costello 		if (errp != NULL)
2892b081a64aSChris Costello 			*errp = ENXIO;
2893b081a64aSChris Costello 		return (0);
2894b081a64aSChris Costello 	}
289564dc16dfSPoul-Henning Kamp 	cdevsw = devsw(vp->v_rdev);
289664dc16dfSPoul-Henning Kamp 	if (cdevsw == NULL) {
2897ba4ad1fcSPoul-Henning Kamp 		if (errp != NULL)
2898ba4ad1fcSPoul-Henning Kamp 			*errp = ENXIO;
289941d2e3e0SPoul-Henning Kamp 		return (0);
2900ba4ad1fcSPoul-Henning Kamp 	}
290164dc16dfSPoul-Henning Kamp 	if (!(cdevsw->d_flags & D_DISK)) {
2902ba4ad1fcSPoul-Henning Kamp 		if (errp != NULL)
2903ba4ad1fcSPoul-Henning Kamp 			*errp = ENOTBLK;
290441d2e3e0SPoul-Henning Kamp 		return (0);
2905ba4ad1fcSPoul-Henning Kamp 	}
2906ba4ad1fcSPoul-Henning Kamp 	if (errp != NULL)
2907ba4ad1fcSPoul-Henning Kamp 		*errp = 0;
290841d2e3e0SPoul-Henning Kamp 	return (1);
290941d2e3e0SPoul-Henning Kamp }
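
/*
 * Hedged usage sketch: mount code for a disk-based filesystem would
 * validate its device vnode like this; 'devvp' is a hypothetical
 * device vnode (compare the checks in ffs_mount()).
 */
#if 0
	int error;

	if (!vn_isdisk(devvp, &error))
		return (error);		/* ENOTBLK or ENXIO from above */
#endif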
291041d2e3e0SPoul-Henning Kamp 
2911e12d97d2SEivind Eklund void
2912e12d97d2SEivind Eklund NDFREE(ndp, flags)
2913e12d97d2SEivind Eklund      struct nameidata *ndp;
2914e12d97d2SEivind Eklund      const uint flags;
2915e12d97d2SEivind Eklund {
2916e12d97d2SEivind Eklund 	if (!(flags & NDF_NO_FREE_PNBUF) &&
2917e12d97d2SEivind Eklund 	    (ndp->ni_cnd.cn_flags & HASBUF)) {
2918e12d97d2SEivind Eklund 		zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
2919e12d97d2SEivind Eklund 		ndp->ni_cnd.cn_flags &= ~HASBUF;
2920e12d97d2SEivind Eklund 	}
2921e12d97d2SEivind Eklund 	if (!(flags & NDF_NO_DVP_UNLOCK) &&
2922e12d97d2SEivind Eklund 	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
2923e12d97d2SEivind Eklund 	    ndp->ni_dvp != ndp->ni_vp)
2924e12d97d2SEivind Eklund 		VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc);
2925e12d97d2SEivind Eklund 	if (!(flags & NDF_NO_DVP_RELE) &&
2926e12d97d2SEivind Eklund 	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
2927e12d97d2SEivind Eklund 		vrele(ndp->ni_dvp);
2928e12d97d2SEivind Eklund 		ndp->ni_dvp = NULL;
2929e12d97d2SEivind Eklund 	}
2930e12d97d2SEivind Eklund 	if (!(flags & NDF_NO_VP_UNLOCK) &&
2931e12d97d2SEivind Eklund 	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
2932e12d97d2SEivind Eklund 		VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc);
2933e12d97d2SEivind Eklund 	if (!(flags & NDF_NO_VP_RELE) &&
2934e12d97d2SEivind Eklund 	    ndp->ni_vp) {
2935e12d97d2SEivind Eklund 		vrele(ndp->ni_vp);
2936e12d97d2SEivind Eklund 		ndp->ni_vp = NULL;
2937e12d97d2SEivind Eklund 	}
2938e12d97d2SEivind Eklund 	if (!(flags & NDF_NO_STARTDIR_RELE) &&
2939e12d97d2SEivind Eklund 	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
2940e12d97d2SEivind Eklund 		vrele(ndp->ni_startdir);
2941e12d97d2SEivind Eklund 		ndp->ni_startdir = NULL;
2942e12d97d2SEivind Eklund 	}
2943e12d97d2SEivind Eklund }
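
/*
 * Hedged usage sketch of a typical namei()/NDFREE pairing.  The
 * assumption here is the common NDF_ONLY_PNBUF idiom (sys/namei.h),
 * which frees just the pathname buffer and leaves the vnode refs and
 * locks to the caller; 'upath' and 'p' are a hypothetical user path
 * pointer and the current proc.
 */
#if 0
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, upath, p);
	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);	/* free only the path buffer */
	/* ... operate on the locked nd.ni_vp ... */
	vput(nd.ni_vp);
#endif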
2944e39c53edSPoul-Henning Kamp 
2945e0848358SRobert Watson /*
2946e0848358SRobert Watson  * Common file system object access control check routine.  Accepts a
2947e0848358SRobert Watson  * vnode's type, "mode", uid and gid, requested access mode, credentials,
2948e0848358SRobert Watson  * and optional call-by-reference privused argument allowing vaccess()
2949e0848358SRobert Watson  * to indicate to the caller whether privilege was used to satisfy the
2950e0848358SRobert Watson  * request.  Returns 0 on success, or an errno on failure.
2951e0848358SRobert Watson  */
2952e39c53edSPoul-Henning Kamp int
2953012c643dSRobert Watson vaccess(type, file_mode, file_uid, file_gid, acc_mode, cred, privused)
2954e39c53edSPoul-Henning Kamp 	enum vtype type;
2955e39c53edSPoul-Henning Kamp 	mode_t file_mode;
2956012c643dSRobert Watson 	uid_t file_uid;
2957012c643dSRobert Watson 	gid_t file_gid;
2958e39c53edSPoul-Henning Kamp 	mode_t acc_mode;
2959e39c53edSPoul-Henning Kamp 	struct ucred *cred;
2960012c643dSRobert Watson 	int *privused;
2961e39c53edSPoul-Henning Kamp {
2962012c643dSRobert Watson 	mode_t dac_granted;
2963012c643dSRobert Watson #ifdef CAPABILITIES
2964012c643dSRobert Watson 	mode_t cap_granted;
2965012c643dSRobert Watson #endif
2966e39c53edSPoul-Henning Kamp 
2967e39c53edSPoul-Henning Kamp 	/*
2968012c643dSRobert Watson 	 * Look for a normal, non-privileged way to access the file/directory
2969012c643dSRobert Watson 	 * as requested.  If it exists, go with that.
2970e39c53edSPoul-Henning Kamp 	 */
2971012c643dSRobert Watson 
2972012c643dSRobert Watson 	if (privused != NULL)
2973012c643dSRobert Watson 		*privused = 0;
2974012c643dSRobert Watson 
2975012c643dSRobert Watson 	dac_granted = 0;
2976012c643dSRobert Watson 
2977012c643dSRobert Watson 	/* Check the owner. */
2978012c643dSRobert Watson 	if (cred->cr_uid == file_uid) {
2979012c643dSRobert Watson 		if (file_mode & S_IXUSR)
2980012c643dSRobert Watson 			dac_granted |= VEXEC;
2981012c643dSRobert Watson 		if (file_mode & S_IRUSR)
2982012c643dSRobert Watson 			dac_granted |= VREAD;
2983012c643dSRobert Watson 		if (file_mode & S_IWUSR)
2984012c643dSRobert Watson 			dac_granted |= VWRITE;
2985012c643dSRobert Watson 
2986012c643dSRobert Watson 		if ((acc_mode & dac_granted) == acc_mode)
2987e39c53edSPoul-Henning Kamp 			return (0);
2988e39c53edSPoul-Henning Kamp 
2989012c643dSRobert Watson 		goto privcheck;
2990e39c53edSPoul-Henning Kamp 	}
2991e39c53edSPoul-Henning Kamp 
2992012c643dSRobert Watson 	/* Otherwise, check the groups (first match) */
2993012c643dSRobert Watson 	if (groupmember(file_gid, cred)) {
2994012c643dSRobert Watson 		if (file_mode & S_IXGRP)
2995012c643dSRobert Watson 			dac_granted |= VEXEC;
2996012c643dSRobert Watson 		if (file_mode & S_IRGRP)
2997012c643dSRobert Watson 			dac_granted |= VREAD;
2998012c643dSRobert Watson 		if (file_mode & S_IWGRP)
2999012c643dSRobert Watson 			dac_granted |= VWRITE;
3000012c643dSRobert Watson 
3001012c643dSRobert Watson 		if ((acc_mode & dac_granted) == acc_mode)
3002012c643dSRobert Watson 			return (0);
3003012c643dSRobert Watson 
3004012c643dSRobert Watson 		goto privcheck;
3005e39c53edSPoul-Henning Kamp 	}
3006e39c53edSPoul-Henning Kamp 
3007e39c53edSPoul-Henning Kamp 	/* Otherwise, check everyone else. */
3008012c643dSRobert Watson 	if (file_mode & S_IXOTH)
3009012c643dSRobert Watson 		dac_granted |= VEXEC;
3010012c643dSRobert Watson 	if (file_mode & S_IROTH)
3011012c643dSRobert Watson 		dac_granted |= VREAD;
3012012c643dSRobert Watson 	if (file_mode & S_IWOTH)
3013012c643dSRobert Watson 		dac_granted |= VWRITE;
3014012c643dSRobert Watson 	if ((acc_mode & dac_granted) == acc_mode)
3015012c643dSRobert Watson 		return (0);
3016012c643dSRobert Watson 
3017012c643dSRobert Watson privcheck:
3018012c643dSRobert Watson 	if (!suser_xxx(cred, NULL, PRISON_ROOT)) {
3019012c643dSRobert Watson 		/* XXX audit: privilege used */
3020012c643dSRobert Watson 		if (privused != NULL)
3021012c643dSRobert Watson 			*privused = 1;
3022012c643dSRobert Watson 		return (0);
3023012c643dSRobert Watson 	}
3024012c643dSRobert Watson 
3025012c643dSRobert Watson #ifdef CAPABILITIES
3026012c643dSRobert Watson 	/*
3027012c643dSRobert Watson 	 * Build a capability mask to determine if the set of capabilities
3028012c643dSRobert Watson 	 * satisfies the requirements when combined with the granted mask
3029012c643dSRobert Watson 	 * from above.
3030012c643dSRobert Watson 	 * For each capability, if the capability is required, bitwise
3031012c643dSRobert Watson 	 * or the request type onto the cap_granted mask.
3032012c643dSRobert Watson 	 */
3033012c643dSRobert Watson 	cap_granted = 0;
3034012c643dSRobert Watson 	if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) &&
3035728783c2SRobert Watson 	    !cap_check_xxx(cred, NULL, CAP_DAC_EXECUTE, PRISON_ROOT))
3036012c643dSRobert Watson 	    cap_granted |= VEXEC;
3037012c643dSRobert Watson 
3038012c643dSRobert Watson 	if ((acc_mode & VREAD) && ((dac_granted & VREAD) == 0) &&
3039728783c2SRobert Watson 	    !cap_check_xxx(cred, NULL, CAP_DAC_READ_SEARCH, PRISON_ROOT))
3040012c643dSRobert Watson 		cap_granted |= VREAD;
3041012c643dSRobert Watson 
3042012c643dSRobert Watson 	if ((acc_mode & VWRITE) && ((dac_granted & VWRITE) == 0) &&
3043728783c2SRobert Watson 	    !cap_check_xxx(cred, NULL, CAP_DAC_WRITE, PRISON_ROOT))
3044012c643dSRobert Watson 		cap_granted |= VWRITE;
3045012c643dSRobert Watson 
3046728783c2SRobert Watson 	if ((acc_mode & (cap_granted | dac_granted)) == acc_mode) {
3047012c643dSRobert Watson 		/* XXX audit: privilege used */
3048012c643dSRobert Watson 		if (privused != NULL)
3049012c643dSRobert Watson 			*privused = 1;
3050012c643dSRobert Watson 		return (0);
3051012c643dSRobert Watson 	}
3052012c643dSRobert Watson #endif
3053012c643dSRobert Watson 
3054012c643dSRobert Watson 	return (EACCES);
3055e39c53edSPoul-Henning Kamp }
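
/*
 * Hedged usage sketch: a filesystem's VOP_ACCESS typically finishes
 * by delegating to vaccess() with its on-disk attributes, in the
 * style of ufs_access().  'ip' is a hypothetical inode pointer.
 */
#if 0
	return (vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
	    ap->a_mode, ap->a_cred, NULL));
#endif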
3056