xref: /freebsd/sys/kern/vfs_export.c (revision a863c0fb2fdb34ba012b313c83705d7d0cd0d38c)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1989, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  * (c) UNIX System Laboratories, Inc.
5df8bae1dSRodney W. Grimes  * All or some portions of this file are derived from material licensed
6df8bae1dSRodney W. Grimes  * to the University of California by American Telephone and Telegraph
7df8bae1dSRodney W. Grimes  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8df8bae1dSRodney W. Grimes  * the permission of UNIX System Laboratories, Inc.
9df8bae1dSRodney W. Grimes  *
10df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
11df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
12df8bae1dSRodney W. Grimes  * are met:
13df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
15df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
17df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
19df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
20df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
21df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
22df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
23df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
24df8bae1dSRodney W. Grimes  *    without specific prior written permission.
25df8bae1dSRodney W. Grimes  *
26df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
37df8bae1dSRodney W. Grimes  *
38996c772fSJohn Dyson  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39c3aac50fSPeter Wemm  * $FreeBSD$
40df8bae1dSRodney W. Grimes  */
41df8bae1dSRodney W. Grimes 
42df8bae1dSRodney W. Grimes /*
43df8bae1dSRodney W. Grimes  * External virtual filesystem routines
44df8bae1dSRodney W. Grimes  */
450e41ee30SGarrett Wollman #include "opt_ddb.h"
463275cf73SPoul-Henning Kamp #include "opt_ffs.h"
47df8bae1dSRodney W. Grimes 
48df8bae1dSRodney W. Grimes #include <sys/param.h>
49df8bae1dSRodney W. Grimes #include <sys/systm.h>
509626b608SPoul-Henning Kamp #include <sys/bio.h>
515e950839SLuoqi Chen #include <sys/buf.h>
525e950839SLuoqi Chen #include <sys/conf.h>
535e950839SLuoqi Chen #include <sys/dirent.h>
545e950839SLuoqi Chen #include <sys/domain.h>
555e950839SLuoqi Chen #include <sys/eventhandler.h>
564d948813SBruce Evans #include <sys/fcntl.h>
57986f4ce7SBruce Evans #include <sys/kernel.h>
589c8b8baaSPeter Wemm #include <sys/kthread.h>
590384fff8SJason Evans #include <sys/ktr.h>
60a1c995b6SPoul-Henning Kamp #include <sys/malloc.h>
61df8bae1dSRodney W. Grimes #include <sys/mount.h>
62e12d97d2SEivind Eklund #include <sys/namei.h>
635e950839SLuoqi Chen #include <sys/proc.h>
645e950839SLuoqi Chen #include <sys/reboot.h>
65771b51efSBruce Evans #include <sys/socket.h>
66df8bae1dSRodney W. Grimes #include <sys/stat.h>
675e950839SLuoqi Chen #include <sys/sysctl.h>
682be70f79SJohn Dyson #include <sys/vmmeter.h>
695e950839SLuoqi Chen #include <sys/vnode.h>
70df8bae1dSRodney W. Grimes 
71d3114049SBruce Evans #include <machine/limits.h>
720384fff8SJason Evans #include <machine/mutex.h>
73d3114049SBruce Evans 
74df8bae1dSRodney W. Grimes #include <vm/vm.h>
75efeaf95aSDavid Greenman #include <vm/vm_object.h>
76efeaf95aSDavid Greenman #include <vm/vm_extern.h>
771efb74fbSJohn Dyson #include <vm/pmap.h>
781efb74fbSJohn Dyson #include <vm/vm_map.h>
791c7c3c6aSMatthew Dillon #include <vm/vm_page.h>
8047221757SJohn Dyson #include <vm/vm_pager.h>
816476c0d2SJohn Dyson #include <vm/vnode_pager.h>
822d8acc0fSJohn Dyson #include <vm/vm_zone.h>
83df8bae1dSRodney W. Grimes 
84a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
8555166637SPoul-Henning Kamp 
86bba25953SEivind Eklund static void	addalias __P((struct vnode *vp, dev_t nvp_rdev));
87cb451ebdSBruce Evans static void	insmntque __P((struct vnode *vp, struct mount *mp));
88996c772fSJohn Dyson static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
89453aaa0dSEivind Eklund 
90453aaa0dSEivind Eklund /*
91453aaa0dSEivind Eklund  * Number of vnodes in existence.  Increased whenever getnewvnode()
92453aaa0dSEivind Eklund  * allocates a new vnode, never decreased.
93453aaa0dSEivind Eklund  */
static unsigned long	numvnodes;
/*
 * NOTE(review): numvnodes is an unsigned long but is exported with
 * SYSCTL_INT; on LP64 platforms the widths differ -- confirm whether
 * this should be SYSCTL_LONG (or the variable an int).
 */
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
9698d93822SBruce Evans 
97453aaa0dSEivind Eklund /*
98453aaa0dSEivind Eklund  * Conversion tables for conversion from vnode types to inode formats
99453aaa0dSEivind Eklund  * and back.
100453aaa0dSEivind Eklund  */
/* Indexed by the S_IFMT bits of an inode mode (presumably shifted
 * down 12 bits by callers -- verify at call sites); yields the
 * corresponding vnode type, VNON for unused slots. */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* Indexed by enum vtype; yields the S_IF* mode bits.  The final
 * entry (S_IFMT) presumably covers VBAD -- verify against vnode.h. */
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
109df8bae1dSRodney W. Grimes 
110a863c0fbSEivind Eklund /*
111a863c0fbSEivind Eklund  * List of vnodes that are ready for recycling.
112a863c0fbSEivind Eklund  */
113453aaa0dSEivind Eklund static TAILQ_HEAD(freelst, vnode) vnode_free_list;
114925a3a41SJohn Dyson 
115453aaa0dSEivind Eklund /*
116453aaa0dSEivind Eklund  * Minimum number of free vnodes.  If there are fewer than this free vnodes,
117453aaa0dSEivind Eklund  * getnewvnode() will return a newly allocated vnode.
118453aaa0dSEivind Eklund  */
static u_long wantfreevnodes = 25;
/*
 * NOTE(review): wantfreevnodes and freevnodes are u_long but exported
 * with SYSCTL_INT; widths differ on LP64 platforms -- confirm whether
 * these should be SYSCTL_LONG.
 */
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
/* Number of vnodes in the free list. */
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
124fbd6e6c9SPoul-Henning Kamp 
125453aaa0dSEivind Eklund /*
126a863c0fbSEivind Eklund  * Various variables used for debugging the new implementation of
127a863c0fbSEivind Eklund  * reassignbuf().
128a863c0fbSEivind Eklund  * XXX these are probably of (very) limited utility now.
129453aaa0dSEivind Eklund  */
130e929c00dSKirk McKusick static int reassignbufcalls;
131e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
132e929c00dSKirk McKusick static int reassignbufloops;
133e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
134e929c00dSKirk McKusick static int reassignbufsortgood;
135e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
136e929c00dSKirk McKusick static int reassignbufsortbad;
137e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
138453aaa0dSEivind Eklund /* Set to 0 for old insertion-sort based reassignbuf, 1 for modern method. */
139e929c00dSKirk McKusick static int reassignbufmethod = 1;
140e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
141e929c00dSKirk McKusick 
142bef608bdSJohn Dyson #ifdef ENABLE_VFS_IOOPT
143a863c0fbSEivind Eklund /* See NOTES for a description of this setting. */
144ad8ac923SKirk McKusick int vfs_ioopt = 0;
14560f8d464SJohn Dyson SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
14626300b34SJohn Dyson #endif
14760f8d464SJohn Dyson 
148a863c0fbSEivind Eklund /* List of mounted filesystems. */
149453aaa0dSEivind Eklund struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
150a863c0fbSEivind Eklund 
151453aaa0dSEivind Eklund /* For any iteration/modification of mountlist */
152a18b1f1dSJason Evans struct mtx mountlist_mtx;
153a863c0fbSEivind Eklund 
154453aaa0dSEivind Eklund /* For any iteration/modification of mnt_vnodelist */
155996c772fSJohn Dyson struct simplelock mntvnode_slock;
156453aaa0dSEivind Eklund /*
157453aaa0dSEivind Eklund  * Cache for the mount type id assigned to NFS.  This is used for
158453aaa0dSEivind Eklund  * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c.
159453aaa0dSEivind Eklund  */
160500b04a2SBruce Evans int	nfs_mount_type = -1;
161453aaa0dSEivind Eklund 
162289bdf33SBruce Evans #ifndef NULL_SIMPLELOCKS
163453aaa0dSEivind Eklund /* To keep more than one thread at a time from running vfs_getnewfsid */
164289bdf33SBruce Evans static struct simplelock mntid_slock;
165a863c0fbSEivind Eklund 
166453aaa0dSEivind Eklund /* For any iteration/modification of vnode_free_list */
167303b270bSEivind Eklund static struct simplelock vnode_free_list_slock;
168453aaa0dSEivind Eklund 
169453aaa0dSEivind Eklund /*
170453aaa0dSEivind Eklund  * For any iteration/modification of dev->si_hlist (linked through
171453aaa0dSEivind Eklund  * v_specnext)
172453aaa0dSEivind Eklund  */
173996c772fSJohn Dyson static struct simplelock spechash_slock;
174289bdf33SBruce Evans #endif
175453aaa0dSEivind Eklund 
176453aaa0dSEivind Eklund /* Publicly exported FS */
177453aaa0dSEivind Eklund struct nfs_public nfs_pub;
178a863c0fbSEivind Eklund 
179453aaa0dSEivind Eklund /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
1802d8acc0fSJohn Dyson static vm_zone_t vnode_zone;
181a863c0fbSEivind Eklund 
182453aaa0dSEivind Eklund /* Set to 1 to print out reclaim of active vnodes */
183453aaa0dSEivind Eklund int	prtactive = 0;
184df8bae1dSRodney W. Grimes 
185b1897c19SJulian Elischer /*
186b1897c19SJulian Elischer  * The workitem queue.
187453aaa0dSEivind Eklund  *
188453aaa0dSEivind Eklund  * It is useful to delay writes of file data and filesystem metadata
189453aaa0dSEivind Eklund  * for tens of seconds so that quickly created and deleted files need
190453aaa0dSEivind Eklund  * not waste disk bandwidth being created and removed. To realize this,
191453aaa0dSEivind Eklund  * we append vnodes to a "workitem" queue. When running with a soft
192453aaa0dSEivind Eklund  * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds.  Thus, filesystems mounted on
 * block devices are delayed only about half the time that file data
 * is delayed.
195453aaa0dSEivind Eklund  * Similarly, directory updates are more critical, so are only delayed
196453aaa0dSEivind Eklund  * about a third the time that file data is delayed. Thus, there are
197453aaa0dSEivind Eklund  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
198453aaa0dSEivind Eklund  * one each second (driven off the filesystem syncer process). The
199453aaa0dSEivind Eklund  * syncer_delayno variable indicates the next queue that is to be processed.
200453aaa0dSEivind Eklund  * Items that need to be processed soon are placed in this queue:
201453aaa0dSEivind Eklund  *
202453aaa0dSEivind Eklund  *	syncer_workitem_pending[syncer_delayno]
203453aaa0dSEivind Eklund  *
204453aaa0dSEivind Eklund  * A delay of fifteen seconds is done by placing the request fifteen
205453aaa0dSEivind Eklund  * entries later in the queue:
206453aaa0dSEivind Eklund  *
207453aaa0dSEivind Eklund  *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
208453aaa0dSEivind Eklund  *
209b1897c19SJulian Elischer  */
210453aaa0dSEivind Eklund static int syncer_delayno = 0;
211453aaa0dSEivind Eklund static long syncer_mask;
212453aaa0dSEivind Eklund LIST_HEAD(synclist, vnode);
213453aaa0dSEivind Eklund static struct synclist *syncer_workitem_pending;
214453aaa0dSEivind Eklund 
215b1897c19SJulian Elischer #define SYNCER_MAXDELAY		32
216db878ba4SEivind Eklund static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
217e4ab40bcSKirk McKusick time_t syncdelay = 30;		/* max time to delay syncing data */
218e4ab40bcSKirk McKusick time_t filedelay = 30;		/* time to delay syncing files */
219e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
22067812eacSKirk McKusick time_t dirdelay = 29;		/* time to delay syncing directories */
221e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
22267812eacSKirk McKusick time_t metadelay = 28;		/* time to delay syncing metadata */
223e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
224e4ab40bcSKirk McKusick static int rushjob;		/* number of slots to run ASAP */
225e4ab40bcSKirk McKusick static int stat_rush_requests;	/* number of times I/O speeded up */
226e4ab40bcSKirk McKusick SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
227b1897c19SJulian Elischer 
228453aaa0dSEivind Eklund /*
229a863c0fbSEivind Eklund  * Number of vnodes we want to exist at any one time.  This is mostly used
230a863c0fbSEivind Eklund  * to size hash tables in vnode-related code.  It is normally not used in
 * getnewvnode(), as wantfreevnodes is normally nonzero.
232a863c0fbSEivind Eklund  *
233a863c0fbSEivind Eklund  * XXX desiredvnodes is historical cruft and should not exist.
234453aaa0dSEivind Eklund  */
2350d94caffSDavid Greenman int desiredvnodes;
2363d177f46SBill Fumerola SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
2373d177f46SBill Fumerola     &desiredvnodes, 0, "Maximum number of vnodes");
2380d94caffSDavid Greenman 
23998d93822SBruce Evans static void	vfs_free_addrlist __P((struct netexport *nep));
24098d93822SBruce Evans static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
24198d93822SBruce Evans static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
24298d93822SBruce Evans 				       struct export_args *argp));
24398d93822SBruce Evans 
/*
 * Initialize the vnode management data structures.
 *
 * Run once at boot, before any mount or vnode allocation takes
 * place; everything here must be set up before getnewvnode() or
 * the mount paths are reachable.
 */
void
vntblinit()
{

	/*
	 * Scale the vnode target to the machine: one slot per process
	 * plus one per four physical pages.
	 */
	desiredvnodes = maxproc + cnt.v_page_count / 4;
	mtx_init(&mountlist_mtx, "mountlist", MTX_DEF);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	/* Zone allocator backing all struct vnode allocations. */
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.  hashinit() rounds the
	 * requested queue count up to a power of two and returns the
	 * mask, so recompute the effective maximum delay from it.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
		&syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}
266df8bae1dSRodney W. Grimes 
/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * mp       - mount point to busy
 * flags    - LK_NOWAIT to fail rather than sleep if an unmount is
 *            in progress
 * interlkp - optional caller-held mutex, dropped while sleeping or
 *            handed to lockmgr() via LK_INTERLOCK
 * p        - current process, for lockmgr() ownership
 *
 * Returns 0 with a shared busy reference held, or ENOENT if the
 * mount is being unmounted.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct mtx *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		/* Unmount in progress: fail immediately or wait it out. */
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			/* Drop the caller's interlock across the sleep. */
			mtx_exit(interlkp, MTX_DEF);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			mtx_enter(interlkp, MTX_DEF);
		}
		/*
		 * The mount we slept on is gone (or going away); the
		 * caller must look the mount point up again.
		 */
		return (ENOENT);
	}
	/* Acquire a shared busy reference on the mount. */
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}
306df8bae1dSRodney W. Grimes 
307df8bae1dSRodney W. Grimes /*
308df8bae1dSRodney W. Grimes  * Free a busy filesystem.
309df8bae1dSRodney W. Grimes  */
31026f9a767SRodney W. Grimes void
311996c772fSJohn Dyson vfs_unbusy(mp, p)
312996c772fSJohn Dyson 	struct mount *mp;
313996c772fSJohn Dyson 	struct proc *p;
314df8bae1dSRodney W. Grimes {
315df8bae1dSRodney W. Grimes 
316996c772fSJohn Dyson 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
317e0e9c421SDavid Greenman }
318e0e9c421SDavid Greenman 
319e0e9c421SDavid Greenman /*
320996c772fSJohn Dyson  * Lookup a filesystem type, and if found allocate and initialize
321996c772fSJohn Dyson  * a mount structure for it.
322996c772fSJohn Dyson  *
323996c772fSJohn Dyson  * Devname is usually updated by mount(8) after booting.
324e0e9c421SDavid Greenman  */
325996c772fSJohn Dyson int
326996c772fSJohn Dyson vfs_rootmountalloc(fstypename, devname, mpp)
327996c772fSJohn Dyson 	char *fstypename;
328996c772fSJohn Dyson 	char *devname;
329996c772fSJohn Dyson 	struct mount **mpp;
330e0e9c421SDavid Greenman {
331996c772fSJohn Dyson 	struct proc *p = curproc;	/* XXX */
332996c772fSJohn Dyson 	struct vfsconf *vfsp;
333996c772fSJohn Dyson 	struct mount *mp;
334996c772fSJohn Dyson 
335ecbb00a2SDoug Rabson 	if (fstypename == NULL)
336ecbb00a2SDoug Rabson 		return (ENODEV);
337996c772fSJohn Dyson 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
338996c772fSJohn Dyson 		if (!strcmp(vfsp->vfc_name, fstypename))
339996c772fSJohn Dyson 			break;
340996c772fSJohn Dyson 	if (vfsp == NULL)
341996c772fSJohn Dyson 		return (ENODEV);
342996c772fSJohn Dyson 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
343996c772fSJohn Dyson 	bzero((char *)mp, (u_long)sizeof(struct mount));
3448f9110f6SJohn Dyson 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
345996c772fSJohn Dyson 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
346996c772fSJohn Dyson 	LIST_INIT(&mp->mnt_vnodelist);
347996c772fSJohn Dyson 	mp->mnt_vfc = vfsp;
348996c772fSJohn Dyson 	mp->mnt_op = vfsp->vfc_vfsops;
349996c772fSJohn Dyson 	mp->mnt_flag = MNT_RDONLY;
350996c772fSJohn Dyson 	mp->mnt_vnodecovered = NULLVP;
351996c772fSJohn Dyson 	vfsp->vfc_refcount++;
3521b5464efSPoul-Henning Kamp 	mp->mnt_iosize_max = DFLTPHYS;
353996c772fSJohn Dyson 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
354996c772fSJohn Dyson 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
355996c772fSJohn Dyson 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
356996c772fSJohn Dyson 	mp->mnt_stat.f_mntonname[0] = '/';
357996c772fSJohn Dyson 	mp->mnt_stat.f_mntonname[1] = 0;
358996c772fSJohn Dyson 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
359996c772fSJohn Dyson 	*mpp = mp;
360996c772fSJohn Dyson 	return (0);
361996c772fSJohn Dyson }
362996c772fSJohn Dyson 
/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
/* NOTE(review): dead code -- compiled out by "#ifdef notdef" above. */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	/* A preselected root filesystem takes precedence. */
	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	/* Otherwise try each registered type that has a mountroot hook. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif
389e0e9c421SDavid Greenman 
390df8bae1dSRodney W. Grimes /*
391df8bae1dSRodney W. Grimes  * Lookup a mount point by filesystem identifier.
392df8bae1dSRodney W. Grimes  */
393df8bae1dSRodney W. Grimes struct mount *
394996c772fSJohn Dyson vfs_getvfs(fsid)
395df8bae1dSRodney W. Grimes 	fsid_t *fsid;
396df8bae1dSRodney W. Grimes {
397df8bae1dSRodney W. Grimes 	register struct mount *mp;
398df8bae1dSRodney W. Grimes 
399a18b1f1dSJason Evans 	mtx_enter(&mountlist_mtx, MTX_DEF);
4000429e37aSPoul-Henning Kamp 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
401df8bae1dSRodney W. Grimes 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
402996c772fSJohn Dyson 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
403a18b1f1dSJason Evans 			mtx_exit(&mountlist_mtx, MTX_DEF);
404df8bae1dSRodney W. Grimes 			return (mp);
405df8bae1dSRodney W. Grimes 	    }
406996c772fSJohn Dyson 	}
407a18b1f1dSJason Evans 	mtx_exit(&mountlist_mtx, MTX_DEF);
408df8bae1dSRodney W. Grimes 	return ((struct mount *) 0);
409df8bae1dSRodney W. Grimes }
410df8bae1dSRodney W. Grimes 
411df8bae1dSRodney W. Grimes /*
41205ecdd70SBruce Evans  * Get a new unique fsid.  Try to make its val[0] unique, since this value
41305ecdd70SBruce Evans  * will be used to create fake device numbers for stat().  Also try (but
41405ecdd70SBruce Evans  * not so hard) make its val[0] unique mod 2^16, since some emulators only
41505ecdd70SBruce Evans  * support 16-bit device numbers.  We end up with unique val[0]'s for the
41605ecdd70SBruce Evans  * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls.
417e6f71111SMatthew Dillon  *
41861214975SBruce Evans  * Keep in mind that several mounts may be running in parallel.  Starting
41905ecdd70SBruce Evans  * the search one past where the previous search terminated is both a
42005ecdd70SBruce Evans  * micro-optimization and a defense against returning the same fsid to
42105ecdd70SBruce Evans  * different mounts.
422df8bae1dSRodney W. Grimes  */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_int16_t mntid_base;	/* where the previous search ended */
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	/* val[1] is simply the filesystem type number. */
	tfsid.val[1] = mtype;
	/* Low byte of the type becomes the top byte of the minor number. */
	mtype = (mtype & 0xFF) << 24;
	for (;;) {
		/*
		 * Build a fake device number: major 255, minor composed
		 * of the type byte plus mntid_base split so that its low
		 * byte lands in the low 8 bits of the minor -- this is
		 * what keeps val[0] unique mod 2^16 for the first 2^8
		 * calls, as described in the comment above.
		 */
		tfsid.val[0] = makeudev(255,
		    mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF));
		mntid_base++;
		/* Retry until the candidate fsid is not already in use. */
		if (vfs_getvfs(&tfsid) == NULL)
			break;
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];
	simple_unlock(&mntid_slock);
}
446df8bae1dSRodney W. Grimes 
447df8bae1dSRodney W. Grimes /*
448a2801b77SJohn Polstra  * Knob to control the precision of file timestamps:
449a2801b77SJohn Polstra  *
450a2801b77SJohn Polstra  *   0 = seconds only; nanoseconds zeroed.
451a2801b77SJohn Polstra  *   1 = seconds and nanoseconds, accurate within 1/HZ.
452a2801b77SJohn Polstra  *   2 = seconds and nanoseconds, truncated to microseconds.
453a2801b77SJohn Polstra  * >=3 = seconds and nanoseconds, maximum precision.
454a2801b77SJohn Polstra  */
455a2801b77SJohn Polstra enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
456a2801b77SJohn Polstra 
457a2801b77SJohn Polstra static int timestamp_precision = TSP_SEC;
458a2801b77SJohn Polstra SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
459a2801b77SJohn Polstra     &timestamp_precision, 0, "");
460a2801b77SJohn Polstra 
461a2801b77SJohn Polstra /*
462a2801b77SJohn Polstra  * Get a current timestamp.
463a2801b77SJohn Polstra  */
464a2801b77SJohn Polstra void
465a2801b77SJohn Polstra vfs_timestamp(tsp)
466a2801b77SJohn Polstra 	struct timespec *tsp;
467a2801b77SJohn Polstra {
468a2801b77SJohn Polstra 	struct timeval tv;
469a2801b77SJohn Polstra 
470a2801b77SJohn Polstra 	switch (timestamp_precision) {
471a2801b77SJohn Polstra 	case TSP_SEC:
472a2801b77SJohn Polstra 		tsp->tv_sec = time_second;
473a2801b77SJohn Polstra 		tsp->tv_nsec = 0;
474a2801b77SJohn Polstra 		break;
475a2801b77SJohn Polstra 	case TSP_HZ:
476a2801b77SJohn Polstra 		getnanotime(tsp);
477a2801b77SJohn Polstra 		break;
478a2801b77SJohn Polstra 	case TSP_USEC:
479a2801b77SJohn Polstra 		microtime(&tv);
480a2801b77SJohn Polstra 		TIMEVAL_TO_TIMESPEC(&tv, tsp);
481a2801b77SJohn Polstra 		break;
482a2801b77SJohn Polstra 	case TSP_NSEC:
483a2801b77SJohn Polstra 	default:
484a2801b77SJohn Polstra 		nanotime(tsp);
485a2801b77SJohn Polstra 		break;
486a2801b77SJohn Polstra 	}
487a2801b77SJohn Polstra }
488a2801b77SJohn Polstra 
489a2801b77SJohn Polstra /*
490df8bae1dSRodney W. Grimes  * Set vnode attributes to VNOVAL
491df8bae1dSRodney W. Grimes  */
49226f9a767SRodney W. Grimes void
49326f9a767SRodney W. Grimes vattr_null(vap)
494df8bae1dSRodney W. Grimes 	register struct vattr *vap;
495df8bae1dSRodney W. Grimes {
496df8bae1dSRodney W. Grimes 
497df8bae1dSRodney W. Grimes 	vap->va_type = VNON;
49826f9a767SRodney W. Grimes 	vap->va_size = VNOVAL;
49926f9a767SRodney W. Grimes 	vap->va_bytes = VNOVAL;
5007a6c46b5SDoug Rabson 	vap->va_mode = VNOVAL;
5017a6c46b5SDoug Rabson 	vap->va_nlink = VNOVAL;
5027a6c46b5SDoug Rabson 	vap->va_uid = VNOVAL;
5037a6c46b5SDoug Rabson 	vap->va_gid = VNOVAL;
5047a6c46b5SDoug Rabson 	vap->va_fsid = VNOVAL;
5057a6c46b5SDoug Rabson 	vap->va_fileid = VNOVAL;
5067a6c46b5SDoug Rabson 	vap->va_blocksize = VNOVAL;
5077a6c46b5SDoug Rabson 	vap->va_rdev = VNOVAL;
5087a6c46b5SDoug Rabson 	vap->va_atime.tv_sec = VNOVAL;
5097a6c46b5SDoug Rabson 	vap->va_atime.tv_nsec = VNOVAL;
5107a6c46b5SDoug Rabson 	vap->va_mtime.tv_sec = VNOVAL;
5117a6c46b5SDoug Rabson 	vap->va_mtime.tv_nsec = VNOVAL;
5127a6c46b5SDoug Rabson 	vap->va_ctime.tv_sec = VNOVAL;
5137a6c46b5SDoug Rabson 	vap->va_ctime.tv_nsec = VNOVAL;
5147a6c46b5SDoug Rabson 	vap->va_flags = VNOVAL;
5157a6c46b5SDoug Rabson 	vap->va_gen = VNOVAL;
516df8bae1dSRodney W. Grimes 	vap->va_vaflags = 0;
517df8bae1dSRodney W. Grimes }
518df8bae1dSRodney W. Grimes 
519df8bae1dSRodney W. Grimes /*
520df8bae1dSRodney W. Grimes  * Routines having to do with the management of the vnode table.
521df8bae1dSRodney W. Grimes  */
522df8bae1dSRodney W. Grimes 
523df8bae1dSRodney W. Grimes /*
524df8bae1dSRodney W. Grimes  * Return the next vnode from the free list.
525df8bae1dSRodney W. Grimes  */
52626f9a767SRodney W. Grimes int
527df8bae1dSRodney W. Grimes getnewvnode(tag, mp, vops, vpp)
528df8bae1dSRodney W. Grimes 	enum vtagtype tag;
529df8bae1dSRodney W. Grimes 	struct mount *mp;
530f57e6547SBruce Evans 	vop_t **vops;
531df8bae1dSRodney W. Grimes 	struct vnode **vpp;
532df8bae1dSRodney W. Grimes {
533c904bbbdSKirk McKusick 	int s, count;
534996c772fSJohn Dyson 	struct proc *p = curproc;	/* XXX */
535c904bbbdSKirk McKusick 	struct vnode *vp = NULL;
536f2a2857bSKirk McKusick 	struct mount *vnmp;
53795e5e988SJohn Dyson 	vm_object_t object;
538df8bae1dSRodney W. Grimes 
539b15a966eSPoul-Henning Kamp 	/*
540b15a966eSPoul-Henning Kamp 	 * We take the least recently used vnode from the freelist
541b15a966eSPoul-Henning Kamp 	 * if we can get it and it has no cached pages, and no
542b15a966eSPoul-Henning Kamp 	 * namecache entries are relative to it.
543b15a966eSPoul-Henning Kamp 	 * Otherwise we allocate a new vnode
544b15a966eSPoul-Henning Kamp 	 */
545b15a966eSPoul-Henning Kamp 
546925a3a41SJohn Dyson 	s = splbio();
547996c772fSJohn Dyson 	simple_lock(&vnode_free_list_slock);
548925a3a41SJohn Dyson 
54900544193SPoul-Henning Kamp 	if (wantfreevnodes && freevnodes < wantfreevnodes) {
55000544193SPoul-Henning Kamp 		vp = NULL;
551d047b580SPoul-Henning Kamp 	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
552d047b580SPoul-Henning Kamp 		/*
553d047b580SPoul-Henning Kamp 		 * XXX: this is only here to be backwards compatible
554d047b580SPoul-Henning Kamp 		 */
55500544193SPoul-Henning Kamp 		vp = NULL;
556c904bbbdSKirk McKusick 	} else for (count = 0; count < freevnodes; count++) {
557c904bbbdSKirk McKusick 		vp = TAILQ_FIRST(&vnode_free_list);
558c904bbbdSKirk McKusick 		if (vp == NULL || vp->v_usecount)
559c904bbbdSKirk McKusick 			panic("getnewvnode: free vnode isn't");
56095e5e988SJohn Dyson 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
561c904bbbdSKirk McKusick 		/*
562c904bbbdSKirk McKusick 		 * Don't recycle if active in the namecache or
563c904bbbdSKirk McKusick 		 * if it still has cached pages or we cannot get
564c904bbbdSKirk McKusick 		 * its interlock.
565c904bbbdSKirk McKusick 		 */
566c904bbbdSKirk McKusick 		if (LIST_FIRST(&vp->v_cache_src) != NULL ||
5679ff5ce6bSBoris Popov 		    (VOP_GETVOBJECT(vp, &object) == 0 &&
5689ff5ce6bSBoris Popov 		     (object->resident_page_count || object->ref_count)) ||
569a18b1f1dSJason Evans 		    !mtx_try_enter(&vp->v_interlock, MTX_DEF)) {
570c904bbbdSKirk McKusick 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
571c904bbbdSKirk McKusick 			vp = NULL;
572b15a966eSPoul-Henning Kamp 			continue;
573c904bbbdSKirk McKusick 		}
574f2a2857bSKirk McKusick 		/*
575f2a2857bSKirk McKusick 		 * Skip over it if its filesystem is being suspended.
576f2a2857bSKirk McKusick 		 */
577f2a2857bSKirk McKusick 		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
578b15a966eSPoul-Henning Kamp 			break;
579a18b1f1dSJason Evans 		mtx_exit(&vp->v_interlock, MTX_DEF);
580f2a2857bSKirk McKusick 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
581f2a2857bSKirk McKusick 		vp = NULL;
582b15a966eSPoul-Henning Kamp 	}
583b15a966eSPoul-Henning Kamp 	if (vp) {
584a051452aSPoul-Henning Kamp 		vp->v_flag |= VDOOMED;
585b15a966eSPoul-Henning Kamp 		freevnodes--;
586996c772fSJohn Dyson 		simple_unlock(&vnode_free_list_slock);
587a051452aSPoul-Henning Kamp 		cache_purge(vp);
588df8bae1dSRodney W. Grimes 		vp->v_lease = NULL;
5892be70f79SJohn Dyson 		if (vp->v_type != VBAD) {
590996c772fSJohn Dyson 			vgonel(vp, p);
5912be70f79SJohn Dyson 		} else {
592a18b1f1dSJason Evans 			mtx_exit(&vp->v_interlock, MTX_DEF);
593996c772fSJohn Dyson 		}
594f2a2857bSKirk McKusick 		vn_finished_write(vnmp);
595bd7e5f99SJohn Dyson 
5965526d2d9SEivind Eklund #ifdef INVARIANTS
597797f2d22SPoul-Henning Kamp 		{
598797f2d22SPoul-Henning Kamp 			int s;
5990d94caffSDavid Greenman 
600df8bae1dSRodney W. Grimes 			if (vp->v_data)
601df8bae1dSRodney W. Grimes 				panic("cleaned vnode isn't");
602df8bae1dSRodney W. Grimes 			s = splbio();
603df8bae1dSRodney W. Grimes 			if (vp->v_numoutput)
604df8bae1dSRodney W. Grimes 				panic("Clean vnode has pending I/O's");
605df8bae1dSRodney W. Grimes 			splx(s);
606f2a2857bSKirk McKusick 			if (vp->v_writecount != 0)
607f2a2857bSKirk McKusick 				panic("Non-zero write count");
608797f2d22SPoul-Henning Kamp 		}
609df8bae1dSRodney W. Grimes #endif
610df8bae1dSRodney W. Grimes 		vp->v_flag = 0;
611df8bae1dSRodney W. Grimes 		vp->v_lastw = 0;
612df8bae1dSRodney W. Grimes 		vp->v_lasta = 0;
613df8bae1dSRodney W. Grimes 		vp->v_cstart = 0;
614df8bae1dSRodney W. Grimes 		vp->v_clen = 0;
615df8bae1dSRodney W. Grimes 		vp->v_socket = 0;
616b15a966eSPoul-Henning Kamp 	} else {
617b15a966eSPoul-Henning Kamp 		simple_unlock(&vnode_free_list_slock);
6182d8acc0fSJohn Dyson 		vp = (struct vnode *) zalloc(vnode_zone);
619b15a966eSPoul-Henning Kamp 		bzero((char *) vp, sizeof *vp);
620a18b1f1dSJason Evans 		mtx_init(&vp->v_interlock, "vnode interlock", MTX_DEF);
621b15a966eSPoul-Henning Kamp 		vp->v_dd = vp;
622a051452aSPoul-Henning Kamp 		cache_purge(vp);
623b15a966eSPoul-Henning Kamp 		LIST_INIT(&vp->v_cache_src);
624b15a966eSPoul-Henning Kamp 		TAILQ_INIT(&vp->v_cache_dst);
625b15a966eSPoul-Henning Kamp 		numvnodes++;
626df8bae1dSRodney W. Grimes 	}
627b15a966eSPoul-Henning Kamp 
62816e9e530SPeter Wemm 	TAILQ_INIT(&vp->v_cleanblkhd);
62916e9e530SPeter Wemm 	TAILQ_INIT(&vp->v_dirtyblkhd);
630f9ceb7c7SDavid Greenman 	vp->v_type = VNON;
631df8bae1dSRodney W. Grimes 	vp->v_tag = tag;
632df8bae1dSRodney W. Grimes 	vp->v_op = vops;
63367e87166SBoris Popov 	lockinit(&vp->v_lock, PVFS, "vnlock", 0, LK_NOPAUSE);
634df8bae1dSRodney W. Grimes 	insmntque(vp, mp);
635df8bae1dSRodney W. Grimes 	*vpp = vp;
636df8bae1dSRodney W. Grimes 	vp->v_usecount = 1;
637df8bae1dSRodney W. Grimes 	vp->v_data = 0;
638925a3a41SJohn Dyson 	splx(s);
63964d3c7e3SJohn Dyson 
640fb116777SEivind Eklund 	vfs_object_create(vp, p, p->p_ucred);
641df8bae1dSRodney W. Grimes 	return (0);
642df8bae1dSRodney W. Grimes }
643df8bae1dSRodney W. Grimes 
644df8bae1dSRodney W. Grimes /*
645df8bae1dSRodney W. Grimes  * Move a vnode from one mount queue to another.
646df8bae1dSRodney W. Grimes  */
647cb451ebdSBruce Evans static void
648df8bae1dSRodney W. Grimes insmntque(vp, mp)
649df8bae1dSRodney W. Grimes 	register struct vnode *vp;
650df8bae1dSRodney W. Grimes 	register struct mount *mp;
651df8bae1dSRodney W. Grimes {
652df8bae1dSRodney W. Grimes 
653996c772fSJohn Dyson 	simple_lock(&mntvnode_slock);
654df8bae1dSRodney W. Grimes 	/*
655df8bae1dSRodney W. Grimes 	 * Delete from old mount point vnode list, if on one.
656df8bae1dSRodney W. Grimes 	 */
657df8bae1dSRodney W. Grimes 	if (vp->v_mount != NULL)
658df8bae1dSRodney W. Grimes 		LIST_REMOVE(vp, v_mntvnodes);
659df8bae1dSRodney W. Grimes 	/*
660df8bae1dSRodney W. Grimes 	 * Insert into list of vnodes for the new mount point, if available.
661df8bae1dSRodney W. Grimes 	 */
662996c772fSJohn Dyson 	if ((vp->v_mount = mp) == NULL) {
663996c772fSJohn Dyson 		simple_unlock(&mntvnode_slock);
664df8bae1dSRodney W. Grimes 		return;
665996c772fSJohn Dyson 	}
666df8bae1dSRodney W. Grimes 	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
667996c772fSJohn Dyson 	simple_unlock(&mntvnode_slock);
668df8bae1dSRodney W. Grimes }
669df8bae1dSRodney W. Grimes 
670df8bae1dSRodney W. Grimes /*
671df8bae1dSRodney W. Grimes  * Update outstanding I/O count and do wakeup if requested.
672df8bae1dSRodney W. Grimes  */
67326f9a767SRodney W. Grimes void
674df8bae1dSRodney W. Grimes vwakeup(bp)
675df8bae1dSRodney W. Grimes 	register struct buf *bp;
676df8bae1dSRodney W. Grimes {
677df8bae1dSRodney W. Grimes 	register struct vnode *vp;
678df8bae1dSRodney W. Grimes 
679df8bae1dSRodney W. Grimes 	bp->b_flags &= ~B_WRITEINPROG;
680bb56ec4aSPoul-Henning Kamp 	if ((vp = bp->b_vp)) {
681df8bae1dSRodney W. Grimes 		vp->v_numoutput--;
682df8bae1dSRodney W. Grimes 		if (vp->v_numoutput < 0)
683df8bae1dSRodney W. Grimes 			panic("vwakeup: neg numoutput");
684a3a8bb29SDavid Greenman 		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
685df8bae1dSRodney W. Grimes 			vp->v_flag &= ~VBWAIT;
686df8bae1dSRodney W. Grimes 			wakeup((caddr_t) &vp->v_numoutput);
687df8bae1dSRodney W. Grimes 		}
688df8bae1dSRodney W. Grimes 	}
689df8bae1dSRodney W. Grimes }
690df8bae1dSRodney W. Grimes 
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * flags: V_SAVE requests that dirty data be written out first rather
 * than discarded.  slpflag/slptimeo are passed through to the sleeps
 * taken while waiting for I/O or buffer locks.
 * Returns 0 on success or a tsleep/lock error code.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		/*
		 * Wait for all pending writes to finish, then push any
		 * remaining dirty buffers with a synchronous fsync before
		 * we start invalidating.
		 */
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			/*
			 * After a successful MNT_WAIT fsync nothing may
			 * remain dirty or in flight.
			 */
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
  	}
	s = splbio();
	/*
	 * Repeatedly sweep the clean list, then the dirty list, until
	 * both are empty.  Each inner pass may be cut short by lock
	 * contention or a late-arriving delayed write, hence the outer
	 * retry loop.
	 */
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
				/*
				 * Sleep for the lock; ENOLCK means the
				 * buffer went away (LK_SLEEPFAIL), so
				 * restart the scan from the list head.
				 */
				error = BUF_TIMELOCK(bp,
				    LK_EXCLUSIVE | LK_SLEEPFAIL,
				    "vinvalbuf", slpflag, slptimeo);
				if (error == ENOLCK)
					break;
				splx(s);
				return (error);
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
				(flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						BUF_UNLOCK(bp);
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= B_ASYNC;
						BUF_WRITE(bp);
					}
				} else {
					bremfree(bp);
					(void) BUF_WRITE(bp);
				}
				break;
			}
			/* Discard the buffer and its cached data. */
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	/* Wait out any writes started by the V_SAVE pushes above. */
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	mtx_enter(&vp->v_interlock, MTX_DEF);
	if (VOP_GETVOBJECT(vp, &object) == 0) {
		vm_object_page_remove(object, 0, 0,
			(flags & V_SAVE) ? TRUE : FALSE);
	}
	mtx_exit(&vp->v_interlock, MTX_DEF);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}
803df8bae1dSRodney W. Grimes 
/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	/*
	 * Sweep both buffer lists, discarding every buffer at or beyond
	 * the truncation point.  Keep sweeping as long as a pass frees
	 * anything, since brelse() may have let the lists change.
	 */
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					/* Contended: wait, then rescan. */
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				/*
				 * If our saved successor left the clean
				 * list (or this vnode) while we worked,
				 * the scan state is stale: start over.
				 */
				if (nbp &&
				    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				/* Same staleness check, dirty-list flavor. */
				if (nbp &&
				    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
		/*
		 * Push any remaining delayed-write buffers with negative
		 * logical block numbers (indirect blocks) so metadata
		 * reaches disk.
		 */
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					/*
					 * NOTE(review): this jumps back to
					 * `restart` (full re-sweep), not
					 * `restartsync` — presumably
					 * intentional for safety; confirm.
					 */
					goto restart;
				} else {
					bremfree(bp);
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					BUF_WRITE(bp);
				}
				goto restartsync;
			}

		}
	}

	/* Wait for any writes issued above to drain. */
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}
911bef608bdSJohn Dyson 
912bef608bdSJohn Dyson /*
913df8bae1dSRodney W. Grimes  * Associate a buffer with a vnode.
914df8bae1dSRodney W. Grimes  */
91526f9a767SRodney W. Grimes void
916df8bae1dSRodney W. Grimes bgetvp(vp, bp)
917df8bae1dSRodney W. Grimes 	register struct vnode *vp;
918df8bae1dSRodney W. Grimes 	register struct buf *bp;
919df8bae1dSRodney W. Grimes {
920602d2b48SDavid Greenman 	int s;
921df8bae1dSRodney W. Grimes 
9225526d2d9SEivind Eklund 	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
923219cbf59SEivind Eklund 
924a051452aSPoul-Henning Kamp 	vhold(vp);
925df8bae1dSRodney W. Grimes 	bp->b_vp = vp;
92641d2e3e0SPoul-Henning Kamp 	bp->b_dev = vn_todev(vp);
927df8bae1dSRodney W. Grimes 	/*
928df8bae1dSRodney W. Grimes 	 * Insert onto list for new vnode.
929df8bae1dSRodney W. Grimes 	 */
930602d2b48SDavid Greenman 	s = splbio();
93102b00854SKirk McKusick 	bp->b_xflags |= BX_VNCLEAN;
93202b00854SKirk McKusick 	bp->b_xflags &= ~BX_VNDIRTY;
93316e9e530SPeter Wemm 	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
934602d2b48SDavid Greenman 	splx(s);
935df8bae1dSRodney W. Grimes }
936df8bae1dSRodney W. Grimes 
937df8bae1dSRodney W. Grimes /*
938df8bae1dSRodney W. Grimes  * Disassociate a buffer from a vnode.
939df8bae1dSRodney W. Grimes  */
94026f9a767SRodney W. Grimes void
941df8bae1dSRodney W. Grimes brelvp(bp)
942df8bae1dSRodney W. Grimes 	register struct buf *bp;
943df8bae1dSRodney W. Grimes {
944df8bae1dSRodney W. Grimes 	struct vnode *vp;
94516e9e530SPeter Wemm 	struct buflists *listheadp;
946602d2b48SDavid Greenman 	int s;
947df8bae1dSRodney W. Grimes 
9485526d2d9SEivind Eklund 	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
9492be70f79SJohn Dyson 
950df8bae1dSRodney W. Grimes 	/*
951df8bae1dSRodney W. Grimes 	 * Delete from old vnode list, if on one.
952df8bae1dSRodney W. Grimes 	 */
953b1897c19SJulian Elischer 	vp = bp->b_vp;
954602d2b48SDavid Greenman 	s = splbio();
95502b00854SKirk McKusick 	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
95602b00854SKirk McKusick 		if (bp->b_xflags & BX_VNDIRTY)
95716e9e530SPeter Wemm 			listheadp = &vp->v_dirtyblkhd;
95816e9e530SPeter Wemm 		else
95916e9e530SPeter Wemm 			listheadp = &vp->v_cleanblkhd;
96016e9e530SPeter Wemm 		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
96102b00854SKirk McKusick 		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
96216e9e530SPeter Wemm 	}
96316e9e530SPeter Wemm 	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
964b1897c19SJulian Elischer 		vp->v_flag &= ~VONWORKLST;
965b1897c19SJulian Elischer 		LIST_REMOVE(vp, v_synclist);
966b1897c19SJulian Elischer 	}
967602d2b48SDavid Greenman 	splx(s);
968df8bae1dSRodney W. Grimes 	bp->b_vp = (struct vnode *) 0;
969a051452aSPoul-Henning Kamp 	vdrop(vp);
970df8bae1dSRodney W. Grimes }
971df8bae1dSRodney W. Grimes 
972df8bae1dSRodney W. Grimes /*
973b1897c19SJulian Elischer  * Add an item to the syncer work queue.
974b1897c19SJulian Elischer  */
97542e26d47SMatthew Dillon static void
97642e26d47SMatthew Dillon vn_syncer_add_to_worklist(struct vnode *vp, int delay)
977b1897c19SJulian Elischer {
978b1897c19SJulian Elischer 	int s, slot;
979b1897c19SJulian Elischer 
980b1897c19SJulian Elischer 	s = splbio();
981b1897c19SJulian Elischer 
982b1897c19SJulian Elischer 	if (vp->v_flag & VONWORKLST) {
983b1897c19SJulian Elischer 		LIST_REMOVE(vp, v_synclist);
984b1897c19SJulian Elischer 	}
985b1897c19SJulian Elischer 
986b1897c19SJulian Elischer 	if (delay > syncer_maxdelay - 2)
987b1897c19SJulian Elischer 		delay = syncer_maxdelay - 2;
988b1897c19SJulian Elischer 	slot = (syncer_delayno + delay) & syncer_mask;
989b1897c19SJulian Elischer 
990b1897c19SJulian Elischer 	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
991b1897c19SJulian Elischer 	vp->v_flag |= VONWORKLST;
992b1897c19SJulian Elischer 	splx(s);
993b1897c19SJulian Elischer }
994b1897c19SJulian Elischer 
/* The syncer kernel process; filled in by kproc_start(). */
struct  proc *updateproc;
static void sched_sync __P((void));
/* Descriptor handed to kproc_start() to create the syncer process. */
static struct kproc_desc up_kp = {
	"syncer",	/* process name */
	sched_sync,	/* entry point */
	&updateproc	/* where to record the new proc pointer */
};
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
1003b1897c19SJulian Elischer 
/*
 * System filesystem synchronizer daemon.
 *
 * Main loop of the syncer kernel process: once a second it advances
 * the delay wheel, fsyncs every vnode in the expired slot, runs the
 * soft-updates worklist, and honors rushjob requests to catch up.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	struct mount *mp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	mtx_enter(&Giant, MTX_DEF);

	/* Be suspended as late as possible during shutdown. */
	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
	    SHUTDOWN_PRI_LAST);

	for (;;) {
		kproc_suspend_loop(p);

		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.  Be careful
		 * of interrupt race on slp queue.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			/*
			 * Only fsync unlocked vnodes on filesystems that
			 * are not suspended.
			 */
			if (VOP_ISLOCKED(vp, NULL) == 0 &&
			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
				VOP_UNLOCK(vp, 0, p);
				vn_finished_write(mp);
			}
			s = splbio();
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: v_tag VT_VFS vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves it to a different
				 * slot we are safe.
				 */
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    !vn_isdisk(vp, NULL))
					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 */
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
			splx(s);
		}

		/*
		 * Do soft update processing.
		 */
#ifdef SOFTUPDATES
		softdep_process_worklist(NULL);
#endif

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process. A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP. Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait. Otherwise start right over
		 * again. We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}
1101b1897c19SJulian Elischer 
1102b1897c19SJulian Elischer /*
1103e4ab40bcSKirk McKusick  * Request the syncer daemon to speed up its work.
1104e4ab40bcSKirk McKusick  * We never push it to speed up more than half of its
1105e4ab40bcSKirk McKusick  * normal turn time, otherwise it could take over the cpu.
1106e4ab40bcSKirk McKusick  */
1107e4ab40bcSKirk McKusick int
1108e4ab40bcSKirk McKusick speedup_syncer()
1109e4ab40bcSKirk McKusick {
1110e4ab40bcSKirk McKusick 	int s;
1111e4ab40bcSKirk McKusick 
1112e4ab40bcSKirk McKusick 	s = splhigh();
1113e4ab40bcSKirk McKusick 	if (updateproc->p_wchan == &lbolt)
1114e4ab40bcSKirk McKusick 		setrunnable(updateproc);
1115e4ab40bcSKirk McKusick 	splx(s);
1116e4ab40bcSKirk McKusick 	if (rushjob < syncdelay / 2) {
1117e4ab40bcSKirk McKusick 		rushjob += 1;
1118e4ab40bcSKirk McKusick 		stat_rush_requests += 1;
1119e4ab40bcSKirk McKusick 		return (1);
1120e4ab40bcSKirk McKusick 	}
1121e4ab40bcSKirk McKusick 	return(0);
1122e4ab40bcSKirk McKusick }
1123e4ab40bcSKirk McKusick 
1124e4ab40bcSKirk McKusick /*
11250d94caffSDavid Greenman  * Associate a p-buffer with a vnode.
11261c7c3c6aSMatthew Dillon  *
11271c7c3c6aSMatthew Dillon  * Also sets B_PAGING flag to indicate that vnode is not fully associated
11281c7c3c6aSMatthew Dillon  * with the buffer.  i.e. the bp has not been linked into the vnode or
11291c7c3c6aSMatthew Dillon  * ref-counted.
11300d94caffSDavid Greenman  */
11310d94caffSDavid Greenman void
11320d94caffSDavid Greenman pbgetvp(vp, bp)
11330d94caffSDavid Greenman 	register struct vnode *vp;
11340d94caffSDavid Greenman 	register struct buf *bp;
11350d94caffSDavid Greenman {
1136219cbf59SEivind Eklund 
11375526d2d9SEivind Eklund 	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
11385526d2d9SEivind Eklund 
11390d94caffSDavid Greenman 	bp->b_vp = vp;
11401c7c3c6aSMatthew Dillon 	bp->b_flags |= B_PAGING;
114141d2e3e0SPoul-Henning Kamp 	bp->b_dev = vn_todev(vp);
11420d94caffSDavid Greenman }
11430d94caffSDavid Greenman 
11440d94caffSDavid Greenman /*
11450d94caffSDavid Greenman  * Disassociate a p-buffer from a vnode.
11460d94caffSDavid Greenman  */
11470d94caffSDavid Greenman void
11480d94caffSDavid Greenman pbrelvp(bp)
11490d94caffSDavid Greenman 	register struct buf *bp;
11500d94caffSDavid Greenman {
11510d94caffSDavid Greenman 
11525526d2d9SEivind Eklund 	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
11530d94caffSDavid Greenman 
11541c7c3c6aSMatthew Dillon 	/* XXX REMOVE ME */
11551c7c3c6aSMatthew Dillon 	if (bp->b_vnbufs.tqe_next != NULL) {
11561c7c3c6aSMatthew Dillon 		panic(
11571c7c3c6aSMatthew Dillon 		    "relpbuf(): b_vp was probably reassignbuf()d %p %x",
11581c7c3c6aSMatthew Dillon 		    bp,
11591c7c3c6aSMatthew Dillon 		    (int)bp->b_flags
11601c7c3c6aSMatthew Dillon 		);
11611c7c3c6aSMatthew Dillon 	}
11620d94caffSDavid Greenman 	bp->b_vp = (struct vnode *) 0;
11631c7c3c6aSMatthew Dillon 	bp->b_flags &= ~B_PAGING;
11641c7c3c6aSMatthew Dillon }
11651c7c3c6aSMatthew Dillon 
/*
 * Change the vnode a pager buffer is associated with.
 *
 * Only valid for B_PAGING buffers, which are not linked into any
 * vnode buffer list, so a bare pointer swap suffices.
 */
void
pbreassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{

	KASSERT(bp->b_flags & B_PAGING,
	    ("pbreassignbuf() on non phys bp %p", bp));
	bp->b_vp = newvp;
}
11790d94caffSDavid Greenman 
11800d94caffSDavid Greenman /*
1181df8bae1dSRodney W. Grimes  * Reassign a buffer from one vnode to another.
1182df8bae1dSRodney W. Grimes  * Used to assign file specific control information
1183df8bae1dSRodney W. Grimes  * (indirect blocks) to the vnode to which they belong.
1184df8bae1dSRodney W. Grimes  */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;
	int s;

	/* Moving a buffer to a NULL vnode is a no-op; complain and bail. */
	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	++reassignbufcalls;

	/*
	 * B_PAGING flagged buffers cannot be reassigned because their vp
	 * is not fully linked in.
	 */
	if (bp->b_flags & B_PAGING)
		panic("cannot reassign paging buffer");

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
		if (bp->b_xflags & BX_VNDIRTY)
			listheadp = &bp->b_vp->v_dirtyblkhd;
		else
			listheadp = &bp->b_vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
		/*
		 * Drop the hold reference on the old vnode only when the
		 * buffer is actually changing vnodes.
		 */
		if (bp->b_vp != newvp) {
			vdrop(bp->b_vp);
			bp->b_vp = NULL;	/* for clarification */
		}
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			/*
			 * The vnode is gaining its first dirty buffer;
			 * schedule it on the syncer worklist with a delay
			 * chosen by vnode type (directories and device
			 * meta-data get shorter delays).
			 */
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VCHR:
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= BX_VNDIRTY;
		tbp = TAILQ_FIRST(listheadp);
		/*
		 * Keep the dirty queue sorted by logical block number;
		 * negative lblkno's (meta-data) sort to the tail.
		 */
		if (tbp == NULL ||
		    bp->b_lblkno == 0 ||
		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (bp->b_lblkno < 0) {
			TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (reassignbufmethod == 1) {
			/*
			 * New sorting algorithm, only handle sequential case,
			 * otherwise append to end (but before metadata)
			 */
			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
			    (tbp->b_xflags & BX_VNDIRTY)) {
				/*
				 * Found the best place to insert the buffer
				 */
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortgood;
			} else {
				/*
				 * Missed, append to end, but before meta-data.
				 * We know that the head buffer in the list is
				 * not meta-data due to prior conditionals.
				 *
				 * Indirect effects:  NFS second stage write
				 * tends to wind up here, giving maximum
				 * distance between the unstable write and the
				 * commit rpc.
				 */
				tbp = TAILQ_LAST(listheadp, buflists);
				while (tbp && tbp->b_lblkno < 0)
					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortbad;
			}
		} else {
			/*
			 * Old sorting algorithm, scan queue and insert
			 */
			struct buf *ttbp;
			while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
			    (ttbp->b_lblkno < bp->b_lblkno)) {
				++reassignbufloops;
				tbp = ttbp;
			}
			TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
		}
	} else {
		bp->b_xflags |= BX_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		/*
		 * The vnode's last dirty buffer may just have moved to the
		 * clean list; if so, take the vnode off the syncer worklist.
		 */
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	/* Take a hold reference on the new vnode for this buffer. */
	if (bp->b_vp != newvp) {
		bp->b_vp = newvp;
		vhold(bp->b_vp);
	}
	splx(s);
}
1315df8bae1dSRodney W. Grimes 
1316df8bae1dSRodney W. Grimes /*
1317df8bae1dSRodney W. Grimes  * Create a vnode for a block device.
131841fadeebSBruce Evans  * Used for mounting the root file system.
131901f76720SJeroen Ruigrok van der Werven  * XXX: This now changed to a VCHR due to the block/char merging.
1320df8bae1dSRodney W. Grimes  */
132126f9a767SRodney W. Grimes int
1322df8bae1dSRodney W. Grimes bdevvp(dev, vpp)
1323df8bae1dSRodney W. Grimes 	dev_t dev;
1324df8bae1dSRodney W. Grimes 	struct vnode **vpp;
1325df8bae1dSRodney W. Grimes {
1326df8bae1dSRodney W. Grimes 	register struct vnode *vp;
1327df8bae1dSRodney W. Grimes 	struct vnode *nvp;
1328df8bae1dSRodney W. Grimes 	int error;
1329df8bae1dSRodney W. Grimes 
13302447bec8SPoul-Henning Kamp 	if (dev == NODEV) {
133137906c68SBruce Evans 		*vpp = NULLVP;
133237906c68SBruce Evans 		return (ENXIO);
133337906c68SBruce Evans 	}
133402a1e48fSKirk McKusick 	if (vfinddev(dev, VCHR, vpp))
133502a1e48fSKirk McKusick 		return (0);
1336df8bae1dSRodney W. Grimes 	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
1337df8bae1dSRodney W. Grimes 	if (error) {
133837906c68SBruce Evans 		*vpp = NULLVP;
1339df8bae1dSRodney W. Grimes 		return (error);
1340df8bae1dSRodney W. Grimes 	}
1341df8bae1dSRodney W. Grimes 	vp = nvp;
134201f76720SJeroen Ruigrok van der Werven 	vp->v_type = VCHR;
1343dbafb366SPoul-Henning Kamp 	addalias(vp, dev);
1344df8bae1dSRodney W. Grimes 	*vpp = vp;
1345df8bae1dSRodney W. Grimes 	return (0);
1346df8bae1dSRodney W. Grimes }
1347df8bae1dSRodney W. Grimes 
/*
 * Add vnode to the alias list hung off the dev_t.
 *
 * The reason for this gunk is that multiple vnodes can reference
 * the same physical device, so checking vp->v_usecount to see
 * how many users there are is inadequate; the v_usecount for
 * the vnodes need to be accumulated.  vcount() does that.
 *
 * Returns the vnode that ends up representing the device: either
 * nvp itself, or a pre-existing bdevvp vnode that absorbs nvp's
 * filesystem state (in which case nvp is released and vgone'd).
 */
struct vnode *
addaliasu(nvp, nvp_rdev)
	struct vnode *nvp;
	udev_t nvp_rdev;
{
	struct vnode *ovp;
	vop_t **ops;
	dev_t dev;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addaliasu on non-special vnode");
	dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0);
	/*
	 * Check to see if we have a bdevvp vnode with no associated
	 * filesystem. If so, we want to associate the filesystem of
	 * the newly created vnode with the bdevvp vnode and
	 * discard the newly created vnode rather than leaving the
	 * bdevvp vnode lying around with no associated filesystem.
	 */
	if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) {
		/* No idle bdevvp vnode to merge with; just add the alias. */
		addalias(nvp, dev);
		return (nvp);
	}
	/*
	 * Discard unneeded vnode, but save its node specific data.
	 * Note that if there is a lock, it is carried over in the
	 * node specific data to the replacement vnode.
	 */
	vref(ovp);
	ovp->v_data = nvp->v_data;
	ovp->v_tag = nvp->v_tag;
	nvp->v_data = NULL;
	/* Swap the vnode operations vectors between old and new vnodes. */
	ops = nvp->v_op;
	nvp->v_op = ovp->v_op;
	ovp->v_op = ops;
	lockinit(&ovp->v_lock, PVFS, "vnlock", 0, LK_NOPAUSE);
	if (nvp->v_vnlock)
		ovp->v_vnlock = &ovp->v_lock;
	insmntque(ovp, nvp->v_mount);
	/* Release and destroy the now-empty replacement candidate. */
	vrele(nvp);
	vgone(nvp);
	return (ovp);
}
1399155f87daSMatthew Dillon 
/*
 * Local helper that does the same job as addaliasu(), but takes a
 * dev_t directly instead of a udev_t (and does not attempt the
 * bdevvp-merging that addaliasu() performs).
 */
static void
addalias(nvp, dev)
	struct vnode *nvp;
	dev_t dev;
{

	KASSERT(nvp->v_type == VBLK || nvp->v_type == VCHR,
	    ("addalias on non-special vnode"));
	nvp->v_rdev = dev;
	/* spechash_slock protects the per-device alias list. */
	simple_lock(&spechash_slock);
	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
	simple_unlock(&spechash_slock);
}
1415df8bae1dSRodney W. Grimes 
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 *
 * If the caller already holds the interlock it passes LK_INTERLOCK
 * in flags; otherwise the interlock is taken here.
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		mtx_enter(&vp->v_interlock, MTX_DEF);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		mtx_exit(&vp->v_interlock, MTX_DEF);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	/* Bump the use count while the interlock is held. */
	vp->v_usecount++;

	/* Pull the vnode off the free list if it was there. */
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		/* vn_lock() consumes the interlock (LK_INTERLOCK). */
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			mtx_enter(&vp->v_interlock, MTX_DEF);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			mtx_exit(&vp->v_interlock, MTX_DEF);
		}
		return (error);
	}
	mtx_exit(&vp->v_interlock, MTX_DEF);
	return (0);
}
1472df8bae1dSRodney W. Grimes 
/*
 * Increase the reference count of a vnode.  The interlock serializes
 * the update of v_usecount.
 */
void
vref(struct vnode *vp)
{
	mtx_enter(&vp->v_interlock, MTX_DEF);
	vp->v_usecount++;
	mtx_exit(&vp->v_interlock, MTX_DEF);
}
1483483140eaSJohn Dyson 
/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vrele: null vp"));

	mtx_enter(&vp->v_interlock, MTX_DEF);

	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));

	/* Fast path: other references remain, just drop ours. */
	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		mtx_exit(&vp->v_interlock, MTX_DEF);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
	/*
	 * If we are doing a vput, the node is already locked, and we must
	 * call VOP_INACTIVE with the node locked.  So, in the case of
	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
	 * vn_lock() consumes the interlock (LK_INTERLOCK), and
	 * VOP_INACTIVE unlocks the vnode when it returns.
	 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		mtx_exit(&vp->v_interlock, MTX_DEF);
#endif
		panic("vrele: negative ref cnt");
	}
}
1530df8bae1dSRodney W. Grimes 
/*
 * Release an already locked vnode.  This gives the same effects as
 * unlock+vrele(), but takes less time and avoids releasing and
 * re-acquiring the lock (as vrele() acquires the lock internally.)
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vput: null vp"));
	mtx_enter(&vp->v_interlock, MTX_DEF);
	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));

	/* Fast path: other references remain; drop ours and unlock. */
	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
	/*
	 * If we are doing a vput, the node is already locked, and we must
	 * call VOP_INACTIVE with the node locked.  So, in the case of
	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
	 * VOP_INACTIVE unlocks the vnode when it returns.
	 */
		mtx_exit(&vp->v_interlock, MTX_DEF);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}
15740d955f71SJohn Dyson 
/*
 * Somebody doesn't want the vnode recycled.  Take a hold reference;
 * a held vnode is kept off the free list (vbusy).
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

  	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}
1590df8bae1dSRodney W. Grimes 
/*
 * Note that there is one less who cares about this vnode.  vdrop() is the
 * opposite of vhold().  When the hold count reaches zero the vnode may
 * go back on the free list (vfree).
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}
1609df8bae1dSRodney W. Grimes 
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 *
 * Returns 0 on success, or EBUSY if busy vnodes remain.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;		/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		mtx_enter(&vp->v_interlock, MTX_DEF);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			mtx_exit(&vp->v_interlock, MTX_DEF);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			mtx_exit(&vp->v_interlock, MTX_DEF);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 * vgonel() consumes the vnode interlock; the mount-list
		 * lock is dropped around it, so the loop may restart.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		mtx_exit(&vp->v_interlock, MTX_DEF);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
1707df8bae1dSRodney W. Grimes 
/*
 * Disassociate the underlying file system from a vnode.
 *
 * Called with the vnode interlock held; VOP_LOCK with LK_INTERLOCK
 * consumes it.  On return the vnode has dead_vnodeop_p operations
 * and VT_NON tag, and VXWANT sleepers have been woken.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 * If the flush fails, just toss the buffers.
	 */
	if (flags & DOCLOSE) {
		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
			(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
	}

	VOP_DESTROYVOBJECT(vp);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		mtx_enter(&vp->v_interlock, MTX_DEF);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			vfree(vp);
		}
		mtx_exit(&vp->v_interlock, MTX_DEF);
	}

	/* Purge name cache entries and tear down the vnode locks. */
	cache_purge(vp);
	if (vp->v_vnlock) {
		lockdestroy(vp->v_vnlock);
		vp->v_vnlock = NULL;
	}
	lockdestroy(&vp->v_lock);

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}
1818df8bae1dSRodney W. Grimes 
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 *
 * NOTE(review): the VXLOCK path releases v_interlock (implying the
 * caller holds it on entry), but the normal path never releases it —
 * verify the caller's locking contract for this VOP.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	dev_t dev;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

	vp = ap->a_vp;
	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		mtx_exit(&vp->v_interlock, MTX_DEF);
		tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
		return (0);
	}
	dev = vp->v_rdev;
	/* Eliminate every alias vnode hanging off this device. */
	for (;;) {
		simple_lock(&spechash_slock);
		vq = SLIST_FIRST(&dev->si_hlist);
		simple_unlock(&spechash_slock);
		if (!vq)
			break;
		vgone(vq);
	}
	return (0);
}
1857996c772fSJohn Dyson 
1858996c772fSJohn Dyson /*
1859996c772fSJohn Dyson  * Recycle an unused vnode to the front of the free list.
1860996c772fSJohn Dyson  * Release the passed interlock if the vnode will be recycled.
1861996c772fSJohn Dyson  */
1862996c772fSJohn Dyson int
1863996c772fSJohn Dyson vrecycle(vp, inter_lkp, p)
1864996c772fSJohn Dyson 	struct vnode *vp;
1865996c772fSJohn Dyson 	struct simplelock *inter_lkp;
1866996c772fSJohn Dyson 	struct proc *p;
1867996c772fSJohn Dyson {
1868996c772fSJohn Dyson 
1869a18b1f1dSJason Evans 	mtx_enter(&vp->v_interlock, MTX_DEF);
1870996c772fSJohn Dyson 	if (vp->v_usecount == 0) {
1871996c772fSJohn Dyson 		if (inter_lkp) {
1872996c772fSJohn Dyson 			simple_unlock(inter_lkp);
1873996c772fSJohn Dyson 		}
1874996c772fSJohn Dyson 		vgonel(vp, p);
1875996c772fSJohn Dyson 		return (1);
1876996c772fSJohn Dyson 	}
1877a18b1f1dSJason Evans 	mtx_exit(&vp->v_interlock, MTX_DEF);
1878996c772fSJohn Dyson 	return (0);
1879df8bae1dSRodney W. Grimes }
1880df8bae1dSRodney W. Grimes 
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 *
 * Convenience wrapper around vgonel(): acquires the vnode interlock,
 * which vgonel() releases before returning.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	mtx_enter(&vp->v_interlock, MTX_DEF);
	vgonel(vp, p);
}
1894996c772fSJohn Dyson 
/*
 * vgone, with the vp interlock held.
 *
 * The interlock is released on every return path (directly here, or
 * handed off via vclean; see below).
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		mtx_exit(&vp->v_interlock, MTX_DEF);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 * NOTE(review): vclean() is entered with the interlock held and
	 * appears to drop it (we reacquire it immediately below) — confirm
	 * against vclean's definition.
	 */
	vclean(vp, DOCLOSE, p);
	mtx_enter(&vp->v_interlock, MTX_DEF);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
		simple_lock(&spechash_slock);
		SLIST_REMOVE(&vp->v_rdev->si_hlist, vp, vnode, v_specnext);
		freedev(vp->v_rdev);
		simple_unlock(&spechash_slock);
		vp->v_rdev = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the
	 * VDOOMED flag and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE)
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		else
			freevnodes++;	/* newly joining the free list */
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	/* Mark the vnode dead so stray references are caught. */
	vp->v_type = VBAD;
	mtx_exit(&vp->v_interlock, MTX_DEF);
}
1965df8bae1dSRodney W. Grimes 
1966df8bae1dSRodney W. Grimes /*
1967df8bae1dSRodney W. Grimes  * Lookup a vnode by device number.
1968df8bae1dSRodney W. Grimes  */
196926f9a767SRodney W. Grimes int
1970df8bae1dSRodney W. Grimes vfinddev(dev, type, vpp)
1971df8bae1dSRodney W. Grimes 	dev_t dev;
1972df8bae1dSRodney W. Grimes 	enum vtype type;
1973df8bae1dSRodney W. Grimes 	struct vnode **vpp;
1974df8bae1dSRodney W. Grimes {
1975dbafb366SPoul-Henning Kamp 	struct vnode *vp;
1976df8bae1dSRodney W. Grimes 
1977b98afd0dSBruce Evans 	simple_lock(&spechash_slock);
1978dbafb366SPoul-Henning Kamp 	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
1979dbafb366SPoul-Henning Kamp 		if (type == vp->v_type) {
1980df8bae1dSRodney W. Grimes 			*vpp = vp;
1981dbafb366SPoul-Henning Kamp 			simple_unlock(&spechash_slock);
1982dbafb366SPoul-Henning Kamp 			return (1);
1983dbafb366SPoul-Henning Kamp 		}
1984df8bae1dSRodney W. Grimes 	}
1985b98afd0dSBruce Evans 	simple_unlock(&spechash_slock);
1986dbafb366SPoul-Henning Kamp 	return (0);
1987df8bae1dSRodney W. Grimes }
1988df8bae1dSRodney W. Grimes 
1989df8bae1dSRodney W. Grimes /*
1990df8bae1dSRodney W. Grimes  * Calculate the total number of references to a special device.
1991df8bae1dSRodney W. Grimes  */
199226f9a767SRodney W. Grimes int
1993df8bae1dSRodney W. Grimes vcount(vp)
1994dbafb366SPoul-Henning Kamp 	struct vnode *vp;
1995df8bae1dSRodney W. Grimes {
199696267288SPoul-Henning Kamp 	struct vnode *vq;
1997df8bae1dSRodney W. Grimes 	int count;
1998df8bae1dSRodney W. Grimes 
1999dbafb366SPoul-Henning Kamp 	count = 0;
2000b98afd0dSBruce Evans 	simple_lock(&spechash_slock);
2001b0d17ba6SPoul-Henning Kamp 	SLIST_FOREACH(vq, &vp->v_rdev->si_hlist, v_specnext)
2002df8bae1dSRodney W. Grimes 		count += vq->v_usecount;
2003b98afd0dSBruce Evans 	simple_unlock(&spechash_slock);
2004df8bae1dSRodney W. Grimes 	return (count);
2005df8bae1dSRodney W. Grimes }
2006dbafb366SPoul-Henning Kamp 
20077fab7799SPeter Wemm /*
2008e8359a57SSøren Schmidt  * Same as above, but using the dev_t as argument
2009e8359a57SSøren Schmidt  */
2010e8359a57SSøren Schmidt int
2011e8359a57SSøren Schmidt count_dev(dev)
2012e8359a57SSøren Schmidt 	dev_t dev;
2013e8359a57SSøren Schmidt {
2014e8359a57SSøren Schmidt 	struct vnode *vp;
2015e8359a57SSøren Schmidt 
2016e8359a57SSøren Schmidt 	vp = SLIST_FIRST(&dev->si_hlist);
2017e8359a57SSøren Schmidt 	if (vp == NULL)
2018e8359a57SSøren Schmidt 		return (0);
2019e8359a57SSøren Schmidt 	return(vcount(vp));
2020e8359a57SSøren Schmidt }
2021e8359a57SSøren Schmidt 
/*
 * Print out a description of a vnode.
 */
/* Printable vnode type names, indexed by enum vtype (used by vprint()). */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
2027df8bae1dSRodney W. Grimes 
202826f9a767SRodney W. Grimes void
2029df8bae1dSRodney W. Grimes vprint(label, vp)
2030df8bae1dSRodney W. Grimes 	char *label;
2031dbafb366SPoul-Henning Kamp 	struct vnode *vp;
2032df8bae1dSRodney W. Grimes {
20332127f260SArchie Cobbs 	char buf[96];
2034df8bae1dSRodney W. Grimes 
2035df8bae1dSRodney W. Grimes 	if (label != NULL)
2036ac1e407bSBruce Evans 		printf("%s: %p: ", label, (void *)vp);
2037de15ef6aSDoug Rabson 	else
2038ac1e407bSBruce Evans 		printf("%p: ", (void *)vp);
2039ac1e407bSBruce Evans 	printf("type %s, usecount %d, writecount %d, refcount %d,",
2040df8bae1dSRodney W. Grimes 	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
2041df8bae1dSRodney W. Grimes 	    vp->v_holdcnt);
2042df8bae1dSRodney W. Grimes 	buf[0] = '\0';
2043df8bae1dSRodney W. Grimes 	if (vp->v_flag & VROOT)
2044df8bae1dSRodney W. Grimes 		strcat(buf, "|VROOT");
2045df8bae1dSRodney W. Grimes 	if (vp->v_flag & VTEXT)
2046df8bae1dSRodney W. Grimes 		strcat(buf, "|VTEXT");
2047df8bae1dSRodney W. Grimes 	if (vp->v_flag & VSYSTEM)
2048df8bae1dSRodney W. Grimes 		strcat(buf, "|VSYSTEM");
2049df8bae1dSRodney W. Grimes 	if (vp->v_flag & VXLOCK)
2050df8bae1dSRodney W. Grimes 		strcat(buf, "|VXLOCK");
2051df8bae1dSRodney W. Grimes 	if (vp->v_flag & VXWANT)
2052df8bae1dSRodney W. Grimes 		strcat(buf, "|VXWANT");
2053df8bae1dSRodney W. Grimes 	if (vp->v_flag & VBWAIT)
2054df8bae1dSRodney W. Grimes 		strcat(buf, "|VBWAIT");
2055a051452aSPoul-Henning Kamp 	if (vp->v_flag & VDOOMED)
2056a051452aSPoul-Henning Kamp 		strcat(buf, "|VDOOMED");
2057a051452aSPoul-Henning Kamp 	if (vp->v_flag & VFREE)
2058a051452aSPoul-Henning Kamp 		strcat(buf, "|VFREE");
205995e5e988SJohn Dyson 	if (vp->v_flag & VOBJBUF)
206095e5e988SJohn Dyson 		strcat(buf, "|VOBJBUF");
2061df8bae1dSRodney W. Grimes 	if (buf[0] != '\0')
2062df8bae1dSRodney W. Grimes 		printf(" flags (%s)", &buf[1]);
2063df8bae1dSRodney W. Grimes 	if (vp->v_data == NULL) {
2064df8bae1dSRodney W. Grimes 		printf("\n");
2065df8bae1dSRodney W. Grimes 	} else {
2066df8bae1dSRodney W. Grimes 		printf("\n\t");
2067df8bae1dSRodney W. Grimes 		VOP_PRINT(vp);
2068df8bae1dSRodney W. Grimes 	}
2069df8bae1dSRodney W. Grimes }
2070df8bae1dSRodney W. Grimes 
20711a477b0cSDavid Greenman #ifdef DDB
2072f5ef029eSPoul-Henning Kamp #include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	mtx_enter(&mountlist_mtx, MTX_DEF);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Skip mount points we cannot busy (e.g. mid-unmount). */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp, NULL))
				vprint((char *)0, vp);
		}
		/*
		 * vfs_busy() was given mountlist_mtx and we reacquire it
		 * here before reading the next list link, so mp's linkage
		 * is read under the lock.
		 */
		mtx_enter(&mountlist_mtx, MTX_DEF);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	mtx_exit(&mountlist_mtx, MTX_DEF);
}
2100df8bae1dSRodney W. Grimes #endif
2101df8bae1dSRodney W. Grimes 
/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS));

/*
 * Handler for the vfs.generic sysctl subtree: reports the highest
 * filesystem type number (VFS_MAXTYPENUM) or a single vfsconf record
 * by type number (VFS_CONF).
 */
static int
vfs_sysctl(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

	/* XXX the below code does not compile; vfs_sysctl does not exist. */
#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		/* Linear scan of the registered-filesystem list. */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");
21554a8b9660SBruce Evans 
2156f5ce6752SBruce Evans #if 1 || defined(COMPAT_PRELITE2)
2157a896f025SBruce Evans 
2158a896f025SBruce Evans static int
215982d9ae4eSPoul-Henning Kamp sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
2160a896f025SBruce Evans {
2161a896f025SBruce Evans 	int error;
2162a896f025SBruce Evans 	struct vfsconf *vfsp;
2163a896f025SBruce Evans 	struct ovfsconf ovfs;
21643a76a594SBruce Evans 
21653a76a594SBruce Evans 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
2166a896f025SBruce Evans 		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
2167a896f025SBruce Evans 		strcpy(ovfs.vfc_name, vfsp->vfc_name);
2168a896f025SBruce Evans 		ovfs.vfc_index = vfsp->vfc_typenum;
2169a896f025SBruce Evans 		ovfs.vfc_refcount = vfsp->vfc_refcount;
2170a896f025SBruce Evans 		ovfs.vfc_flags = vfsp->vfc_flags;
2171a896f025SBruce Evans 		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
2172a896f025SBruce Evans 		if (error)
2173a896f025SBruce Evans 			return error;
2174a896f025SBruce Evans 	}
2175a896f025SBruce Evans 	return 0;
2176a896f025SBruce Evans }
2177a896f025SBruce Evans 
2178f5ce6752SBruce Evans #endif /* 1 || COMPAT_PRELITE2 */
2179a896f025SBruce Evans 
2180453aaa0dSEivind Eklund #if COMPILING_LINT
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode(SYSCTL_HANDLER_ARGS)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	/* No output buffer: just estimate the space needed, with slop. */
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	mtx_enter(&mountlist_mtx, MTX_DEF);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Skip mount points we cannot busy (e.g. mid-unmount). */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			simple_unlock(&mntvnode_slock);
			/*
			 * NOTE(review): this error return leaves mp busied
			 * (no vfs_unbusy) and mountlist_mtx unheld — looks
			 * like a leak on the error path; confirm before use
			 * (code is only built under COMPILING_LINT).
			 */
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		mtx_enter(&mountlist_mtx, MTX_DEF);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	mtx_exit(&mountlist_mtx, MTX_DEF);

	return (0);
}

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
22472e58c0f8SDavid Greenman #endif
22484b2af45fSPoul-Henning Kamp 
2249df8bae1dSRodney W. Grimes /*
2250df8bae1dSRodney W. Grimes  * Check to see if a filesystem is mounted on a block device.
2251df8bae1dSRodney W. Grimes  */
2252df8bae1dSRodney W. Grimes int
2253df8bae1dSRodney W. Grimes vfs_mountedon(vp)
2254996c772fSJohn Dyson 	struct vnode *vp;
2255df8bae1dSRodney W. Grimes {
2256df8bae1dSRodney W. Grimes 
2257b1897c19SJulian Elischer 	if (vp->v_specmountpoint != NULL)
2258df8bae1dSRodney W. Grimes 		return (EBUSY);
2259dbafb366SPoul-Henning Kamp 	return (0);
2260996c772fSJohn Dyson }
2261996c772fSJohn Dyson 
/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp;
	struct proc *p;
	int error;

	/* Pick a process context for dounmount(); curproc may be NULL here. */
	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	while(!TAILQ_EMPTY(&mountlist)) {
		mp = TAILQ_LAST(&mountlist, mntlist);
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			/*
			 * Drop the failed mount from the list so the loop
			 * makes progress instead of retrying it forever.
			 */
			TAILQ_REMOVE(&mountlist, mp, mnt_list);
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		} else {
			/* The unmount has removed mp from the mountlist */
		}
	}
}
2296df8bae1dSRodney W. Grimes 
2297df8bae1dSRodney W. Grimes /*
2298df8bae1dSRodney W. Grimes  * Build hash lists of net addresses and hang them off the mount point.
2299df8bae1dSRodney W. Grimes  * Called by ufs_mount() to set up the lists of export addresses.
2300df8bae1dSRodney W. Grimes  */
2301df8bae1dSRodney W. Grimes static int
2302514ede09SBruce Evans vfs_hang_addrlist(mp, nep, argp)
2303514ede09SBruce Evans 	struct mount *mp;
2304514ede09SBruce Evans 	struct netexport *nep;
2305514ede09SBruce Evans 	struct export_args *argp;
2306df8bae1dSRodney W. Grimes {
2307df8bae1dSRodney W. Grimes 	register struct netcred *np;
2308df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
2309df8bae1dSRodney W. Grimes 	register int i;
2310df8bae1dSRodney W. Grimes 	struct radix_node *rn;
2311df8bae1dSRodney W. Grimes 	struct sockaddr *saddr, *smask = 0;
2312df8bae1dSRodney W. Grimes 	struct domain *dom;
2313df8bae1dSRodney W. Grimes 	int error;
2314df8bae1dSRodney W. Grimes 
2315df8bae1dSRodney W. Grimes 	if (argp->ex_addrlen == 0) {
2316df8bae1dSRodney W. Grimes 		if (mp->mnt_flag & MNT_DEFEXPORTED)
2317df8bae1dSRodney W. Grimes 			return (EPERM);
2318df8bae1dSRodney W. Grimes 		np = &nep->ne_defexported;
2319df8bae1dSRodney W. Grimes 		np->netc_exflags = argp->ex_flags;
2320df8bae1dSRodney W. Grimes 		np->netc_anon = argp->ex_anon;
2321df8bae1dSRodney W. Grimes 		np->netc_anon.cr_ref = 1;
2322df8bae1dSRodney W. Grimes 		mp->mnt_flag |= MNT_DEFEXPORTED;
2323df8bae1dSRodney W. Grimes 		return (0);
2324df8bae1dSRodney W. Grimes 	}
2325df8bae1dSRodney W. Grimes 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2326df8bae1dSRodney W. Grimes 	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
2327df8bae1dSRodney W. Grimes 	bzero((caddr_t) np, i);
2328df8bae1dSRodney W. Grimes 	saddr = (struct sockaddr *) (np + 1);
2329bb56ec4aSPoul-Henning Kamp 	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
2330df8bae1dSRodney W. Grimes 		goto out;
2331df8bae1dSRodney W. Grimes 	if (saddr->sa_len > argp->ex_addrlen)
2332df8bae1dSRodney W. Grimes 		saddr->sa_len = argp->ex_addrlen;
2333df8bae1dSRodney W. Grimes 	if (argp->ex_masklen) {
2334df8bae1dSRodney W. Grimes 		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
23355f61c81dSPeter Wemm 		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
2336df8bae1dSRodney W. Grimes 		if (error)
2337df8bae1dSRodney W. Grimes 			goto out;
2338df8bae1dSRodney W. Grimes 		if (smask->sa_len > argp->ex_masklen)
2339df8bae1dSRodney W. Grimes 			smask->sa_len = argp->ex_masklen;
2340df8bae1dSRodney W. Grimes 	}
2341df8bae1dSRodney W. Grimes 	i = saddr->sa_family;
2342df8bae1dSRodney W. Grimes 	if ((rnh = nep->ne_rtable[i]) == 0) {
2343df8bae1dSRodney W. Grimes 		/*
23440d94caffSDavid Greenman 		 * Seems silly to initialize every AF when most are not used,
23450d94caffSDavid Greenman 		 * do so on demand here
2346df8bae1dSRodney W. Grimes 		 */
2347df8bae1dSRodney W. Grimes 		for (dom = domains; dom; dom = dom->dom_next)
2348df8bae1dSRodney W. Grimes 			if (dom->dom_family == i && dom->dom_rtattach) {
2349df8bae1dSRodney W. Grimes 				dom->dom_rtattach((void **) &nep->ne_rtable[i],
2350df8bae1dSRodney W. Grimes 				    dom->dom_rtoffset);
2351df8bae1dSRodney W. Grimes 				break;
2352df8bae1dSRodney W. Grimes 			}
2353df8bae1dSRodney W. Grimes 		if ((rnh = nep->ne_rtable[i]) == 0) {
2354df8bae1dSRodney W. Grimes 			error = ENOBUFS;
2355df8bae1dSRodney W. Grimes 			goto out;
2356df8bae1dSRodney W. Grimes 		}
2357df8bae1dSRodney W. Grimes 	}
2358df8bae1dSRodney W. Grimes 	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
2359df8bae1dSRodney W. Grimes 	    np->netc_rnodes);
2360df8bae1dSRodney W. Grimes 	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
2361df8bae1dSRodney W. Grimes 		error = EPERM;
2362df8bae1dSRodney W. Grimes 		goto out;
2363df8bae1dSRodney W. Grimes 	}
2364df8bae1dSRodney W. Grimes 	np->netc_exflags = argp->ex_flags;
2365df8bae1dSRodney W. Grimes 	np->netc_anon = argp->ex_anon;
2366df8bae1dSRodney W. Grimes 	np->netc_anon.cr_ref = 1;
2367df8bae1dSRodney W. Grimes 	return (0);
2368df8bae1dSRodney W. Grimes out:
2369df8bae1dSRodney W. Grimes 	free(np, M_NETADDR);
2370df8bae1dSRodney W. Grimes 	return (error);
2371df8bae1dSRodney W. Grimes }
2372df8bae1dSRodney W. Grimes 
2373a863c0fbSEivind Eklund /* Helper for vfs_free_addrlist. */
2374df8bae1dSRodney W. Grimes /* ARGSUSED */
2375df8bae1dSRodney W. Grimes static int
2376514ede09SBruce Evans vfs_free_netcred(rn, w)
2377514ede09SBruce Evans 	struct radix_node *rn;
2378514ede09SBruce Evans 	void *w;
2379df8bae1dSRodney W. Grimes {
2380df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh = (struct radix_node_head *) w;
2381df8bae1dSRodney W. Grimes 
2382df8bae1dSRodney W. Grimes 	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2383df8bae1dSRodney W. Grimes 	free((caddr_t) rn, M_NETADDR);
2384df8bae1dSRodney W. Grimes 	return (0);
2385df8bae1dSRodney W. Grimes }
2386df8bae1dSRodney W. Grimes 
2387df8bae1dSRodney W. Grimes /*
2388df8bae1dSRodney W. Grimes  * Free the net address hash lists that are hanging off the mount points.
2389df8bae1dSRodney W. Grimes  */
2390df8bae1dSRodney W. Grimes static void
2391514ede09SBruce Evans vfs_free_addrlist(nep)
2392514ede09SBruce Evans 	struct netexport *nep;
2393df8bae1dSRodney W. Grimes {
2394df8bae1dSRodney W. Grimes 	register int i;
2395df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
2396df8bae1dSRodney W. Grimes 
2397df8bae1dSRodney W. Grimes 	for (i = 0; i <= AF_MAX; i++)
2398bb56ec4aSPoul-Henning Kamp 		if ((rnh = nep->ne_rtable[i])) {
2399df8bae1dSRodney W. Grimes 			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2400df8bae1dSRodney W. Grimes 			    (caddr_t) rnh);
2401df8bae1dSRodney W. Grimes 			free((caddr_t) rnh, M_RTABLE);
2402df8bae1dSRodney W. Grimes 			nep->ne_rtable[i] = 0;
2403df8bae1dSRodney W. Grimes 		}
2404df8bae1dSRodney W. Grimes }
2405df8bae1dSRodney W. Grimes 
/*
 * High level function to manipulate export options on a mount point
 * and the passed in netexport.
 * Struct export_args *argp is the variable used to twiddle options,
 * the structure is described in sys/mount.h
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		/* Tear down the public (WebNFS) export first, if we own it. */
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		/* Optionally register this mount as the public filesystem. */
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
2440df8bae1dSRodney W. Grimes 
2441f6b4c285SDoug Rabson /*
2442f6b4c285SDoug Rabson  * Set the publicly exported filesystem (WebNFS). Currently, only
2443f6b4c285SDoug Rabson  * one public filesystem is possible in the spec (RFC 2054 and 2055)
2444f6b4c285SDoug Rabson  */
2445f6b4c285SDoug Rabson int
2446f6b4c285SDoug Rabson vfs_setpublicfs(mp, nep, argp)
2447f6b4c285SDoug Rabson 	struct mount *mp;
2448f6b4c285SDoug Rabson 	struct netexport *nep;
2449f6b4c285SDoug Rabson 	struct export_args *argp;
2450f6b4c285SDoug Rabson {
2451f6b4c285SDoug Rabson 	int error;
2452f6b4c285SDoug Rabson 	struct vnode *rvp;
2453f6b4c285SDoug Rabson 	char *cp;
2454f6b4c285SDoug Rabson 
2455f6b4c285SDoug Rabson 	/*
2456f6b4c285SDoug Rabson 	 * mp == NULL -> invalidate the current info, the FS is
2457f6b4c285SDoug Rabson 	 * no longer exported. May be called from either vfs_export
2458f6b4c285SDoug Rabson 	 * or unmount, so check if it hasn't already been done.
2459f6b4c285SDoug Rabson 	 */
2460f6b4c285SDoug Rabson 	if (mp == NULL) {
2461f6b4c285SDoug Rabson 		if (nfs_pub.np_valid) {
2462f6b4c285SDoug Rabson 			nfs_pub.np_valid = 0;
2463f6b4c285SDoug Rabson 			if (nfs_pub.np_index != NULL) {
2464f6b4c285SDoug Rabson 				FREE(nfs_pub.np_index, M_TEMP);
2465f6b4c285SDoug Rabson 				nfs_pub.np_index = NULL;
2466f6b4c285SDoug Rabson 			}
2467f6b4c285SDoug Rabson 		}
2468f6b4c285SDoug Rabson 		return (0);
2469f6b4c285SDoug Rabson 	}
2470f6b4c285SDoug Rabson 
2471f6b4c285SDoug Rabson 	/*
2472f6b4c285SDoug Rabson 	 * Only one allowed at a time.
2473f6b4c285SDoug Rabson 	 */
2474f6b4c285SDoug Rabson 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2475f6b4c285SDoug Rabson 		return (EBUSY);
2476f6b4c285SDoug Rabson 
2477f6b4c285SDoug Rabson 	/*
2478f6b4c285SDoug Rabson 	 * Get real filehandle for root of exported FS.
2479f6b4c285SDoug Rabson 	 */
2480f6b4c285SDoug Rabson 	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2481f6b4c285SDoug Rabson 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2482f6b4c285SDoug Rabson 
2483f6b4c285SDoug Rabson 	if ((error = VFS_ROOT(mp, &rvp)))
2484f6b4c285SDoug Rabson 		return (error);
2485f6b4c285SDoug Rabson 
2486f6b4c285SDoug Rabson 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2487f6b4c285SDoug Rabson 		return (error);
2488f6b4c285SDoug Rabson 
2489f6b4c285SDoug Rabson 	vput(rvp);
2490f6b4c285SDoug Rabson 
2491f6b4c285SDoug Rabson 	/*
2492f6b4c285SDoug Rabson 	 * If an indexfile was specified, pull it in.
2493f6b4c285SDoug Rabson 	 */
2494f6b4c285SDoug Rabson 	if (argp->ex_indexfile != NULL) {
2495f6b4c285SDoug Rabson 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2496f6b4c285SDoug Rabson 		    M_WAITOK);
2497f6b4c285SDoug Rabson 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2498f6b4c285SDoug Rabson 		    MAXNAMLEN, (size_t *)0);
2499f6b4c285SDoug Rabson 		if (!error) {
2500f6b4c285SDoug Rabson 			/*
2501f6b4c285SDoug Rabson 			 * Check for illegal filenames.
2502f6b4c285SDoug Rabson 			 */
2503f6b4c285SDoug Rabson 			for (cp = nfs_pub.np_index; *cp; cp++) {
2504f6b4c285SDoug Rabson 				if (*cp == '/') {
2505f6b4c285SDoug Rabson 					error = EINVAL;
2506f6b4c285SDoug Rabson 					break;
2507f6b4c285SDoug Rabson 				}
2508f6b4c285SDoug Rabson 			}
2509f6b4c285SDoug Rabson 		}
2510f6b4c285SDoug Rabson 		if (error) {
2511f6b4c285SDoug Rabson 			FREE(nfs_pub.np_index, M_TEMP);
2512f6b4c285SDoug Rabson 			return (error);
2513f6b4c285SDoug Rabson 		}
2514f6b4c285SDoug Rabson 	}
2515f6b4c285SDoug Rabson 
2516f6b4c285SDoug Rabson 	nfs_pub.np_mount = mp;
2517f6b4c285SDoug Rabson 	nfs_pub.np_valid = 1;
2518f6b4c285SDoug Rabson 	return (0);
2519f6b4c285SDoug Rabson }
2520f6b4c285SDoug Rabson 
252121a90397SAlfred Perlstein /*
252221a90397SAlfred Perlstein  * Used by the filesystems to determine if a given network address
 * (passed in 'nam') is present in their exports list, returns a pointer
252421a90397SAlfred Perlstein  * to struct netcred so that the filesystem can examine it for
252521a90397SAlfred Perlstein  * access rights (read/write/etc).
252621a90397SAlfred Perlstein  */
2527df8bae1dSRodney W. Grimes struct netcred *
2528df8bae1dSRodney W. Grimes vfs_export_lookup(mp, nep, nam)
2529df8bae1dSRodney W. Grimes 	register struct mount *mp;
2530df8bae1dSRodney W. Grimes 	struct netexport *nep;
253157bf258eSGarrett Wollman 	struct sockaddr *nam;
2532df8bae1dSRodney W. Grimes {
2533df8bae1dSRodney W. Grimes 	register struct netcred *np;
2534df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
2535df8bae1dSRodney W. Grimes 	struct sockaddr *saddr;
2536df8bae1dSRodney W. Grimes 
2537df8bae1dSRodney W. Grimes 	np = NULL;
2538df8bae1dSRodney W. Grimes 	if (mp->mnt_flag & MNT_EXPORTED) {
2539df8bae1dSRodney W. Grimes 		/*
2540df8bae1dSRodney W. Grimes 		 * Lookup in the export list first.
2541df8bae1dSRodney W. Grimes 		 */
2542df8bae1dSRodney W. Grimes 		if (nam != NULL) {
254357bf258eSGarrett Wollman 			saddr = nam;
2544df8bae1dSRodney W. Grimes 			rnh = nep->ne_rtable[saddr->sa_family];
2545df8bae1dSRodney W. Grimes 			if (rnh != NULL) {
2546df8bae1dSRodney W. Grimes 				np = (struct netcred *)
2547df8bae1dSRodney W. Grimes 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2548df8bae1dSRodney W. Grimes 							      rnh);
2549df8bae1dSRodney W. Grimes 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2550df8bae1dSRodney W. Grimes 					np = NULL;
2551df8bae1dSRodney W. Grimes 			}
2552df8bae1dSRodney W. Grimes 		}
2553df8bae1dSRodney W. Grimes 		/*
2554df8bae1dSRodney W. Grimes 		 * If no address match, use the default if it exists.
2555df8bae1dSRodney W. Grimes 		 */
2556df8bae1dSRodney W. Grimes 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2557df8bae1dSRodney W. Grimes 			np = &nep->ne_defexported;
2558df8bae1dSRodney W. Grimes 	}
2559df8bae1dSRodney W. Grimes 	return (np);
2560df8bae1dSRodney W. Grimes }
256161f5d510SDavid Greenman 
256261f5d510SDavid Greenman /*
256361f5d510SDavid Greenman  * perform msync on all vnodes under a mount point
256461f5d510SDavid Greenman  * the mount point must be locked.
256561f5d510SDavid Greenman  */
void
vfs_msync(struct mount *mp, int flags) {
	struct vnode *vp, *nvp;
	struct vm_object *obj;
	int anyio, tries;

	/* Bounded number of full passes over the mount's vnode list. */
	tries = 5;
loop:
	anyio = 0;
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {

		nvp = LIST_NEXT(vp, v_mntvnodes);

		/* The vnode moved off this mount under us; restart the scan. */
		if (vp->v_mount != mp) {
			goto loop;
		}

		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
			continue;

		/*
		 * For the non-waiting case, cheaply skip vnodes with no
		 * VM object, no dirty pages, or that are already locked.
		 */
		if (flags != MNT_WAIT) {
			if (VOP_GETVOBJECT(vp, &obj) != 0 ||
			    (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
				continue;
			if (VOP_ISLOCKED(vp, NULL))
				continue;
		}

		mtx_enter(&vp->v_interlock, MTX_DEF);
		if (VOP_GETVOBJECT(vp, &obj) == 0 &&
		    (obj->flags & OBJ_MIGHTBEDIRTY)) {
			/* LK_INTERLOCK: vget() consumes the interlock. */
			if (!vget(vp,
				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
				if (VOP_GETVOBJECT(vp, &obj) == 0) {
					/* Push dirty pages; sync only for MNT_WAIT. */
					vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
					anyio = 1;
				}
				vput(vp);
			}
		} else {
			mtx_exit(&vp->v_interlock, MTX_DEF);
		}
	}
	/* Re-scan while progress was made, up to the retry limit. */
	if (anyio && (--tries > 0))
		goto loop;
}
26126476c0d2SJohn Dyson 
26136476c0d2SJohn Dyson /*
26146476c0d2SJohn Dyson  * Create the VM object needed for VMIO and mmap support.  This
26156476c0d2SJohn Dyson  * is done for all VREG files in the system.  Some filesystems might
26166476c0d2SJohn Dyson  * afford the additional metadata buffering capability of the
26176476c0d2SJohn Dyson  * VMIO code by making the device node be VMIO mode also.
261895e5e988SJohn Dyson  *
2619fb116777SEivind Eklund  * vp must be locked when vfs_object_create is called.
26206476c0d2SJohn Dyson  */
26216476c0d2SJohn Dyson int
2622fb116777SEivind Eklund vfs_object_create(vp, p, cred)
26236476c0d2SJohn Dyson 	struct vnode *vp;
26246476c0d2SJohn Dyson 	struct proc *p;
26256476c0d2SJohn Dyson 	struct ucred *cred;
26266476c0d2SJohn Dyson {
26279ff5ce6bSBoris Popov 	return (VOP_CREATEVOBJECT(vp, cred, p));
26286476c0d2SJohn Dyson }
2629b15a966eSPoul-Henning Kamp 
2630453aaa0dSEivind Eklund /*
2631453aaa0dSEivind Eklund  * Mark a vnode as free, putting it up for recycling.
2632453aaa0dSEivind Eklund  */
2633c904bbbdSKirk McKusick void
2634a051452aSPoul-Henning Kamp vfree(vp)
2635b15a966eSPoul-Henning Kamp 	struct vnode *vp;
2636b15a966eSPoul-Henning Kamp {
2637925a3a41SJohn Dyson 	int s;
2638925a3a41SJohn Dyson 
2639925a3a41SJohn Dyson 	s = splbio();
2640a051452aSPoul-Henning Kamp 	simple_lock(&vnode_free_list_slock);
2641c904bbbdSKirk McKusick 	KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free"));
2642a051452aSPoul-Henning Kamp 	if (vp->v_flag & VAGE) {
2643a051452aSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2644a051452aSPoul-Henning Kamp 	} else {
2645b15a966eSPoul-Henning Kamp 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
26468670684aSPoul-Henning Kamp 	}
2647a051452aSPoul-Henning Kamp 	freevnodes++;
2648b15a966eSPoul-Henning Kamp 	simple_unlock(&vnode_free_list_slock);
2649a051452aSPoul-Henning Kamp 	vp->v_flag &= ~VAGE;
2650a051452aSPoul-Henning Kamp 	vp->v_flag |= VFREE;
2651925a3a41SJohn Dyson 	splx(s);
2652b15a966eSPoul-Henning Kamp }
2653a051452aSPoul-Henning Kamp 
2654453aaa0dSEivind Eklund /*
2655453aaa0dSEivind Eklund  * Opposite of vfree() - mark a vnode as in use.
2656453aaa0dSEivind Eklund  */
265747221757SJohn Dyson void
2658a051452aSPoul-Henning Kamp vbusy(vp)
2659a051452aSPoul-Henning Kamp 	struct vnode *vp;
2660a051452aSPoul-Henning Kamp {
2661925a3a41SJohn Dyson 	int s;
2662925a3a41SJohn Dyson 
2663925a3a41SJohn Dyson 	s = splbio();
2664a051452aSPoul-Henning Kamp 	simple_lock(&vnode_free_list_slock);
2665c904bbbdSKirk McKusick 	KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free"));
2666a051452aSPoul-Henning Kamp 	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2667a051452aSPoul-Henning Kamp 	freevnodes--;
2668a051452aSPoul-Henning Kamp 	simple_unlock(&vnode_free_list_slock);
266964d3c7e3SJohn Dyson 	vp->v_flag &= ~(VFREE|VAGE);
2670925a3a41SJohn Dyson 	splx(s);
2671b15a966eSPoul-Henning Kamp }
26721cbbd625SGarrett Wollman 
26731cbbd625SGarrett Wollman /*
26741cbbd625SGarrett Wollman  * Record a process's interest in events which might happen to
26751cbbd625SGarrett Wollman  * a vnode.  Because poll uses the historic select-style interface
26761cbbd625SGarrett Wollman  * internally, this routine serves as both the ``check for any
26771cbbd625SGarrett Wollman  * pending events'' and the ``record my interest in future events''
26781cbbd625SGarrett Wollman  * functions.  (These are done together, while the lock is held,
26791cbbd625SGarrett Wollman  * to avoid race conditions.)
26801cbbd625SGarrett Wollman  */
26811cbbd625SGarrett Wollman int
26821cbbd625SGarrett Wollman vn_pollrecord(vp, p, events)
26831cbbd625SGarrett Wollman 	struct vnode *vp;
26841cbbd625SGarrett Wollman 	struct proc *p;
26851cbbd625SGarrett Wollman 	short events;
26861cbbd625SGarrett Wollman {
26871cbbd625SGarrett Wollman 	simple_lock(&vp->v_pollinfo.vpi_lock);
26881cbbd625SGarrett Wollman 	if (vp->v_pollinfo.vpi_revents & events) {
26891cbbd625SGarrett Wollman 		/*
26901cbbd625SGarrett Wollman 		 * This leaves events we are not interested
26911cbbd625SGarrett Wollman 		 * in available for the other process which
26921cbbd625SGarrett Wollman 		 * which presumably had requested them
26931cbbd625SGarrett Wollman 		 * (otherwise they would never have been
26941cbbd625SGarrett Wollman 		 * recorded).
26951cbbd625SGarrett Wollman 		 */
26961cbbd625SGarrett Wollman 		events &= vp->v_pollinfo.vpi_revents;
26971cbbd625SGarrett Wollman 		vp->v_pollinfo.vpi_revents &= ~events;
26981cbbd625SGarrett Wollman 
26991cbbd625SGarrett Wollman 		simple_unlock(&vp->v_pollinfo.vpi_lock);
27001cbbd625SGarrett Wollman 		return events;
27011cbbd625SGarrett Wollman 	}
27021cbbd625SGarrett Wollman 	vp->v_pollinfo.vpi_events |= events;
27031cbbd625SGarrett Wollman 	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
27041cbbd625SGarrett Wollman 	simple_unlock(&vp->v_pollinfo.vpi_lock);
27051cbbd625SGarrett Wollman 	return 0;
27061cbbd625SGarrett Wollman }
27071cbbd625SGarrett Wollman 
27081cbbd625SGarrett Wollman /*
27091cbbd625SGarrett Wollman  * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
27101cbbd625SGarrett Wollman  * it is possible for us to miss an event due to race conditions, but
27111cbbd625SGarrett Wollman  * that condition is expected to be rare, so for the moment it is the
27121cbbd625SGarrett Wollman  * preferred interface.
27131cbbd625SGarrett Wollman  */
27141cbbd625SGarrett Wollman void
27151cbbd625SGarrett Wollman vn_pollevent(vp, events)
27161cbbd625SGarrett Wollman 	struct vnode *vp;
27171cbbd625SGarrett Wollman 	short events;
27181cbbd625SGarrett Wollman {
27191cbbd625SGarrett Wollman 	simple_lock(&vp->v_pollinfo.vpi_lock);
27201cbbd625SGarrett Wollman 	if (vp->v_pollinfo.vpi_events & events) {
27211cbbd625SGarrett Wollman 		/*
27221cbbd625SGarrett Wollman 		 * We clear vpi_events so that we don't
27231cbbd625SGarrett Wollman 		 * call selwakeup() twice if two events are
27241cbbd625SGarrett Wollman 		 * posted before the polling process(es) is
27251cbbd625SGarrett Wollman 		 * awakened.  This also ensures that we take at
27261cbbd625SGarrett Wollman 		 * most one selwakeup() if the polling process
27271cbbd625SGarrett Wollman 		 * is no longer interested.  However, it does
27281cbbd625SGarrett Wollman 		 * mean that only one event can be noticed at
27291cbbd625SGarrett Wollman 		 * a time.  (Perhaps we should only clear those
27301cbbd625SGarrett Wollman 		 * event bits which we note?) XXX
27311cbbd625SGarrett Wollman 		 */
27321cbbd625SGarrett Wollman 		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
27331cbbd625SGarrett Wollman 		vp->v_pollinfo.vpi_revents |= events;
27341cbbd625SGarrett Wollman 		selwakeup(&vp->v_pollinfo.vpi_selinfo);
27351cbbd625SGarrett Wollman 	}
27361cbbd625SGarrett Wollman 	simple_unlock(&vp->v_pollinfo.vpi_lock);
27371cbbd625SGarrett Wollman }
27381cbbd625SGarrett Wollman 
27391cbbd625SGarrett Wollman /*
27401cbbd625SGarrett Wollman  * Wake up anyone polling on vp because it is being revoked.
27411cbbd625SGarrett Wollman  * This depends on dead_poll() returning POLLHUP for correct
27421cbbd625SGarrett Wollman  * behavior.
27431cbbd625SGarrett Wollman  */
27441cbbd625SGarrett Wollman void
27451cbbd625SGarrett Wollman vn_pollgone(vp)
27461cbbd625SGarrett Wollman 	struct vnode *vp;
27471cbbd625SGarrett Wollman {
27481cbbd625SGarrett Wollman 	simple_lock(&vp->v_pollinfo.vpi_lock);
27491cbbd625SGarrett Wollman 	if (vp->v_pollinfo.vpi_events) {
27501cbbd625SGarrett Wollman 		vp->v_pollinfo.vpi_events = 0;
27511cbbd625SGarrett Wollman 		selwakeup(&vp->v_pollinfo.vpi_selinfo);
27521cbbd625SGarrett Wollman 	}
27531cbbd625SGarrett Wollman 	simple_unlock(&vp->v_pollinfo.vpi_lock);
27541cbbd625SGarrett Wollman }
2755b1897c19SJulian Elischer 
2756b1897c19SJulian Elischer 
2757b1897c19SJulian Elischer 
2758b1897c19SJulian Elischer /*
2759b1897c19SJulian Elischer  * Routine to create and manage a filesystem syncer vnode.
2760b1897c19SJulian Elischer  */
/*
 * Operations the syncer vnode implements itself; the remaining
 * entry points are aliased to the generic no-op/stub routines.
 */
#define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
static int	sync_fsync __P((struct  vop_fsync_args *));
static int	sync_inactive __P((struct  vop_inactive_args *));
static int	sync_reclaim  __P((struct  vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
static int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

/* Vector of vnode operations for syncer vnodes; anything not listed
 * below resolves to vop_eopnotsupp via the default entry. */
static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

/* Register the operations vector with the VFS at boot/module load. */
VNODEOP_SET(sync_vnodeop_opv_desc);
2787b1897c19SJulian Elischer 
2788b1897c19SJulian Elischer /*
2789b1897c19SJulian Elischer  * Create a new filesystem syncer vnode for the specified mount point.
2790b1897c19SJulian Elischer  */
2791b1897c19SJulian Elischer int
2792b1897c19SJulian Elischer vfs_allocate_syncvnode(mp)
2793b1897c19SJulian Elischer 	struct mount *mp;
2794b1897c19SJulian Elischer {
2795b1897c19SJulian Elischer 	struct vnode *vp;
2796b1897c19SJulian Elischer 	static long start, incr, next;
2797b1897c19SJulian Elischer 	int error;
2798b1897c19SJulian Elischer 
2799b1897c19SJulian Elischer 	/* Allocate a new vnode */
2800b1897c19SJulian Elischer 	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
2801b1897c19SJulian Elischer 		mp->mnt_syncer = NULL;
2802b1897c19SJulian Elischer 		return (error);
2803b1897c19SJulian Elischer 	}
2804b1897c19SJulian Elischer 	vp->v_type = VNON;
2805b1897c19SJulian Elischer 	/*
2806b1897c19SJulian Elischer 	 * Place the vnode onto the syncer worklist. We attempt to
2807b1897c19SJulian Elischer 	 * scatter them about on the list so that they will go off
2808b1897c19SJulian Elischer 	 * at evenly distributed times even if all the filesystems
2809b1897c19SJulian Elischer 	 * are mounted at once.
2810b1897c19SJulian Elischer 	 */
2811b1897c19SJulian Elischer 	next += incr;
2812b1897c19SJulian Elischer 	if (next == 0 || next > syncer_maxdelay) {
2813b1897c19SJulian Elischer 		start /= 2;
2814b1897c19SJulian Elischer 		incr /= 2;
2815b1897c19SJulian Elischer 		if (start == 0) {
2816b1897c19SJulian Elischer 			start = syncer_maxdelay / 2;
2817b1897c19SJulian Elischer 			incr = syncer_maxdelay;
2818b1897c19SJulian Elischer 		}
2819b1897c19SJulian Elischer 		next = start;
2820b1897c19SJulian Elischer 	}
2821b1897c19SJulian Elischer 	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
2822b1897c19SJulian Elischer 	mp->mnt_syncer = vp;
2823b1897c19SJulian Elischer 	return (0);
2824b1897c19SJulian Elischer }
2825b1897c19SJulian Elischer 
2826b1897c19SJulian Elischer /*
2827b1897c19SJulian Elischer  * Do a lazy sync of the filesystem.
2828b1897c19SJulian Elischer  */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if (ap->a_waitfor != MNT_LAZY)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add_to_worklist(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list.
	 *
	 * NOTE(review): mountlist_mtx is explicitly dropped on both
	 * failure paths but not on the success path — presumably
	 * vfs_busy()/vn_start_write() semantics account for this;
	 * verify against the vfs_busy() interlock contract.
	 */
	mtx_enter(&mountlist_mtx, MTX_DEF);
	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_mtx, p) != 0) {
		mtx_exit(&mountlist_mtx, MTX_DEF);
		return (0);
	}
	if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
		vfs_unbusy(mp, p);
		mtx_exit(&mountlist_mtx, MTX_DEF);
		return (0);
	}
	/* Temporarily force synchronous semantics while we push data. */
	asyncflag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	vfs_msync(mp, MNT_NOWAIT);
	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
	if (asyncflag)
		mp->mnt_flag |= MNT_ASYNC;
	vn_finished_write(mp);
	vfs_unbusy(mp, p);
	return (0);
}
2878b1897c19SJulian Elischer 
2879b1897c19SJulian Elischer /*
 * The syncer vnode is no longer referenced.
2881b1897c19SJulian Elischer  */
2882db878ba4SEivind Eklund static int
2883b1897c19SJulian Elischer sync_inactive(ap)
2884b1897c19SJulian Elischer 	struct vop_inactive_args /* {
2885b1897c19SJulian Elischer 		struct vnode *a_vp;
2886b1897c19SJulian Elischer 		struct proc *a_p;
2887b1897c19SJulian Elischer 	} */ *ap;
2888b1897c19SJulian Elischer {
2889b1897c19SJulian Elischer 
2890b1897c19SJulian Elischer 	vgone(ap->a_vp);
2891b1897c19SJulian Elischer 	return (0);
2892b1897c19SJulian Elischer }
2893b1897c19SJulian Elischer 
2894b1897c19SJulian Elischer /*
2895b1897c19SJulian Elischer  * The syncer vnode is no longer needed and is being decommissioned.
289642e26d47SMatthew Dillon  *
289742e26d47SMatthew Dillon  * Modifications to the worklist must be protected at splbio().
2898b1897c19SJulian Elischer  */
2899db878ba4SEivind Eklund static int
2900b1897c19SJulian Elischer sync_reclaim(ap)
2901b1897c19SJulian Elischer 	struct vop_reclaim_args /* {
2902b1897c19SJulian Elischer 		struct vnode *a_vp;
2903b1897c19SJulian Elischer 	} */ *ap;
2904b1897c19SJulian Elischer {
2905b1897c19SJulian Elischer 	struct vnode *vp = ap->a_vp;
290642e26d47SMatthew Dillon 	int s;
2907b1897c19SJulian Elischer 
290842e26d47SMatthew Dillon 	s = splbio();
2909b1897c19SJulian Elischer 	vp->v_mount->mnt_syncer = NULL;
2910b1897c19SJulian Elischer 	if (vp->v_flag & VONWORKLST) {
2911b1897c19SJulian Elischer 		LIST_REMOVE(vp, v_synclist);
2912b1897c19SJulian Elischer 		vp->v_flag &= ~VONWORKLST;
2913b1897c19SJulian Elischer 	}
291442e26d47SMatthew Dillon 	splx(s);
2915b1897c19SJulian Elischer 
2916b1897c19SJulian Elischer 	return (0);
2917b1897c19SJulian Elischer }
2918b1897c19SJulian Elischer 
2919b1897c19SJulian Elischer /*
2920b1897c19SJulian Elischer  * Print out a syncer vnode.
2921b1897c19SJulian Elischer  */
2922db878ba4SEivind Eklund static int
2923b1897c19SJulian Elischer sync_print(ap)
2924b1897c19SJulian Elischer 	struct vop_print_args /* {
2925b1897c19SJulian Elischer 		struct vnode *a_vp;
2926b1897c19SJulian Elischer 	} */ *ap;
2927b1897c19SJulian Elischer {
2928b1897c19SJulian Elischer 	struct vnode *vp = ap->a_vp;
2929b1897c19SJulian Elischer 
2930b1897c19SJulian Elischer 	printf("syncer vnode");
2931b1897c19SJulian Elischer 	if (vp->v_vnlock != NULL)
2932b1897c19SJulian Elischer 		lockmgr_printinfo(vp->v_vnlock);
2933b1897c19SJulian Elischer 	printf("\n");
2934b1897c19SJulian Elischer 	return (0);
2935b1897c19SJulian Elischer }
29366ca54864SPoul-Henning Kamp 
29376ca54864SPoul-Henning Kamp /*
29386ca54864SPoul-Henning Kamp  * extract the dev_t from a VBLK or VCHR
29396ca54864SPoul-Henning Kamp  */
29406ca54864SPoul-Henning Kamp dev_t
29416ca54864SPoul-Henning Kamp vn_todev(vp)
29426ca54864SPoul-Henning Kamp 	struct vnode *vp;
29436ca54864SPoul-Henning Kamp {
29446ca54864SPoul-Henning Kamp 	if (vp->v_type != VBLK && vp->v_type != VCHR)
29456ca54864SPoul-Henning Kamp 		return (NODEV);
29466ca54864SPoul-Henning Kamp 	return (vp->v_rdev);
29476ca54864SPoul-Henning Kamp }
294841d2e3e0SPoul-Henning Kamp 
294941d2e3e0SPoul-Henning Kamp /*
295041d2e3e0SPoul-Henning Kamp  * Check if vnode represents a disk device
295141d2e3e0SPoul-Henning Kamp  */
295241d2e3e0SPoul-Henning Kamp int
2953ba4ad1fcSPoul-Henning Kamp vn_isdisk(vp, errp)
295441d2e3e0SPoul-Henning Kamp 	struct vnode *vp;
2955ba4ad1fcSPoul-Henning Kamp 	int *errp;
295641d2e3e0SPoul-Henning Kamp {
295764dc16dfSPoul-Henning Kamp 	struct cdevsw *cdevsw;
295864dc16dfSPoul-Henning Kamp 
2959ba4ad1fcSPoul-Henning Kamp 	if (vp->v_type != VBLK && vp->v_type != VCHR) {
2960ba4ad1fcSPoul-Henning Kamp 		if (errp != NULL)
2961ba4ad1fcSPoul-Henning Kamp 			*errp = ENOTBLK;
296241d2e3e0SPoul-Henning Kamp 		return (0);
2963ba4ad1fcSPoul-Henning Kamp 	}
2964b081a64aSChris Costello 	if (vp->v_rdev == NULL) {
2965b081a64aSChris Costello 		if (errp != NULL)
2966b081a64aSChris Costello 			*errp = ENXIO;
2967b081a64aSChris Costello 		return (0);
2968b081a64aSChris Costello 	}
296964dc16dfSPoul-Henning Kamp 	cdevsw = devsw(vp->v_rdev);
297064dc16dfSPoul-Henning Kamp 	if (cdevsw == NULL) {
2971ba4ad1fcSPoul-Henning Kamp 		if (errp != NULL)
2972ba4ad1fcSPoul-Henning Kamp 			*errp = ENXIO;
297341d2e3e0SPoul-Henning Kamp 		return (0);
2974ba4ad1fcSPoul-Henning Kamp 	}
297564dc16dfSPoul-Henning Kamp 	if (!(cdevsw->d_flags & D_DISK)) {
2976ba4ad1fcSPoul-Henning Kamp 		if (errp != NULL)
2977ba4ad1fcSPoul-Henning Kamp 			*errp = ENOTBLK;
297841d2e3e0SPoul-Henning Kamp 		return (0);
2979ba4ad1fcSPoul-Henning Kamp 	}
2980ba4ad1fcSPoul-Henning Kamp 	if (errp != NULL)
2981ba4ad1fcSPoul-Henning Kamp 		*errp = 0;
298241d2e3e0SPoul-Henning Kamp 	return (1);
298341d2e3e0SPoul-Henning Kamp }
298441d2e3e0SPoul-Henning Kamp 
2985453aaa0dSEivind Eklund /*
2986a863c0fbSEivind Eklund  * Free data allocated by namei(); see namei(9) for details.
2987453aaa0dSEivind Eklund  */
void
NDFREE(ndp, flags)
     struct nameidata *ndp;
     const uint flags;
{
	/* Release the pathname buffer unless the caller opted to keep it. */
	if (!(flags & NDF_NO_FREE_PNBUF) &&
	    (ndp->ni_cnd.cn_flags & HASBUF)) {
		zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
		ndp->ni_cnd.cn_flags &= ~HASBUF;
	}
	/*
	 * Unlock the parent directory (LOCKPARENT), but not when it is
	 * the same vnode as the leaf — it would be unlocked twice.
	 */
	if (!(flags & NDF_NO_DVP_UNLOCK) &&
	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
	    ndp->ni_dvp != ndp->ni_vp)
		VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc);
	/* Drop the parent reference taken for LOCKPARENT/WANTPARENT. */
	if (!(flags & NDF_NO_DVP_RELE) &&
	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
		vrele(ndp->ni_dvp);
		ndp->ni_dvp = NULL;
	}
	/* Unlock the leaf vnode if LOCKLEAF left it locked. */
	if (!(flags & NDF_NO_VP_UNLOCK) &&
	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
		VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc);
	/* Drop the reference on the leaf vnode itself. */
	if (!(flags & NDF_NO_VP_RELE) &&
	    ndp->ni_vp) {
		vrele(ndp->ni_vp);
		ndp->ni_vp = NULL;
	}
	/* Drop the starting directory saved by SAVESTART. */
	if (!(flags & NDF_NO_STARTDIR_RELE) &&
	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
		vrele(ndp->ni_startdir);
		ndp->ni_startdir = NULL;
	}
}
3021e39c53edSPoul-Henning Kamp 
3022e0848358SRobert Watson /*
3023e0848358SRobert Watson  * Common file system object access control check routine.  Accepts a
3024e0848358SRobert Watson  * vnode's type, "mode", uid and gid, requested access mode, credentials,
3025e0848358SRobert Watson  * and optional call-by-reference privused argument allowing vaccess()
3026e0848358SRobert Watson  * to indicate to the caller whether privilege was used to satisfy the
3027e0848358SRobert Watson  * request.  Returns 0 on success, or an errno on failure.
3028e0848358SRobert Watson  */
int
vaccess(type, file_mode, file_uid, file_gid, acc_mode, cred, privused)
	enum vtype type;
	mode_t file_mode;
	uid_t file_uid;
	gid_t file_gid;
	mode_t acc_mode;
	struct ucred *cred;
	int *privused;
{
	/* Access bits (VEXEC/VREAD/VWRITE) granted by the mode bits alone. */
	mode_t dac_granted;
#ifdef CAPABILITIES
	/* Access bits additionally granted via capabilities. */
	mode_t cap_granted;
#endif

	/*
	 * Look for a normal, non-privileged way to access the file/directory
	 * as requested.  If it exists, go with that.
	 */

	if (privused != NULL)
		*privused = 0;

	dac_granted = 0;

	/* Check the owner.  Note: owner match is exclusive — group and
	 * other bits are never consulted for the owner. */
	if (cred->cr_uid == file_uid) {
		if (file_mode & S_IXUSR)
			dac_granted |= VEXEC;
		if (file_mode & S_IRUSR)
			dac_granted |= VREAD;
		if (file_mode & S_IWUSR)
			dac_granted |= VWRITE;

		if ((acc_mode & dac_granted) == acc_mode)
			return (0);

		goto privcheck;
	}

	/* Otherwise, check the groups (first match) */
	if (groupmember(file_gid, cred)) {
		if (file_mode & S_IXGRP)
			dac_granted |= VEXEC;
		if (file_mode & S_IRGRP)
			dac_granted |= VREAD;
		if (file_mode & S_IWGRP)
			dac_granted |= VWRITE;

		if ((acc_mode & dac_granted) == acc_mode)
			return (0);

		goto privcheck;
	}

	/* Otherwise, check everyone else. */
	if (file_mode & S_IXOTH)
		dac_granted |= VEXEC;
	if (file_mode & S_IROTH)
		dac_granted |= VREAD;
	if (file_mode & S_IWOTH)
		dac_granted |= VWRITE;
	if ((acc_mode & dac_granted) == acc_mode)
		return (0);

privcheck:
	/* DAC denied some requested bit; fall back to superuser privilege. */
	if (!suser_xxx(cred, NULL, PRISON_ROOT)) {
		/* XXX audit: privilege used */
		if (privused != NULL)
			*privused = 1;
		return (0);
	}

#ifdef CAPABILITIES
	/*
	 * Build a capability mask to determine if the set of capabilities
	 * satisfies the requirements when combined with the granted mask
	 * from above.
	 * For each capability, if the capability is required, bitwise
	 * or the request type onto the cap_granted mask.
	 */
	cap_granted = 0;
	if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) &&
	    !cap_check_xxx(cred, NULL, CAP_DAC_EXECUTE, PRISON_ROOT))
	    cap_granted |= VEXEC;

	if ((acc_mode & VREAD) && ((dac_granted & VREAD) == 0) &&
	    !cap_check_xxx(cred, NULL, CAP_DAC_READ_SEARCH, PRISON_ROOT))
		cap_granted |= VREAD;

	if ((acc_mode & VWRITE) && ((dac_granted & VWRITE) == 0) &&
	    !cap_check_xxx(cred, NULL, CAP_DAC_WRITE, PRISON_ROOT))
		cap_granted |= VWRITE;

	/* Succeed if DAC plus capabilities cover every requested bit. */
	if ((acc_mode & (cap_granted | dac_granted)) == acc_mode) {
		/* XXX audit: privilege used */
		if (privused != NULL)
			*privused = 1;
		return (0);
	}
#endif

	return (EACCES);
}
3133