/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $FreeBSD$
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"
#include "opt_ffs.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <machine/limits.h>
#include <machine/mutex.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

static void	addalias __P((struct vnode *vp, dev_t nvp_rdev));
static void	insmntque __P((struct vnode *vp, struct mount *mp));
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));

/*
 * Number of vnodes in existence.  Increased whenever getnewvnode()
 * allocates a new vnode, never decreased.
 */
static unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

/*
 * Conversion tables for conversion from vnode types to inode formats
 * and back.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * List of vnodes that are ready for recycling.
 */
static TAILQ_HEAD(freelst, vnode) vnode_free_list;
/*
 * Minimum number of free vnodes.  If there are fewer free vnodes than
 * this, getnewvnode() will return a newly allocated vnode.
 */
static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
/* Number of vnodes in the free list. */
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

/*
 * Various variables used for debugging the new implementation of
 * reassignbuf().
 * XXX these are probably of (very) limited utility now.
 */
static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
static int reassignbufloops;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
static int reassignbufsortgood;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
static int reassignbufsortbad;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
/* Set to 0 for old insertion-sort based reassignbuf, 1 for modern method. */
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");

#ifdef ENABLE_VFS_IOOPT
/* See NOTES for a description of this setting. */
int vfs_ioopt = 0;
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
#endif

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;

/* For any iteration/modification of mnt_vnodelist */
struct simplelock mntvnode_slock;
/*
 * Cache for the mount type id assigned to NFS.  This is used for
 * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c.
 */
int	nfs_mount_type = -1;

#ifndef NULL_SIMPLELOCKS
/* To keep more than one thread at a time from running vfs_getnewfsid */
static struct simplelock mntid_slock;

/* For any iteration/modification of vnode_free_list */
static struct simplelock vnode_free_list_slock;

/*
 * For any iteration/modification of dev->si_hlist (linked through
 * v_specnext)
 */
static struct simplelock spechash_slock;
#endif

/* Publicly exported FS */
struct nfs_public nfs_pub;

/* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
static vm_zone_t vnode_zone;

/* Set to 1 to print out reclaim of active vnodes */
int	prtactive = 0;

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed.  To realize this,
 * we append vnodes to a "workitem" queue.  When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds.  Thus, mounted block devices
 * are delayed only about half the time that file data is delayed.
 * Similarly, directory updates are more critical, so are only delayed
 * about a third the time that file data is delayed.  Thus, there are
 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
 * one each second (driven off the filesystem syncer process).  The
 * syncer_delayno variable indicates the next queue that is to be processed.
 * Items that need to be processed soon are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */
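/*
 * Illustrative sketch (not part of the build): how a delay request maps
 * onto the wheel described above.  hashinit() rounds the table size up
 * to a power of two, so syncer_mask is (table size - 1) and the index
 * arithmetic wraps like a ring buffer:
 *
 *	slot = (syncer_delayno + delay) & syncer_mask;
 *
 * For example, with a 32-entry wheel (syncer_mask == 31), a 15-second
 * delay requested while syncer_delayno is 20 lands in slot
 * (20 + 15) & 31 == 3, which the syncer, advancing one slot per second,
 * reaches again 15 seconds later.
 */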
static int syncer_delayno = 0;
static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

#define SYNCER_MAXDELAY		32
static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;		/* max time to delay syncing data */
time_t filedelay = 30;		/* time to delay syncing files */
SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
time_t dirdelay = 29;		/* time to delay syncing directories */
SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
time_t metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
static int rushjob;		/* number of slots to run ASAP */
static int stat_rush_requests;	/* number of times I/O speeded up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");

/*
 * Number of vnodes we want to exist at any one time.  This is mostly used
 * to size hash tables in vnode-related code.  It is normally not used in
 * getnewvnode(), as wantfreevnodes is normally nonzero.
 *
 * XXX desiredvnodes is historical cruft and should not exist.
 */
int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
    &desiredvnodes, 0, "Maximum number of vnodes");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + cnt.v_page_count / 4;
	mtx_init(&mountlist_mtx, "mountlist", MTX_DEF);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct mtx *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			mtx_exit(interlkp, MTX_DEF);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			mtx_enter(interlkp, MTX_DEF);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
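
/*
 * Usage sketch (illustrative only; this is the pattern callers that walk
 * mountlist, such as getfsstat(), typically follow): each mount is
 * busied so it cannot be unmounted under the traversal.  On success
 * vfs_busy() consumes the interlock (via LK_INTERLOCK), so the caller
 * re-acquires mountlist_mtx before stepping to the next entry; on
 * failure the interlock is still held, as the comment above notes.
 *
 *	struct mount *mp, *nmp;
 *
 *	mtx_enter(&mountlist_mtx, MTX_DEF);
 *	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
 *			nmp = TAILQ_NEXT(mp, mnt_list);
 *			continue;
 *		}
 *		... examine mp ...
 *		mtx_enter(&mountlist_mtx, MTX_DEF);
 *		nmp = TAILQ_NEXT(mp, mnt_list);
 *		vfs_unbusy(mp, p);
 *	}
 *	mtx_exit(&mountlist_mtx, MTX_DEF);
 */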

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char	*fstypename;
	char	*devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	if (fstypename == NULL)
		return (ENODEV);
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_iosize_max = DFLTPHYS;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root.  If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	mtx_enter(&mountlist_mtx, MTX_DEF);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			mtx_exit(&mountlist_mtx, MTX_DEF);
			return (mp);
		}
	}
	mtx_exit(&mountlist_mtx, MTX_DEF);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid.  Try to make its val[0] unique, since this value
 * will be used to create fake device numbers for stat().  Also try (but
 * not so hard) to make its val[0] unique mod 2^16, since some emulators
 * only support 16-bit device numbers.  We end up with unique val[0]'s for
 * the first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls.
 *
 * Keep in mind that several mounts may be running in parallel.  Starting
 * the search one past where the previous search terminated is both a
 * micro-optimization and a defense against returning the same fsid to
 * different mounts.
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_int16_t mntid_base;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	tfsid.val[1] = mtype;
	mtype = (mtype & 0xFF) << 24;
	for (;;) {
		tfsid.val[0] = makeudev(255,
		    mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF));
		mntid_base++;
		if (vfs_getvfs(&tfsid) == NULL)
			break;
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];
	simple_unlock(&mntid_slock);
}
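
/*
 * Worked example (illustrative only): the minor number handed to
 * makeudev() above packs the filesystem type and the 16-bit search
 * counter as
 *
 *	bits 24-31: vfc_typenum & 0xFF
 *	bits 16-23: mntid_base bits 8-15
 *	bits  0-7:  mntid_base bits 0-7
 *
 * so for, say, vfc_typenum == 1 the first two calls try minor numbers
 * 0x01000000 and 0x01000001.  The low eight bits cycle fastest, which
 * is what keeps val[0] unique mod 2^16 for the first 2^8 calls.
 */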

/*
 * Knob to control the precision of file timestamps:
 *
 *   0 = seconds only; nanoseconds zeroed.
 *   1 = seconds and nanoseconds, accurate within 1/HZ.
 *   2 = seconds and nanoseconds, truncated to microseconds.
 * >=3 = seconds and nanoseconds, maximum precision.
 */
enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };

static int timestamp_precision = TSP_SEC;
SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
    &timestamp_precision, 0, "");

/*
 * Get a current timestamp.
 */
void
vfs_timestamp(tsp)
	struct timespec *tsp;
{
	struct timeval tv;

	switch (timestamp_precision) {
	case TSP_SEC:
		tsp->tv_sec = time_second;
		tsp->tv_nsec = 0;
		break;
	case TSP_HZ:
		getnanotime(tsp);
		break;
	case TSP_USEC:
		microtime(&tv);
		TIMEVAL_TO_TIMESPEC(&tv, tsp);
		break;
	case TSP_NSEC:
	default:
		nanotime(tsp);
		break;
	}
}
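
/*
 * Usage sketch (hypothetical caller; the inode field names are
 * illustrative, not from this file): a filesystem stamps its access and
 * modification times through this routine rather than reading the clock
 * directly, so the cost/precision tradeoff above applies everywhere at
 * once:
 *
 *	struct timespec ts;
 *
 *	vfs_timestamp(&ts);
 *	ip->i_mtime = ts.tv_sec;
 *	ip->i_mtimensec = ts.tv_nsec;
 */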

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s, count;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp = NULL;
	struct mount *vnmp;
	vm_object_t object;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else for (count = 0; count < freevnodes; count++) {
		vp = TAILQ_FIRST(&vnode_free_list);
		if (vp == NULL || vp->v_usecount)
			panic("getnewvnode: free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/*
		 * Don't recycle if active in the namecache or
		 * if it still has cached pages or we cannot get
		 * its interlock.
		 */
		if (LIST_FIRST(&vp->v_cache_src) != NULL ||
		    (VOP_GETVOBJECT(vp, &object) == 0 &&
		    (object->resident_page_count || object->ref_count)) ||
		    !mtx_try_enter(&vp->v_interlock, MTX_DEF)) {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			vp = NULL;
			continue;
		}
		/*
		 * Skip over it if its filesystem is being suspended.
		 */
		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
			break;
		mtx_exit(&vp->v_interlock, MTX_DEF);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		vp = NULL;
	}
	if (vp) {
		vp->v_flag |= VDOOMED;
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			mtx_exit(&vp->v_interlock, MTX_DEF);
		}
		vn_finished_write(vnmp);

#ifdef INVARIANTS
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
			if (vp->v_writecount != 0)
				panic("Non-zero write count");
		}
#endif
		vp->v_flag = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		mtx_init(&vp->v_interlock, "vnode interlock", MTX_DEF);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	lockinit(&vp->v_lock, PVFS, "vnlock", 0, LK_NOPAUSE);
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred);
	return (0);
}
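
/*
 * Caller sketch (illustrative; the tag and operations vector follow the
 * FFS convention, but any filesystem's will do): a VFS_VGET()
 * implementation typically obtains a vnode for a freshly read inode as
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	vp->v_data = ip;
 *
 * and then fills in v_type and the rest of its private state.
 */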

/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	s = splbio();
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
				error = BUF_TIMELOCK(bp,
				    LK_EXCLUSIVE | LK_SLEEPFAIL,
				    "vinvalbuf", slpflag, slptimeo);
				if (error == ENOLCK)
					break;
				splx(s);
				return (error);
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						BUF_UNLOCK(bp);
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= B_ASYNC;
						BUF_WRITE(bp);
					}
				} else {
					bremfree(bp);
					(void) BUF_WRITE(bp);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	mtx_enter(&vp->v_interlock, MTX_DEF);
	if (VOP_GETVOBJECT(vp, &object) == 0) {
		vm_object_page_remove(object, 0, 0,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	mtx_exit(&vp->v_interlock, MTX_DEF);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}
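
/*
 * Usage sketch (the pattern used by vclean() and by truncation paths;
 * illustrative only): V_SAVE asks that dirty data reach the disk before
 * invalidation, while a zero flags argument throws everything away:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);	(keep the data)
 *	error = vinvalbuf(vp, 0, cred, p, 0, 0);	(discard, e.g. revoke)
 */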

/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp &&
				    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp &&
				    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					BUF_WRITE(bp);
				}
				goto restartsync;
			}

		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}
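
/*
 * Worked example of the rounding above (illustrative): with an 8192-byte
 * block size, truncating to length 0 gives trunclbn 0 (every buffer is
 * eligible for invalidation), any length from 1 through 8192 gives
 * trunclbn 1 (logical block 0 is kept), and length 8193 gives trunclbn 2.
 * The "restartsync" pass then rewrites any remaining dirty buffer with a
 * negative lblkno (indirect blocks), which cannot simply be discarded.
 */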

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));

	vhold(vp);
	bp->b_vp = vp;
	bp->b_dev = vn_todev(vp);
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= BX_VNCLEAN;
	bp->b_xflags &= ~BX_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
		if (bp->b_xflags & BX_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * Add an item to the syncer work queue.
 */
static void
vn_syncer_add_to_worklist(struct vnode *vp, int delay)
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}

struct proc *updateproc;
static void sched_sync __P((void));
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	struct mount *mp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	mtx_enter(&Giant, MTX_DEF);

	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
	    SHUTDOWN_PRI_LAST);

	for (;;) {
		kproc_suspend_loop(p);

		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.  Be careful
		 * of interrupt race on slp queue.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			if (VOP_ISLOCKED(vp, NULL) == 0 &&
			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
				VOP_UNLOCK(vp, 0, p);
				vn_finished_write(mp);
			}
			s = splbio();
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: v_tag VT_VFS vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves it to a different
				 * slot we are safe.
				 */
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    !vn_isdisk(vp, NULL))
					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 */
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
			splx(s);
		}

		/*
		 * Do soft update processing.
		 */
#ifdef SOFTUPDATES
		softdep_process_worklist(NULL);
#endif

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process.  A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP.  Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait.  Otherwise start right over
		 * again.  We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Request the syncer daemon to speed up its work.
 * We never push it to speed up more than half of its
 * normal turn time, otherwise it could take over the cpu.
 */
int
speedup_syncer()
{
	int s;

	s = splhigh();
	if (updateproc->p_wchan == &lbolt)
		setrunnable(updateproc);
	splx(s);
	if (rushjob < syncdelay / 2) {
		rushjob += 1;
		stat_rush_requests += 1;
		return (1);
	}
	return(0);
}
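
/*
 * Worked example (illustrative): with syncdelay at its default of 30,
 * speedup_syncer() will raise rushjob to at most 15.  Each nonzero
 * rushjob value makes one pass of the sched_sync() loop skip its
 * one-second tsleep(), so a burst of fifteen requests collapses roughly
 * fifteen seconds of queued work into back-to-back passes, and no more.
 */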
1168453aaa0dSEivind Eklund */ 11691c7c3c6aSMatthew Dillon void 11701c7c3c6aSMatthew Dillon pbreassignbuf(bp, newvp) 11711c7c3c6aSMatthew Dillon struct buf *bp; 11721c7c3c6aSMatthew Dillon struct vnode *newvp; 11731c7c3c6aSMatthew Dillon { 1174a863c0fbSEivind Eklund 1175453aaa0dSEivind Eklund KASSERT(bp->b_flags & B_PAGING, 1176453aaa0dSEivind Eklund ("pbreassignbuf() on non phys bp %p", bp)); 11771c7c3c6aSMatthew Dillon bp->b_vp = newvp; 11780d94caffSDavid Greenman } 11790d94caffSDavid Greenman 11800d94caffSDavid Greenman /* 1181df8bae1dSRodney W. Grimes * Reassign a buffer from one vnode to another. 1182df8bae1dSRodney W. Grimes * Used to assign file specific control information 1183df8bae1dSRodney W. Grimes * (indirect blocks) to the vnode to which they belong. 1184df8bae1dSRodney W. Grimes */ 118526f9a767SRodney W. Grimes void 1186df8bae1dSRodney W. Grimes reassignbuf(bp, newvp) 1187df8bae1dSRodney W. Grimes register struct buf *bp; 1188df8bae1dSRodney W. Grimes register struct vnode *newvp; 1189df8bae1dSRodney W. Grimes { 1190b1897c19SJulian Elischer struct buflists *listheadp; 1191b1897c19SJulian Elischer int delay; 1192619594e8SJohn Dyson int s; 1193df8bae1dSRodney W. Grimes 1194df8bae1dSRodney W. Grimes if (newvp == NULL) { 1195df8bae1dSRodney W. Grimes printf("reassignbuf: NULL"); 1196df8bae1dSRodney W. Grimes return; 1197df8bae1dSRodney W. Grimes } 1198e929c00dSKirk McKusick ++reassignbufcalls; 1199619594e8SJohn Dyson 12001c7c3c6aSMatthew Dillon /* 12011c7c3c6aSMatthew Dillon * B_PAGING flagged buffers cannot be reassigned because their vp 12021c7c3c6aSMatthew Dillon * is not fully linked in. 12031c7c3c6aSMatthew Dillon */ 12041c7c3c6aSMatthew Dillon if (bp->b_flags & B_PAGING) 12051c7c3c6aSMatthew Dillon panic("cannot reassign paging buffer"); 12061c7c3c6aSMatthew Dillon 1207619594e8SJohn Dyson s = splbio(); 1208df8bae1dSRodney W. Grimes /* 1209df8bae1dSRodney W. Grimes * Delete from old vnode list, if on one. 1210df8bae1dSRodney W. Grimes */ 121102b00854SKirk McKusick if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) { 121202b00854SKirk McKusick if (bp->b_xflags & BX_VNDIRTY) 12134ef2094eSJulian Elischer listheadp = &bp->b_vp->v_dirtyblkhd; 121416e9e530SPeter Wemm else 12154ef2094eSJulian Elischer listheadp = &bp->b_vp->v_cleanblkhd; 121616e9e530SPeter Wemm TAILQ_REMOVE(listheadp, bp, b_vnbufs); 121702b00854SKirk McKusick bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); 12184ef2094eSJulian Elischer if (bp->b_vp != newvp) { 12194ef2094eSJulian Elischer vdrop(bp->b_vp); 12204ef2094eSJulian Elischer bp->b_vp = NULL; /* for clarification */ 12214ef2094eSJulian Elischer } 1222a051452aSPoul-Henning Kamp } 1223df8bae1dSRodney W. Grimes /* 12240d94caffSDavid Greenman * If dirty, put on list of dirty buffers; otherwise insert onto list 12250d94caffSDavid Greenman * of clean buffers. 1226df8bae1dSRodney W. 
Grimes */ 12270d94caffSDavid Greenman if (bp->b_flags & B_DELWRI) { 12280d94caffSDavid Greenman struct buf *tbp; 12290d94caffSDavid Greenman 1230b1897c19SJulian Elischer listheadp = &newvp->v_dirtyblkhd; 1231b1897c19SJulian Elischer if ((newvp->v_flag & VONWORKLST) == 0) { 1232b1897c19SJulian Elischer switch (newvp->v_type) { 1233b1897c19SJulian Elischer case VDIR: 1234e4ab40bcSKirk McKusick delay = dirdelay; 1235b1897c19SJulian Elischer break; 123638224dcdSPoul-Henning Kamp case VCHR: 1237b1897c19SJulian Elischer case VBLK: 1238b1897c19SJulian Elischer if (newvp->v_specmountpoint != NULL) { 1239e4ab40bcSKirk McKusick delay = metadelay; 1240b1897c19SJulian Elischer break; 1241b1897c19SJulian Elischer } 1242b1897c19SJulian Elischer /* fall through */ 1243b1897c19SJulian Elischer default: 1244e4ab40bcSKirk McKusick delay = filedelay; 1245b1897c19SJulian Elischer } 1246b1897c19SJulian Elischer vn_syncer_add_to_worklist(newvp, delay); 1247b1897c19SJulian Elischer } 124802b00854SKirk McKusick bp->b_xflags |= BX_VNDIRTY; 124916e9e530SPeter Wemm tbp = TAILQ_FIRST(listheadp); 125016e9e530SPeter Wemm if (tbp == NULL || 1251e929c00dSKirk McKusick bp->b_lblkno == 0 || 1252c37c9620SMatthew Dillon (bp->b_lblkno > 0 && tbp->b_lblkno < 0) || 1253e929c00dSKirk McKusick (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) { 125416e9e530SPeter Wemm TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1255e929c00dSKirk McKusick ++reassignbufsortgood; 1256e929c00dSKirk McKusick } else if (bp->b_lblkno < 0) { 1257e929c00dSKirk McKusick TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1258e929c00dSKirk McKusick ++reassignbufsortgood; 1259e929c00dSKirk McKusick } else if (reassignbufmethod == 1) { 1260e929c00dSKirk McKusick /* 1261e929c00dSKirk McKusick * New sorting algorithm, only handle sequential case, 1262c37c9620SMatthew Dillon * otherwise append to end (but before metadata) 1263e929c00dSKirk McKusick */ 1264e929c00dSKirk McKusick if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL && 126502b00854SKirk McKusick (tbp->b_xflags & BX_VNDIRTY)) { 1266c37c9620SMatthew Dillon /* 1267c37c9620SMatthew Dillon * Found the best place to insert the buffer 1268c37c9620SMatthew Dillon */ 1269e929c00dSKirk McKusick TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1270e929c00dSKirk McKusick ++reassignbufsortgood; 12710d94caffSDavid Greenman } else { 1272c37c9620SMatthew Dillon /* 1273c37c9620SMatthew Dillon * Missed, append to end, but before meta-data. 1274c37c9620SMatthew Dillon * We know that the head buffer in the list is 1275c37c9620SMatthew Dillon * not meta-data due to prior conditionals. 1276c37c9620SMatthew Dillon * 1277c37c9620SMatthew Dillon * Indirect effects: NFS second stage write 1278c37c9620SMatthew Dillon * tends to wind up here, giving maximum 1279c37c9620SMatthew Dillon * distance between the unstable write and the 1280c37c9620SMatthew Dillon * commit rpc. 
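 *
 * As an illustrative sketch (not taken from any particular trace) of
 * the resulting ordering, with data blocks kept in ascending logical
 * block number order and meta-data (negative b_lblkno) at the tail:
 *
 *	v_dirtyblkhd: [lbn 2] [lbn 3] [lbn 7] <bp lands here> [lbn -1] [lbn -2]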
1281c37c9620SMatthew Dillon */ 1282c37c9620SMatthew Dillon tbp = TAILQ_LAST(listheadp, buflists); 1283c37c9620SMatthew Dillon while (tbp && tbp->b_lblkno < 0) 1284c37c9620SMatthew Dillon tbp = TAILQ_PREV(tbp, buflists, b_vnbufs); 1285c37c9620SMatthew Dillon TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1286e929c00dSKirk McKusick ++reassignbufsortbad; 1287e929c00dSKirk McKusick } 1288e929c00dSKirk McKusick } else { 1289e929c00dSKirk McKusick /* 1290e929c00dSKirk McKusick * Old sorting algorithm, scan queue and insert 1291e929c00dSKirk McKusick */ 129216e9e530SPeter Wemm struct buf *ttbp; 129316e9e530SPeter Wemm while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 129416e9e530SPeter Wemm (ttbp->b_lblkno < bp->b_lblkno)) { 1295e929c00dSKirk McKusick ++reassignbufloops; 129616e9e530SPeter Wemm tbp = ttbp; 12970d94caffSDavid Greenman } 129816e9e530SPeter Wemm TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 12990d94caffSDavid Greenman } 13000d94caffSDavid Greenman } else { 130102b00854SKirk McKusick bp->b_xflags |= BX_VNCLEAN; 130216e9e530SPeter Wemm TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1303b1897c19SJulian Elischer if ((newvp->v_flag & VONWORKLST) && 130416e9e530SPeter Wemm TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1305b1897c19SJulian Elischer newvp->v_flag &= ~VONWORKLST; 1306b1897c19SJulian Elischer LIST_REMOVE(newvp, v_synclist); 1307b1897c19SJulian Elischer } 1308df8bae1dSRodney W. Grimes } 13094ef2094eSJulian Elischer if (bp->b_vp != newvp) { 1310a051452aSPoul-Henning Kamp bp->b_vp = newvp; 1311a051452aSPoul-Henning Kamp vhold(bp->b_vp); 13124ef2094eSJulian Elischer } 1313619594e8SJohn Dyson splx(s); 13140d94caffSDavid Greenman } 1315df8bae1dSRodney W. Grimes 1316df8bae1dSRodney W. Grimes /* 1317df8bae1dSRodney W. Grimes * Create a vnode for a block device. 131841fadeebSBruce Evans * Used for mounting the root file system. 131901f76720SJeroen Ruigrok van der Werven * XXX: This has now changed to a VCHR due to the block/char merging. 1320df8bae1dSRodney W. Grimes */ 132126f9a767SRodney W. Grimes int 1322df8bae1dSRodney W. Grimes bdevvp(dev, vpp) 1323df8bae1dSRodney W. Grimes dev_t dev; 1324df8bae1dSRodney W. Grimes struct vnode **vpp; 1325df8bae1dSRodney W. Grimes { 1326df8bae1dSRodney W. Grimes register struct vnode *vp; 1327df8bae1dSRodney W. Grimes struct vnode *nvp; 1328df8bae1dSRodney W. Grimes int error; 1329df8bae1dSRodney W. Grimes 13302447bec8SPoul-Henning Kamp if (dev == NODEV) { 133137906c68SBruce Evans *vpp = NULLVP; 133237906c68SBruce Evans return (ENXIO); 133337906c68SBruce Evans } 133402a1e48fSKirk McKusick if (vfinddev(dev, VCHR, vpp)) 133502a1e48fSKirk McKusick return (0); 1336df8bae1dSRodney W. Grimes error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 1337df8bae1dSRodney W. Grimes if (error) { 133837906c68SBruce Evans *vpp = NULLVP; 1339df8bae1dSRodney W. Grimes return (error); 1340df8bae1dSRodney W. Grimes } 1341df8bae1dSRodney W. Grimes vp = nvp; 134201f76720SJeroen Ruigrok van der Werven vp->v_type = VCHR; 1343dbafb366SPoul-Henning Kamp addalias(vp, dev); 1344df8bae1dSRodney W. Grimes *vpp = vp; 1345df8bae1dSRodney W. Grimes return (0); 1346df8bae1dSRodney W. Grimes } 1347df8bae1dSRodney W. Grimes 1348df8bae1dSRodney W. Grimes /* 1349dbafb366SPoul-Henning Kamp * Add vnode to the alias list hung off the dev_t.
1350dbafb366SPoul-Henning Kamp * 1351dbafb366SPoul-Henning Kamp * The reason for this gunk is that multiple vnodes can reference 1352dbafb366SPoul-Henning Kamp * the same physical device, so checking vp->v_usecount to see 1353dbafb366SPoul-Henning Kamp * how many users there are is inadequate; the v_usecount values for 1354dbafb366SPoul-Henning Kamp * the vnodes need to be accumulated. vcount() does that. 1355df8bae1dSRodney W. Grimes */ 13569b971133SKirk McKusick struct vnode * 1357dbafb366SPoul-Henning Kamp addaliasu(nvp, nvp_rdev) 1358dbafb366SPoul-Henning Kamp struct vnode *nvp; 1359bfbb9ce6SPoul-Henning Kamp udev_t nvp_rdev; 1360df8bae1dSRodney W. Grimes { 13619b971133SKirk McKusick struct vnode *ovp; 13629b971133SKirk McKusick vop_t **ops; 13639b971133SKirk McKusick dev_t dev; 1364df8bae1dSRodney W. Grimes 1365df8bae1dSRodney W. Grimes if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1366dbafb366SPoul-Henning Kamp panic("addaliasu on non-special vnode"); 13679b971133SKirk McKusick dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0); 13689b971133SKirk McKusick /* 13699b971133SKirk McKusick * Check to see if we have a bdevvp vnode with no associated 13709b971133SKirk McKusick * filesystem. If so, we want to associate the filesystem of 13719b971133SKirk McKusick * the newly instigated vnode with the bdevvp vnode and 13729b971133SKirk McKusick * discard the newly created vnode rather than leaving the 13739b971133SKirk McKusick * bdevvp vnode lying around with no associated filesystem. 13749b971133SKirk McKusick */ 13759b971133SKirk McKusick if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) { 13769b971133SKirk McKusick addalias(nvp, dev); 13779b971133SKirk McKusick return (nvp); 13789b971133SKirk McKusick } 13799b971133SKirk McKusick /* 13809b971133SKirk McKusick * Discard unneeded vnode, but save its node specific data. 13819b971133SKirk McKusick * Note that if there is a lock, it is carried over in the 13829b971133SKirk McKusick * node specific data to the replacement vnode. 13839b971133SKirk McKusick */ 13849b971133SKirk McKusick vref(ovp); 13859b971133SKirk McKusick ovp->v_data = nvp->v_data; 13869b971133SKirk McKusick ovp->v_tag = nvp->v_tag; 13879b971133SKirk McKusick nvp->v_data = NULL; 13889b971133SKirk McKusick ops = nvp->v_op; 13899b971133SKirk McKusick nvp->v_op = ovp->v_op; 13909b971133SKirk McKusick ovp->v_op = ops; 139167e87166SBoris Popov lockinit(&ovp->v_lock, PVFS, "vnlock", 0, LK_NOPAUSE); 139267e87166SBoris Popov if (nvp->v_vnlock) 139367e87166SBoris Popov ovp->v_vnlock = &ovp->v_lock; 13949b971133SKirk McKusick insmntque(ovp, nvp->v_mount); 13959b971133SKirk McKusick vrele(nvp); 13969b971133SKirk McKusick vgone(nvp); 13979b971133SKirk McKusick return (ovp); 1398df8bae1dSRodney W. Grimes } 1399155f87daSMatthew Dillon 1400a863c0fbSEivind Eklund /* This is a local helper function that does the same as addaliasu, but for a 1401a863c0fbSEivind Eklund * dev_t instead of a udev_t.
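 * (As context: bdevvp() above already holds a dev_t and so calls
 * addalias() directly, while filesystem vnode-initialization code,
 * e.g. the UFS vinit path, holds only an on-disk udev_t and goes
 * through addaliasu().)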
*/ 1402bba25953SEivind Eklund static void 1403dbafb366SPoul-Henning Kamp addalias(nvp, dev) 1404dbafb366SPoul-Henning Kamp struct vnode *nvp; 1405dbafb366SPoul-Henning Kamp dev_t dev; 1406dbafb366SPoul-Henning Kamp { 1407a863c0fbSEivind Eklund 1408453aaa0dSEivind Eklund KASSERT(nvp->v_type == VBLK || nvp->v_type == VCHR, 1409453aaa0dSEivind Eklund ("addalias on non-special vnode")); 1410dbafb366SPoul-Henning Kamp nvp->v_rdev = dev; 1411dbafb366SPoul-Henning Kamp simple_lock(&spechash_slock); 1412dbafb366SPoul-Henning Kamp SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext); 1413996c772fSJohn Dyson simple_unlock(&spechash_slock); 1414df8bae1dSRodney W. Grimes } 1415df8bae1dSRodney W. Grimes 1416df8bae1dSRodney W. Grimes /* 1417df8bae1dSRodney W. Grimes * Grab a particular vnode from the free list, increment its 1418e7647e6cSKris Kennaway * reference count and lock it. The vnode lock bit is set if the 1419df8bae1dSRodney W. Grimes * vnode is being eliminated in vgone. The process is awakened 1420df8bae1dSRodney W. Grimes * when the transition is completed, and an error returned to 1421df8bae1dSRodney W. Grimes * indicate that the vnode is no longer usable (possibly having 1422df8bae1dSRodney W. Grimes * been changed to a new file system type). 1423df8bae1dSRodney W. Grimes */ 142426f9a767SRodney W. Grimes int 1425996c772fSJohn Dyson vget(vp, flags, p) 1426df8bae1dSRodney W. Grimes register struct vnode *vp; 1427996c772fSJohn Dyson int flags; 1428996c772fSJohn Dyson struct proc *p; 1429df8bae1dSRodney W. Grimes { 1430996c772fSJohn Dyson int error; 1431df8bae1dSRodney W. Grimes 1432df8bae1dSRodney W. Grimes /* 1433996c772fSJohn Dyson * If the vnode is in the process of being cleaned out for 1434996c772fSJohn Dyson * another use, we wait for the cleaning to finish and then 1435996c772fSJohn Dyson * return failure. Cleaning is determined by checking that 1436996c772fSJohn Dyson * the VXLOCK flag is set. 1437df8bae1dSRodney W. Grimes */ 1438453aaa0dSEivind Eklund if ((flags & LK_INTERLOCK) == 0) 1439a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 1440996c772fSJohn Dyson if (vp->v_flag & VXLOCK) { 1441df8bae1dSRodney W. Grimes vp->v_flag |= VXWANT; 1442a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1443996c772fSJohn Dyson tsleep((caddr_t)vp, PINOD, "vget", 0); 1444996c772fSJohn Dyson return (ENOENT); 1445df8bae1dSRodney W. Grimes } 14462be70f79SJohn Dyson 1447df8bae1dSRodney W. Grimes vp->v_usecount++; 14482be70f79SJohn Dyson 1449a051452aSPoul-Henning Kamp if (VSHOULDBUSY(vp)) 1450a051452aSPoul-Henning Kamp vbusy(vp); 1451996c772fSJohn Dyson if (flags & LK_TYPE_MASK) { 145264d3c7e3SJohn Dyson if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 145364d3c7e3SJohn Dyson /* 145464d3c7e3SJohn Dyson * must expand vrele here because we do not want 145564d3c7e3SJohn Dyson * to call VOP_INACTIVE if the reference count 145664d3c7e3SJohn Dyson * drops back to zero since it was never really 145764d3c7e3SJohn Dyson * active. We must remove it from the free list 145864d3c7e3SJohn Dyson * before sleeping so that multiple processes do 145964d3c7e3SJohn Dyson * not try to recycle it. 
146064d3c7e3SJohn Dyson */ 1461a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 146264d3c7e3SJohn Dyson vp->v_usecount--; 146364d3c7e3SJohn Dyson if (VSHOULDFREE(vp)) 146464d3c7e3SJohn Dyson vfree(vp); 1465a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 146664d3c7e3SJohn Dyson } 1467996c772fSJohn Dyson return (error); 1468996c772fSJohn Dyson } 1469a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1470df8bae1dSRodney W. Grimes return (0); 1471df8bae1dSRodney W. Grimes } 1472df8bae1dSRodney W. Grimes 1473453aaa0dSEivind Eklund /* 1474a863c0fbSEivind Eklund * Increase the reference count of a vnode. 1475453aaa0dSEivind Eklund */ 1476483140eaSJohn Dyson void 1477483140eaSJohn Dyson vref(struct vnode *vp) 1478483140eaSJohn Dyson { 1479a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 1480483140eaSJohn Dyson vp->v_usecount++; 1481a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1482483140eaSJohn Dyson } 1483483140eaSJohn Dyson 1484df8bae1dSRodney W. Grimes /* 14850d955f71SJohn Dyson * Vnode put/release. 1486df8bae1dSRodney W. Grimes * If count drops to zero, call inactive routine and return to freelist. 1487df8bae1dSRodney W. Grimes */ 14882be70f79SJohn Dyson void 14892be70f79SJohn Dyson vrele(vp) 1490996c772fSJohn Dyson struct vnode *vp; 1491df8bae1dSRodney W. Grimes { 1492996c772fSJohn Dyson struct proc *p = curproc; /* XXX */ 1493df8bae1dSRodney W. Grimes 1494219cbf59SEivind Eklund KASSERT(vp != NULL, ("vrele: null vp")); 14955526d2d9SEivind Eklund 1496a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 14976476c0d2SJohn Dyson 1498f8be809eSBoris Popov KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close")); 1499f8be809eSBoris Popov 1500a051452aSPoul-Henning Kamp if (vp->v_usecount > 1) { 15012be70f79SJohn Dyson 1502a051452aSPoul-Henning Kamp vp->v_usecount--; 1503a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 15046476c0d2SJohn Dyson 150595e5e988SJohn Dyson return; 150695e5e988SJohn Dyson } 150795e5e988SJohn Dyson 150895e5e988SJohn Dyson if (vp->v_usecount == 1) { 1509a051452aSPoul-Henning Kamp 15107cb22688SPoul-Henning Kamp vp->v_usecount--; 1511fd9d9ff1SPoul-Henning Kamp if (VSHOULDFREE(vp)) 1512fd9d9ff1SPoul-Henning Kamp vfree(vp); 15130d955f71SJohn Dyson /* 15140d955f71SJohn Dyson * If we are doing a vput, the node is already locked, and we must 15150d955f71SJohn Dyson * call VOP_INACTIVE with the node locked. So, in the case of 15160d955f71SJohn Dyson * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 15170d955f71SJohn Dyson */ 15182be70f79SJohn Dyson if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1519996c772fSJohn Dyson VOP_INACTIVE(vp, p); 15202be70f79SJohn Dyson } 15212be70f79SJohn Dyson 15222be70f79SJohn Dyson } else { 15232be70f79SJohn Dyson #ifdef DIAGNOSTIC 15242be70f79SJohn Dyson vprint("vrele: negative ref count", vp); 1525a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 15262be70f79SJohn Dyson #endif 15272be70f79SJohn Dyson panic("vrele: negative ref cnt"); 1528fd7f690fSJohn Dyson } 1529df8bae1dSRodney W. Grimes } 1530df8bae1dSRodney W. Grimes 1531453aaa0dSEivind Eklund /* 1532a863c0fbSEivind Eklund * Release an already locked vnode. This gives the same effect as 1533a863c0fbSEivind Eklund * unlock+vrele(), but takes less time and avoids releasing and 1534a863c0fbSEivind Eklund * re-acquiring the lock (as vrele() acquires the lock internally.)
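 *
 * A minimal, hypothetical caller, to illustrate the pairing (the
 * attribute lookup is just a placeholder, not taken from this file);
 * the vput() both unlocks and releases in one call:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
 *		error = VOP_GETATTR(vp, &va, p->p_ucred, p);
 *		vput(vp);
 *	}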
1535453aaa0dSEivind Eklund */ 15360d955f71SJohn Dyson void 15370d955f71SJohn Dyson vput(vp) 15380d955f71SJohn Dyson struct vnode *vp; 15390d955f71SJohn Dyson { 15402be70f79SJohn Dyson struct proc *p = curproc; /* XXX */ 15410d955f71SJohn Dyson 15425526d2d9SEivind Eklund KASSERT(vp != NULL, ("vput: null vp")); 1543a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 1544f8be809eSBoris Popov KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close")); 1545f8be809eSBoris Popov 15462be70f79SJohn Dyson if (vp->v_usecount > 1) { 15472be70f79SJohn Dyson 15482be70f79SJohn Dyson vp->v_usecount--; 15492be70f79SJohn Dyson VOP_UNLOCK(vp, LK_INTERLOCK, p); 155095e5e988SJohn Dyson return; 15512be70f79SJohn Dyson 155295e5e988SJohn Dyson } 155395e5e988SJohn Dyson 155495e5e988SJohn Dyson if (vp->v_usecount == 1) { 15552be70f79SJohn Dyson 15562be70f79SJohn Dyson vp->v_usecount--; 15572be70f79SJohn Dyson if (VSHOULDFREE(vp)) 15582be70f79SJohn Dyson vfree(vp); 15592be70f79SJohn Dyson /* 15602be70f79SJohn Dyson * We are doing a vput: the node is already locked, so we can 15612be70f79SJohn Dyson * call VOP_INACTIVE with the node locked. (Contrast vrele, 15622be70f79SJohn Dyson * which must explicitly lock the vnode before VOP_INACTIVE.) 15632be70f79SJohn Dyson */ 1564a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 15652be70f79SJohn Dyson VOP_INACTIVE(vp, p); 15662be70f79SJohn Dyson 15672be70f79SJohn Dyson } else { 15682be70f79SJohn Dyson #ifdef DIAGNOSTIC 15692be70f79SJohn Dyson vprint("vput: negative ref count", vp); 15702be70f79SJohn Dyson #endif 15712be70f79SJohn Dyson panic("vput: negative ref cnt"); 15722be70f79SJohn Dyson } 15730d955f71SJohn Dyson } 15740d955f71SJohn Dyson 1575df8bae1dSRodney W. Grimes /* 1576a051452aSPoul-Henning Kamp * Somebody doesn't want the vnode recycled. 1577df8bae1dSRodney W. Grimes */ 157826f9a767SRodney W. Grimes void 157926f9a767SRodney W. Grimes vhold(vp) 1580df8bae1dSRodney W. Grimes register struct vnode *vp; 1581df8bae1dSRodney W. Grimes { 15828293f20aSTor Egge int s; 1583df8bae1dSRodney W. Grimes 15848293f20aSTor Egge s = splbio(); 1585df8bae1dSRodney W. Grimes vp->v_holdcnt++; 1586a051452aSPoul-Henning Kamp if (VSHOULDBUSY(vp)) 1587a051452aSPoul-Henning Kamp vbusy(vp); 15888293f20aSTor Egge splx(s); 1589df8bae1dSRodney W. Grimes } 1590df8bae1dSRodney W. Grimes 1591df8bae1dSRodney W. Grimes /* 1592a863c0fbSEivind Eklund * Note that there is one less reference that cares about this vnode. 1593a863c0fbSEivind Eklund * vdrop() is the opposite of vhold(). 1594df8bae1dSRodney W. Grimes */ 159526f9a767SRodney W. Grimes void 1596a051452aSPoul-Henning Kamp vdrop(vp) 1597df8bae1dSRodney W. Grimes register struct vnode *vp; 1598df8bae1dSRodney W. Grimes { 15998293f20aSTor Egge int s; 1600df8bae1dSRodney W. Grimes 16018293f20aSTor Egge s = splbio(); 1602df8bae1dSRodney W. Grimes if (vp->v_holdcnt <= 0) 1603b1897c19SJulian Elischer panic("vdrop: holdcnt"); 1604df8bae1dSRodney W. Grimes vp->v_holdcnt--; 1605a051452aSPoul-Henning Kamp if (VSHOULDFREE(vp)) 1606a051452aSPoul-Henning Kamp vfree(vp); 16078293f20aSTor Egge splx(s); 1608df8bae1dSRodney W. Grimes } 1609df8bae1dSRodney W. Grimes 1610df8bae1dSRodney W. Grimes /* 1611df8bae1dSRodney W. Grimes * Remove any vnodes in the vnode table belonging to mount point mp. 1612df8bae1dSRodney W. Grimes * 1613df8bae1dSRodney W. Grimes * If MNT_NOFORCE is specified, there should not be any active ones, 1614df8bae1dSRodney W.
Grimes * return error if any are found (nb: this is a user error, not a 1615df8bae1dSRodney W. Grimes * system error). If MNT_FORCE is specified, detach any active vnodes 1616df8bae1dSRodney W. Grimes * that are found. 1617df8bae1dSRodney W. Grimes */ 1618df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC 161927a0b398SPoul-Henning Kamp static int busyprt = 0; /* print out busy vnodes */ 16200f1adf65SBruce Evans SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1621df8bae1dSRodney W. Grimes #endif 1622df8bae1dSRodney W. Grimes 162326f9a767SRodney W. Grimes int 1624df8bae1dSRodney W. Grimes vflush(mp, skipvp, flags) 1625df8bae1dSRodney W. Grimes struct mount *mp; 1626df8bae1dSRodney W. Grimes struct vnode *skipvp; 1627df8bae1dSRodney W. Grimes int flags; 1628df8bae1dSRodney W. Grimes { 1629996c772fSJohn Dyson struct proc *p = curproc; /* XXX */ 1630996c772fSJohn Dyson struct vnode *vp, *nvp; 1631df8bae1dSRodney W. Grimes int busy = 0; 1632df8bae1dSRodney W. Grimes 1633996c772fSJohn Dyson simple_lock(&mntvnode_slock); 1634df8bae1dSRodney W. Grimes loop: 16351b727751SPoul-Henning Kamp for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 16363d2a8cf3SDavid Greenman /* 16373d2a8cf3SDavid Greenman * Make sure this vnode wasn't reclaimed in getnewvnode(). 16383d2a8cf3SDavid Greenman * Start over if it has (it won't be on the list anymore). 16393d2a8cf3SDavid Greenman */ 1640df8bae1dSRodney W. Grimes if (vp->v_mount != mp) 1641df8bae1dSRodney W. Grimes goto loop; 16421b727751SPoul-Henning Kamp nvp = LIST_NEXT(vp, v_mntvnodes); 1643df8bae1dSRodney W. Grimes /* 1644df8bae1dSRodney W. Grimes * Skip over a selected vnode. 1645df8bae1dSRodney W. Grimes */ 1646df8bae1dSRodney W. Grimes if (vp == skipvp) 1647df8bae1dSRodney W. Grimes continue; 1648996c772fSJohn Dyson 1649a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 1650df8bae1dSRodney W. Grimes /* 1651df8bae1dSRodney W. Grimes * Skip over vnodes marked VSYSTEM. 1652df8bae1dSRodney W. Grimes */ 1653996c772fSJohn Dyson if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1654a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1655df8bae1dSRodney W. Grimes continue; 1656996c772fSJohn Dyson } 1657df8bae1dSRodney W. Grimes /* 16580d94caffSDavid Greenman * If WRITECLOSE is set, only flush out regular file vnodes 16590d94caffSDavid Greenman * open for writing. 1660df8bae1dSRodney W. Grimes */ 1661df8bae1dSRodney W. Grimes if ((flags & WRITECLOSE) && 1662996c772fSJohn Dyson (vp->v_writecount == 0 || vp->v_type != VREG)) { 1663a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1664df8bae1dSRodney W. Grimes continue; 1665996c772fSJohn Dyson } 16666476c0d2SJohn Dyson 1667df8bae1dSRodney W. Grimes /* 16680d94caffSDavid Greenman * With v_usecount == 0, all we need to do is clear out the 16690d94caffSDavid Greenman * vnode data structures and we are done. 1670df8bae1dSRodney W. Grimes */ 1671df8bae1dSRodney W. Grimes if (vp->v_usecount == 0) { 1672996c772fSJohn Dyson simple_unlock(&mntvnode_slock); 1673996c772fSJohn Dyson vgonel(vp, p); 1674996c772fSJohn Dyson simple_lock(&mntvnode_slock); 1675df8bae1dSRodney W. Grimes continue; 1676df8bae1dSRodney W. Grimes } 1677ad980522SJohn Dyson 1678df8bae1dSRodney W. Grimes /* 16790d94caffSDavid Greenman * If FORCECLOSE is set, forcibly close the vnode. For block 16800d94caffSDavid Greenman * or character devices, revert to an anonymous device. For 16810d94caffSDavid Greenman * all other files, just kill them. 1682df8bae1dSRodney W. Grimes */ 1683df8bae1dSRodney W.
Grimes if (flags & FORCECLOSE) { 1684996c772fSJohn Dyson simple_unlock(&mntvnode_slock); 1685df8bae1dSRodney W. Grimes if (vp->v_type != VBLK && vp->v_type != VCHR) { 1686996c772fSJohn Dyson vgonel(vp, p); 1687df8bae1dSRodney W. Grimes } else { 1688996c772fSJohn Dyson vclean(vp, 0, p); 1689df8bae1dSRodney W. Grimes vp->v_op = spec_vnodeop_p; 1690df8bae1dSRodney W. Grimes insmntque(vp, (struct mount *) 0); 1691df8bae1dSRodney W. Grimes } 1692996c772fSJohn Dyson simple_lock(&mntvnode_slock); 1693df8bae1dSRodney W. Grimes continue; 1694df8bae1dSRodney W. Grimes } 1695df8bae1dSRodney W. Grimes #ifdef DIAGNOSTIC 1696df8bae1dSRodney W. Grimes if (busyprt) 1697df8bae1dSRodney W. Grimes vprint("vflush: busy vnode", vp); 1698df8bae1dSRodney W. Grimes #endif 1699a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1700df8bae1dSRodney W. Grimes busy++; 1701df8bae1dSRodney W. Grimes } 1702996c772fSJohn Dyson simple_unlock(&mntvnode_slock); 1703df8bae1dSRodney W. Grimes if (busy) 1704df8bae1dSRodney W. Grimes return (EBUSY); 1705df8bae1dSRodney W. Grimes return (0); 1706df8bae1dSRodney W. Grimes } 1707df8bae1dSRodney W. Grimes 1708df8bae1dSRodney W. Grimes /* 1709df8bae1dSRodney W. Grimes * Disassociate the underlying file system from a vnode. 1710df8bae1dSRodney W. Grimes */ 1711996c772fSJohn Dyson static void 1712514ede09SBruce Evans vclean(vp, flags, p) 1713514ede09SBruce Evans struct vnode *vp; 1714514ede09SBruce Evans int flags; 1715514ede09SBruce Evans struct proc *p; 1716df8bae1dSRodney W. Grimes { 171795e5e988SJohn Dyson int active; 1718df8bae1dSRodney W. Grimes 1719df8bae1dSRodney W. Grimes /* 17200d94caffSDavid Greenman * Check to see if the vnode is in use. If so we have to reference it 17210d94caffSDavid Greenman * before we clean it out so that its count cannot fall to zero and 17220d94caffSDavid Greenman * generate a race against ourselves to recycle it. 1723df8bae1dSRodney W. Grimes */ 1724bb56ec4aSPoul-Henning Kamp if ((active = vp->v_usecount)) 1725996c772fSJohn Dyson vp->v_usecount++; 172695e5e988SJohn Dyson 1727df8bae1dSRodney W. Grimes /* 17280d94caffSDavid Greenman * Prevent the vnode from being recycled or brought into use while we 17290d94caffSDavid Greenman * clean it out. 1730df8bae1dSRodney W. Grimes */ 1731df8bae1dSRodney W. Grimes if (vp->v_flag & VXLOCK) 1732df8bae1dSRodney W. Grimes panic("vclean: deadlock"); 1733df8bae1dSRodney W. Grimes vp->v_flag |= VXLOCK; 1734df8bae1dSRodney W. Grimes /* 1735996c772fSJohn Dyson * Even if the count is zero, the VOP_INACTIVE routine may still 1736996c772fSJohn Dyson * have the object locked while it cleans it out. The VOP_LOCK 1737996c772fSJohn Dyson * ensures that the VOP_INACTIVE routine is done with its work. 1738996c772fSJohn Dyson * For active vnodes, it ensures that no other activity can 1739996c772fSJohn Dyson * occur while the underlying object is being cleaned out. 1740996c772fSJohn Dyson */ 1741996c772fSJohn Dyson VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 17423c631446SJohn Dyson 1743996c772fSJohn Dyson /* 1744df8bae1dSRodney W. Grimes * Clean out any buffers associated with the vnode. 174537642196SKirk McKusick * If the flush fails, just toss the buffers. 1746df8bae1dSRodney W. 
Grimes */ 174737642196SKirk McKusick if (flags & DOCLOSE) { 1748f2a2857bSKirk McKusick if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL) 17499b971133SKirk McKusick (void) vn_write_suspend_wait(vp, NULL, V_WAIT); 175037642196SKirk McKusick if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0) 175137642196SKirk McKusick vinvalbuf(vp, 0, NOCRED, p, 0, 0); 175237642196SKirk McKusick } 175337642196SKirk McKusick 17549ff5ce6bSBoris Popov VOP_DESTROYVOBJECT(vp); 17553c631446SJohn Dyson 1756df8bae1dSRodney W. Grimes /* 1757996c772fSJohn Dyson * If purging an active vnode, it must be closed and 1758996c772fSJohn Dyson * deactivated before being reclaimed. Note that the 1759996c772fSJohn Dyson * VOP_INACTIVE will unlock the vnode. 1760df8bae1dSRodney W. Grimes */ 1761df8bae1dSRodney W. Grimes if (active) { 1762df8bae1dSRodney W. Grimes if (flags & DOCLOSE) 17634d948813SBruce Evans VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 1764996c772fSJohn Dyson VOP_INACTIVE(vp, p); 1765996c772fSJohn Dyson } else { 1766996c772fSJohn Dyson /* 1767996c772fSJohn Dyson * Any other processes trying to obtain this lock must first 1768996c772fSJohn Dyson * wait for VXLOCK to clear, then call the new lock operation. 1769996c772fSJohn Dyson */ 1770996c772fSJohn Dyson VOP_UNLOCK(vp, 0, p); 1771df8bae1dSRodney W. Grimes } 1772df8bae1dSRodney W. Grimes /* 1773df8bae1dSRodney W. Grimes * Reclaim the vnode. 1774df8bae1dSRodney W. Grimes */ 1775996c772fSJohn Dyson if (VOP_RECLAIM(vp, p)) 1776df8bae1dSRodney W. Grimes panic("vclean: cannot reclaim"); 177764d3c7e3SJohn Dyson 17789a2b8fcaSRobert Watson if (active) { 17799a2b8fcaSRobert Watson /* 17809a2b8fcaSRobert Watson * Inline copy of vrele() since VOP_INACTIVE 17819a2b8fcaSRobert Watson * has already been called. 17829a2b8fcaSRobert Watson */ 1783a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 17849a2b8fcaSRobert Watson if (--vp->v_usecount <= 0) { 17859a2b8fcaSRobert Watson #ifdef DIAGNOSTIC 17869a2b8fcaSRobert Watson if (vp->v_usecount < 0 || vp->v_writecount != 0) { 17879a2b8fcaSRobert Watson vprint("vclean: bad ref count", vp); 17889a2b8fcaSRobert Watson panic("vclean: ref cnt"); 17899a2b8fcaSRobert Watson } 17909a2b8fcaSRobert Watson #endif 17919a2b8fcaSRobert Watson vfree(vp); 17929a2b8fcaSRobert Watson } 1793a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 17949a2b8fcaSRobert Watson } 179564d3c7e3SJohn Dyson 1796996c772fSJohn Dyson cache_purge(vp); 1797a18b1f1dSJason Evans if (vp->v_vnlock) { 1798a18b1f1dSJason Evans lockdestroy(vp->v_vnlock); 1799996c772fSJohn Dyson vp->v_vnlock = NULL; 1800a18b1f1dSJason Evans } 1801a18b1f1dSJason Evans lockdestroy(&vp->v_lock); 1802df8bae1dSRodney W. Grimes 180364d3c7e3SJohn Dyson if (VSHOULDFREE(vp)) 180464d3c7e3SJohn Dyson vfree(vp); 180564d3c7e3SJohn Dyson 1806df8bae1dSRodney W. Grimes /* 1807df8bae1dSRodney W. Grimes * Done with purge, notify sleepers of the grim news. 1808df8bae1dSRodney W. Grimes */ 1809df8bae1dSRodney W. Grimes vp->v_op = dead_vnodeop_p; 18101cbbd625SGarrett Wollman vn_pollgone(vp); 1811df8bae1dSRodney W. Grimes vp->v_tag = VT_NON; 1812df8bae1dSRodney W. Grimes vp->v_flag &= ~VXLOCK; 1813df8bae1dSRodney W. Grimes if (vp->v_flag & VXWANT) { 1814df8bae1dSRodney W. Grimes vp->v_flag &= ~VXWANT; 1815df8bae1dSRodney W. Grimes wakeup((caddr_t) vp); 1816df8bae1dSRodney W. Grimes } 1817df8bae1dSRodney W. Grimes } 1818df8bae1dSRodney W. Grimes 1819df8bae1dSRodney W. Grimes /* 1820df8bae1dSRodney W. Grimes * Eliminate all activity associated with the requested vnode 1821df8bae1dSRodney W. 
Grimes * and with all vnodes aliased to the requested vnode. 1822df8bae1dSRodney W. Grimes */ 1823996c772fSJohn Dyson int 1824996c772fSJohn Dyson vop_revoke(ap) 1825996c772fSJohn Dyson struct vop_revoke_args /* { 1826996c772fSJohn Dyson struct vnode *a_vp; 1827996c772fSJohn Dyson int a_flags; 1828996c772fSJohn Dyson } */ *ap; 1829df8bae1dSRodney W. Grimes { 1830996c772fSJohn Dyson struct vnode *vp, *vq; 1831dbafb366SPoul-Henning Kamp dev_t dev; 1832996c772fSJohn Dyson 18335526d2d9SEivind Eklund KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); 1834996c772fSJohn Dyson 1835996c772fSJohn Dyson vp = ap->a_vp; 1836df8bae1dSRodney W. Grimes /* 1837996c772fSJohn Dyson * If a vgone (or vclean) is already in progress, 1838996c772fSJohn Dyson * wait until it is done and return. 1839df8bae1dSRodney W. Grimes */ 1840df8bae1dSRodney W. Grimes if (vp->v_flag & VXLOCK) { 1841df8bae1dSRodney W. Grimes vp->v_flag |= VXWANT; 1842a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1843996c772fSJohn Dyson tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1844996c772fSJohn Dyson return (0); 1845df8bae1dSRodney W. Grimes } 1846dbafb366SPoul-Henning Kamp dev = vp->v_rdev; 1847dbafb366SPoul-Henning Kamp for (;;) { 1848996c772fSJohn Dyson simple_lock(&spechash_slock); 1849dbafb366SPoul-Henning Kamp vq = SLIST_FIRST(&dev->si_hlist); 1850996c772fSJohn Dyson simple_unlock(&spechash_slock); 1851dbafb366SPoul-Henning Kamp if (!vq) 1852df8bae1dSRodney W. Grimes break; 1853dbafb366SPoul-Henning Kamp vgone(vq); 1854df8bae1dSRodney W. Grimes } 1855996c772fSJohn Dyson return (0); 1856996c772fSJohn Dyson } 1857996c772fSJohn Dyson 1858996c772fSJohn Dyson /* 1859996c772fSJohn Dyson * Recycle an unused vnode to the front of the free list. 1860996c772fSJohn Dyson * Release the passed interlock if the vnode will be recycled. 1861996c772fSJohn Dyson */ 1862996c772fSJohn Dyson int 1863996c772fSJohn Dyson vrecycle(vp, inter_lkp, p) 1864996c772fSJohn Dyson struct vnode *vp; 1865996c772fSJohn Dyson struct simplelock *inter_lkp; 1866996c772fSJohn Dyson struct proc *p; 1867996c772fSJohn Dyson { 1868996c772fSJohn Dyson 1869a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 1870996c772fSJohn Dyson if (vp->v_usecount == 0) { 1871996c772fSJohn Dyson if (inter_lkp) { 1872996c772fSJohn Dyson simple_unlock(inter_lkp); 1873996c772fSJohn Dyson } 1874996c772fSJohn Dyson vgonel(vp, p); 1875996c772fSJohn Dyson return (1); 1876996c772fSJohn Dyson } 1877a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1878996c772fSJohn Dyson return (0); 1879df8bae1dSRodney W. Grimes } 1880df8bae1dSRodney W. Grimes 1881df8bae1dSRodney W. Grimes /* 1882df8bae1dSRodney W. Grimes * Eliminate all activity associated with a vnode 1883df8bae1dSRodney W. Grimes * in preparation for reuse. 1884df8bae1dSRodney W. Grimes */ 188526f9a767SRodney W. Grimes void 188626f9a767SRodney W. Grimes vgone(vp) 1887df8bae1dSRodney W. Grimes register struct vnode *vp; 1888df8bae1dSRodney W. Grimes { 1889996c772fSJohn Dyson struct proc *p = curproc; /* XXX */ 1890996c772fSJohn Dyson 1891a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 1892996c772fSJohn Dyson vgonel(vp, p); 1893996c772fSJohn Dyson } 1894996c772fSJohn Dyson 1895996c772fSJohn Dyson /* 1896996c772fSJohn Dyson * vgone, with the vp interlock held. 
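 * The caller must already hold vp->v_interlock; vgone() above is the
 * convenience wrapper that acquires the interlock first. The interlock
 * is always dropped again before this function returns.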
1897996c772fSJohn Dyson */ 1898b7a5f3caSRobert Watson void 1899996c772fSJohn Dyson vgonel(vp, p) 1900996c772fSJohn Dyson struct vnode *vp; 1901996c772fSJohn Dyson struct proc *p; 1902996c772fSJohn Dyson { 1903925a3a41SJohn Dyson int s; 1904df8bae1dSRodney W. Grimes 1905df8bae1dSRodney W. Grimes /* 1906996c772fSJohn Dyson * If a vgone (or vclean) is already in progress, 1907996c772fSJohn Dyson * wait until it is done and return. 1908df8bae1dSRodney W. Grimes */ 1909df8bae1dSRodney W. Grimes if (vp->v_flag & VXLOCK) { 1910df8bae1dSRodney W. Grimes vp->v_flag |= VXWANT; 1911a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1912996c772fSJohn Dyson tsleep((caddr_t)vp, PINOD, "vgone", 0); 1913df8bae1dSRodney W. Grimes return; 1914df8bae1dSRodney W. Grimes } 1915ad980522SJohn Dyson 1916df8bae1dSRodney W. Grimes /* 1917df8bae1dSRodney W. Grimes * Clean out the filesystem specific data. 1918df8bae1dSRodney W. Grimes */ 1919996c772fSJohn Dyson vclean(vp, DOCLOSE, p); 1920a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 19212be70f79SJohn Dyson 1922df8bae1dSRodney W. Grimes /* 1923df8bae1dSRodney W. Grimes * Delete from old mount point vnode list, if on one. 1924df8bae1dSRodney W. Grimes */ 1925996c772fSJohn Dyson if (vp->v_mount != NULL) 1926996c772fSJohn Dyson insmntque(vp, (struct mount *)0); 1927df8bae1dSRodney W. Grimes /* 1928996c772fSJohn Dyson * If special device, remove it from special device alias list 1929996c772fSJohn Dyson * if it is on one. 1930df8bae1dSRodney W. Grimes */ 1931dbafb366SPoul-Henning Kamp if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) { 1932996c772fSJohn Dyson simple_lock(&spechash_slock); 1933b0d17ba6SPoul-Henning Kamp SLIST_REMOVE(&vp->v_rdev->si_hlist, vp, vnode, v_specnext); 1934d137acccSPoul-Henning Kamp freedev(vp->v_rdev); 1935996c772fSJohn Dyson simple_unlock(&spechash_slock); 19364d4f9323SPoul-Henning Kamp vp->v_rdev = NULL; 1937df8bae1dSRodney W. Grimes } 1938996c772fSJohn Dyson 1939df8bae1dSRodney W. Grimes /* 1940996c772fSJohn Dyson * If it is on the freelist and not already at the head, 1941c904bbbdSKirk McKusick * move it to the head of the list. The test of the 1942c904bbbdSKirk McKusick * VDOOMED flag and the reference count of zero is because 1943996c772fSJohn Dyson * it will be removed from the free list by getnewvnode, 1944996c772fSJohn Dyson * but will not have its reference count incremented until 1945996c772fSJohn Dyson * after calling vgone. If the reference count were 1946996c772fSJohn Dyson * incremented first, vgone would (incorrectly) try to 1947996c772fSJohn Dyson * close the previous instance of the underlying object. 1948df8bae1dSRodney W. Grimes */ 1949a051452aSPoul-Henning Kamp if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1950925a3a41SJohn Dyson s = splbio(); 1951996c772fSJohn Dyson simple_lock(&vnode_free_list_slock); 1952c904bbbdSKirk McKusick if (vp->v_flag & VFREE) 1953df8bae1dSRodney W. Grimes TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1954c904bbbdSKirk McKusick else 1955d09a16d8STor Egge freevnodes++; 1956925a3a41SJohn Dyson vp->v_flag |= VFREE; 1957df8bae1dSRodney W. Grimes TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1958996c772fSJohn Dyson simple_unlock(&vnode_free_list_slock); 1959925a3a41SJohn Dyson splx(s); 19600082fb46SJordan K. Hubbard } 1961996c772fSJohn Dyson 1962df8bae1dSRodney W. Grimes vp->v_type = VBAD; 1963a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 1964df8bae1dSRodney W. Grimes } 1965df8bae1dSRodney W. Grimes 1966df8bae1dSRodney W. 
Grimes /* 1967df8bae1dSRodney W. Grimes * Lookup a vnode by device number. 1968df8bae1dSRodney W. Grimes */ 196926f9a767SRodney W. Grimes int 1970df8bae1dSRodney W. Grimes vfinddev(dev, type, vpp) 1971df8bae1dSRodney W. Grimes dev_t dev; 1972df8bae1dSRodney W. Grimes enum vtype type; 1973df8bae1dSRodney W. Grimes struct vnode **vpp; 1974df8bae1dSRodney W. Grimes { 1975dbafb366SPoul-Henning Kamp struct vnode *vp; 1976df8bae1dSRodney W. Grimes 1977b98afd0dSBruce Evans simple_lock(&spechash_slock); 1978dbafb366SPoul-Henning Kamp SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) { 1979dbafb366SPoul-Henning Kamp if (type == vp->v_type) { 1980df8bae1dSRodney W. Grimes *vpp = vp; 1981dbafb366SPoul-Henning Kamp simple_unlock(&spechash_slock); 1982dbafb366SPoul-Henning Kamp return (1); 1983dbafb366SPoul-Henning Kamp } 1984df8bae1dSRodney W. Grimes } 1985b98afd0dSBruce Evans simple_unlock(&spechash_slock); 1986dbafb366SPoul-Henning Kamp return (0); 1987df8bae1dSRodney W. Grimes } 1988df8bae1dSRodney W. Grimes 1989df8bae1dSRodney W. Grimes /* 1990df8bae1dSRodney W. Grimes * Calculate the total number of references to a special device. 1991df8bae1dSRodney W. Grimes */ 199226f9a767SRodney W. Grimes int 1993df8bae1dSRodney W. Grimes vcount(vp) 1994dbafb366SPoul-Henning Kamp struct vnode *vp; 1995df8bae1dSRodney W. Grimes { 199696267288SPoul-Henning Kamp struct vnode *vq; 1997df8bae1dSRodney W. Grimes int count; 1998df8bae1dSRodney W. Grimes 1999dbafb366SPoul-Henning Kamp count = 0; 2000b98afd0dSBruce Evans simple_lock(&spechash_slock); 2001b0d17ba6SPoul-Henning Kamp SLIST_FOREACH(vq, &vp->v_rdev->si_hlist, v_specnext) 2002df8bae1dSRodney W. Grimes count += vq->v_usecount; 2003b98afd0dSBruce Evans simple_unlock(&spechash_slock); 2004df8bae1dSRodney W. Grimes return (count); 2005df8bae1dSRodney W. Grimes } 2006dbafb366SPoul-Henning Kamp 20077fab7799SPeter Wemm /* 2008e8359a57SSøren Schmidt * Same as above, but using the dev_t as argument 2009e8359a57SSøren Schmidt */ 2010e8359a57SSøren Schmidt int 2011e8359a57SSøren Schmidt count_dev(dev) 2012e8359a57SSøren Schmidt dev_t dev; 2013e8359a57SSøren Schmidt { 2014e8359a57SSøren Schmidt struct vnode *vp; 2015e8359a57SSøren Schmidt 2016e8359a57SSøren Schmidt vp = SLIST_FIRST(&dev->si_hlist); 2017e8359a57SSøren Schmidt if (vp == NULL) 2018e8359a57SSøren Schmidt return (0); 2019e8359a57SSøren Schmidt return(vcount(vp)); 2020e8359a57SSøren Schmidt } 2021e8359a57SSøren Schmidt 2022e8359a57SSøren Schmidt /* 2023df8bae1dSRodney W. Grimes * Print out a description of a vnode. 2024df8bae1dSRodney W. Grimes */ 2025df8bae1dSRodney W. Grimes static char *typename[] = 2026df8bae1dSRodney W. Grimes {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 2027df8bae1dSRodney W. Grimes 202826f9a767SRodney W. Grimes void 2029df8bae1dSRodney W. Grimes vprint(label, vp) 2030df8bae1dSRodney W. Grimes char *label; 2031dbafb366SPoul-Henning Kamp struct vnode *vp; 2032df8bae1dSRodney W. Grimes { 20332127f260SArchie Cobbs char buf[96]; 2034df8bae1dSRodney W. Grimes 2035df8bae1dSRodney W. Grimes if (label != NULL) 2036ac1e407bSBruce Evans printf("%s: %p: ", label, (void *)vp); 2037de15ef6aSDoug Rabson else 2038ac1e407bSBruce Evans printf("%p: ", (void *)vp); 2039ac1e407bSBruce Evans printf("type %s, usecount %d, writecount %d, refcount %d,", 2040df8bae1dSRodney W. Grimes typename[vp->v_type], vp->v_usecount, vp->v_writecount, 2041df8bae1dSRodney W. Grimes vp->v_holdcnt); 2042df8bae1dSRodney W. Grimes buf[0] = '\0'; 2043df8bae1dSRodney W. 
Grimes if (vp->v_flag & VROOT) 2044df8bae1dSRodney W. Grimes strcat(buf, "|VROOT"); 2045df8bae1dSRodney W. Grimes if (vp->v_flag & VTEXT) 2046df8bae1dSRodney W. Grimes strcat(buf, "|VTEXT"); 2047df8bae1dSRodney W. Grimes if (vp->v_flag & VSYSTEM) 2048df8bae1dSRodney W. Grimes strcat(buf, "|VSYSTEM"); 2049df8bae1dSRodney W. Grimes if (vp->v_flag & VXLOCK) 2050df8bae1dSRodney W. Grimes strcat(buf, "|VXLOCK"); 2051df8bae1dSRodney W. Grimes if (vp->v_flag & VXWANT) 2052df8bae1dSRodney W. Grimes strcat(buf, "|VXWANT"); 2053df8bae1dSRodney W. Grimes if (vp->v_flag & VBWAIT) 2054df8bae1dSRodney W. Grimes strcat(buf, "|VBWAIT"); 2055a051452aSPoul-Henning Kamp if (vp->v_flag & VDOOMED) 2056a051452aSPoul-Henning Kamp strcat(buf, "|VDOOMED"); 2057a051452aSPoul-Henning Kamp if (vp->v_flag & VFREE) 2058a051452aSPoul-Henning Kamp strcat(buf, "|VFREE"); 205995e5e988SJohn Dyson if (vp->v_flag & VOBJBUF) 206095e5e988SJohn Dyson strcat(buf, "|VOBJBUF"); 2061df8bae1dSRodney W. Grimes if (buf[0] != '\0') 2062df8bae1dSRodney W. Grimes printf(" flags (%s)", &buf[1]); 2063df8bae1dSRodney W. Grimes if (vp->v_data == NULL) { 2064df8bae1dSRodney W. Grimes printf("\n"); 2065df8bae1dSRodney W. Grimes } else { 2066df8bae1dSRodney W. Grimes printf("\n\t"); 2067df8bae1dSRodney W. Grimes VOP_PRINT(vp); 2068df8bae1dSRodney W. Grimes } 2069df8bae1dSRodney W. Grimes } 2070df8bae1dSRodney W. Grimes 20711a477b0cSDavid Greenman #ifdef DDB 2072f5ef029eSPoul-Henning Kamp #include <ddb/ddb.h> 2073df8bae1dSRodney W. Grimes /* 2074df8bae1dSRodney W. Grimes * List all of the locked vnodes in the system. 2075df8bae1dSRodney W. Grimes * Called when debugging the kernel. 2076df8bae1dSRodney W. Grimes */ 2077f5ef029eSPoul-Henning Kamp DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) 2078df8bae1dSRodney W. Grimes { 2079c35e283aSBruce Evans struct proc *p = curproc; /* XXX */ 2080c35e283aSBruce Evans struct mount *mp, *nmp; 2081c35e283aSBruce Evans struct vnode *vp; 2082df8bae1dSRodney W. Grimes 2083df8bae1dSRodney W. Grimes printf("Locked vnodes\n"); 2084a18b1f1dSJason Evans mtx_enter(&mountlist_mtx, MTX_DEF); 20850429e37aSPoul-Henning Kamp for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 2086a18b1f1dSJason Evans if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) { 20870429e37aSPoul-Henning Kamp nmp = TAILQ_NEXT(mp, mnt_list); 2088c35e283aSBruce Evans continue; 2089c35e283aSBruce Evans } 20901b727751SPoul-Henning Kamp LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 20916bdfe06aSEivind Eklund if (VOP_ISLOCKED(vp, NULL)) 2092df8bae1dSRodney W. Grimes vprint((char *)0, vp); 2093df8bae1dSRodney W. Grimes } 2094a18b1f1dSJason Evans mtx_enter(&mountlist_mtx, MTX_DEF); 20950429e37aSPoul-Henning Kamp nmp = TAILQ_NEXT(mp, mnt_list); 2096c35e283aSBruce Evans vfs_unbusy(mp, p); 2097c35e283aSBruce Evans } 2098a18b1f1dSJason Evans mtx_exit(&mountlist_mtx, MTX_DEF); 2099df8bae1dSRodney W. Grimes } 2100df8bae1dSRodney W. Grimes #endif 2101df8bae1dSRodney W. Grimes 21023a76a594SBruce Evans /* 21033a76a594SBruce Evans * Top level filesystem related information gathering. 
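 *
 * A userland sketch (assuming the standard sysctl MIB names) of the
 * lookup this node answers, roughly what getvfsbyname(3) performs:
 *
 *	int name[4] = { CTL_VFS, VFS_GENERIC, VFS_CONF, vfc_typenum };
 *	struct vfsconf vfc;
 *	size_t len = sizeof(vfc);
 *	error = sysctl(name, 4, &vfc, &len, NULL, 0);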
21043a76a594SBruce Evans */ 210582d9ae4eSPoul-Henning Kamp static int sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS)); 21063a76a594SBruce Evans 21074a8b9660SBruce Evans static int 210882d9ae4eSPoul-Henning Kamp vfs_sysctl(SYSCTL_HANDLER_ARGS) 2109a896f025SBruce Evans { 21104a8b9660SBruce Evans int *name = (int *)arg1 - 1; /* XXX */ 21114a8b9660SBruce Evans u_int namelen = arg2 + 1; /* XXX */ 2112a896f025SBruce Evans struct vfsconf *vfsp; 2113a896f025SBruce Evans 2114f5ce6752SBruce Evans #if 1 || defined(COMPAT_PRELITE2) 21153a76a594SBruce Evans /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ 21164a8b9660SBruce Evans if (namelen == 1) 21173a76a594SBruce Evans return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 2118dc91a89eSBruce Evans #endif 2119a896f025SBruce Evans 2120a863c0fbSEivind Eklund /* XXX the below code does not compile; vfs_sysctl does not exist. */ 21214a8b9660SBruce Evans #ifdef notyet 21223a76a594SBruce Evans /* all sysctl names at this level are at least name and field */ 21233a76a594SBruce Evans if (namelen < 2) 21243a76a594SBruce Evans return (ENOTDIR); /* overloaded */ 21253a76a594SBruce Evans if (name[0] != VFS_GENERIC) { 21263a76a594SBruce Evans for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 21273a76a594SBruce Evans if (vfsp->vfc_typenum == name[0]) 21283a76a594SBruce Evans break; 21293a76a594SBruce Evans if (vfsp == NULL) 21303a76a594SBruce Evans return (EOPNOTSUPP); 21313a76a594SBruce Evans return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 21323a76a594SBruce Evans oldp, oldlenp, newp, newlen, p)); 21333a76a594SBruce Evans } 21344a8b9660SBruce Evans #endif 21353a76a594SBruce Evans switch (name[1]) { 21363a76a594SBruce Evans case VFS_MAXTYPENUM: 21373a76a594SBruce Evans if (namelen != 2) 21383a76a594SBruce Evans return (ENOTDIR); 21393a76a594SBruce Evans return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 21403a76a594SBruce Evans case VFS_CONF: 21413a76a594SBruce Evans if (namelen != 3) 21423a76a594SBruce Evans return (ENOTDIR); /* overloaded */ 21433a76a594SBruce Evans for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 21443a76a594SBruce Evans if (vfsp->vfc_typenum == name[2]) 21453a76a594SBruce Evans break; 21463a76a594SBruce Evans if (vfsp == NULL) 21473a76a594SBruce Evans return (EOPNOTSUPP); 21483a76a594SBruce Evans return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 21493a76a594SBruce Evans } 21503a76a594SBruce Evans return (EOPNOTSUPP); 21513a76a594SBruce Evans } 21523a76a594SBruce Evans 21534a8b9660SBruce Evans SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 21544a8b9660SBruce Evans "Generic filesystem"); 21554a8b9660SBruce Evans 2156f5ce6752SBruce Evans #if 1 || defined(COMPAT_PRELITE2) 2157a896f025SBruce Evans 2158a896f025SBruce Evans static int 215982d9ae4eSPoul-Henning Kamp sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) 2160a896f025SBruce Evans { 2161a896f025SBruce Evans int error; 2162a896f025SBruce Evans struct vfsconf *vfsp; 2163a896f025SBruce Evans struct ovfsconf ovfs; 21643a76a594SBruce Evans 21653a76a594SBruce Evans for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2166a896f025SBruce Evans ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2167a896f025SBruce Evans strcpy(ovfs.vfc_name, vfsp->vfc_name); 2168a896f025SBruce Evans ovfs.vfc_index = vfsp->vfc_typenum; 2169a896f025SBruce Evans ovfs.vfc_refcount = vfsp->vfc_refcount; 2170a896f025SBruce Evans ovfs.vfc_flags = vfsp->vfc_flags; 2171a896f025SBruce Evans error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2172a896f025SBruce Evans if (error) 2173a896f025SBruce Evans return 
error; 2174a896f025SBruce Evans } 2175a896f025SBruce Evans return 0; 2176a896f025SBruce Evans } 2177a896f025SBruce Evans 2178f5ce6752SBruce Evans #endif /* 1 || COMPAT_PRELITE2 */ 2179a896f025SBruce Evans 2180453aaa0dSEivind Eklund #if COMPILING_LINT 2181df8bae1dSRodney W. Grimes #define KINFO_VNODESLOP 10 2182df8bae1dSRodney W. Grimes /* 2183df8bae1dSRodney W. Grimes * Dump vnode list (via sysctl). 2184df8bae1dSRodney W. Grimes * Copyout address of vnode followed by vnode. 2185df8bae1dSRodney W. Grimes */ 2186df8bae1dSRodney W. Grimes /* ARGSUSED */ 21874b2af45fSPoul-Henning Kamp static int 218882d9ae4eSPoul-Henning Kamp sysctl_vnode(SYSCTL_HANDLER_ARGS) 2189df8bae1dSRodney W. Grimes { 2190996c772fSJohn Dyson struct proc *p = curproc; /* XXX */ 2191c35e283aSBruce Evans struct mount *mp, *nmp; 2192c35e283aSBruce Evans struct vnode *nvp, *vp; 2193df8bae1dSRodney W. Grimes int error; 2194df8bae1dSRodney W. Grimes 2195df8bae1dSRodney W. Grimes #define VPTRSZ sizeof (struct vnode *) 2196df8bae1dSRodney W. Grimes #define VNODESZ sizeof (struct vnode) 21974b2af45fSPoul-Henning Kamp 21984b2af45fSPoul-Henning Kamp req->lock = 0; 21992d0b1d70SPoul-Henning Kamp if (!req->oldptr) /* Make an estimate */ 22004b2af45fSPoul-Henning Kamp return (SYSCTL_OUT(req, 0, 22014b2af45fSPoul-Henning Kamp (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2202df8bae1dSRodney W. Grimes 2203a18b1f1dSJason Evans mtx_enter(&mountlist_mtx, MTX_DEF); 22040429e37aSPoul-Henning Kamp for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 2205a18b1f1dSJason Evans if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) { 22060429e37aSPoul-Henning Kamp nmp = TAILQ_NEXT(mp, mnt_list); 2207df8bae1dSRodney W. Grimes continue; 2208c35e283aSBruce Evans } 2209df8bae1dSRodney W. Grimes again: 2210c35e283aSBruce Evans simple_lock(&mntvnode_slock); 22111b727751SPoul-Henning Kamp for (vp = LIST_FIRST(&mp->mnt_vnodelist); 2212df8bae1dSRodney W. Grimes vp != NULL; 2213c35e283aSBruce Evans vp = nvp) { 2214df8bae1dSRodney W. Grimes /* 2215c35e283aSBruce Evans * Check that the vp is still associated with 2216c35e283aSBruce Evans * this filesystem. RACE: could have been 2217c35e283aSBruce Evans * recycled onto the same filesystem. 2218df8bae1dSRodney W. Grimes */ 2219df8bae1dSRodney W. Grimes if (vp->v_mount != mp) { 2220c35e283aSBruce Evans simple_unlock(&mntvnode_slock); 2221df8bae1dSRodney W. Grimes goto again; 2222df8bae1dSRodney W. Grimes } 22231b727751SPoul-Henning Kamp nvp = LIST_NEXT(vp, v_mntvnodes); 2224c35e283aSBruce Evans simple_unlock(&mntvnode_slock); 22254b2af45fSPoul-Henning Kamp if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 2226c35e283aSBruce Evans (error = SYSCTL_OUT(req, vp, VNODESZ))) 2227df8bae1dSRodney W. Grimes return (error); 2228c35e283aSBruce Evans simple_lock(&mntvnode_slock); 2229e887950aSBruce Evans } 2230c35e283aSBruce Evans simple_unlock(&mntvnode_slock); 2231a18b1f1dSJason Evans mtx_enter(&mountlist_mtx, MTX_DEF); 22320429e37aSPoul-Henning Kamp nmp = TAILQ_NEXT(mp, mnt_list); 2233996c772fSJohn Dyson vfs_unbusy(mp, p); 2234df8bae1dSRodney W. Grimes } 2235a18b1f1dSJason Evans mtx_exit(&mountlist_mtx, MTX_DEF); 2236df8bae1dSRodney W. Grimes 2237df8bae1dSRodney W. Grimes return (0); 2238df8bae1dSRodney W. Grimes } 2239df8bae1dSRodney W. Grimes 22402e58c0f8SDavid Greenman /* 22412e58c0f8SDavid Greenman * XXX 22422e58c0f8SDavid Greenman * Exporting the vnode list on large systems causes them to crash. 22432e58c0f8SDavid Greenman * Exporting the vnode list on medium systems causes sysctl to coredump. 
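 *
 * Each record in the stream is a vnode pointer (VPTRSZ bytes)
 * immediately followed by a copy of the vnode itself (VNODESZ bytes);
 * the "S,vnode" format string below advertises that layout.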
22442e58c0f8SDavid Greenman */ 224565d0bc13SPoul-Henning Kamp SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 224665d0bc13SPoul-Henning Kamp 0, 0, sysctl_vnode, "S,vnode", ""); 22472e58c0f8SDavid Greenman #endif 22484b2af45fSPoul-Henning Kamp 2249df8bae1dSRodney W. Grimes /* 2250df8bae1dSRodney W. Grimes * Check to see if a filesystem is mounted on a block device. 2251df8bae1dSRodney W. Grimes */ 2252df8bae1dSRodney W. Grimes int 2253df8bae1dSRodney W. Grimes vfs_mountedon(vp) 2254996c772fSJohn Dyson struct vnode *vp; 2255df8bae1dSRodney W. Grimes { 2256df8bae1dSRodney W. Grimes 2257b1897c19SJulian Elischer if (vp->v_specmountpoint != NULL) 2258df8bae1dSRodney W. Grimes return (EBUSY); 2259dbafb366SPoul-Henning Kamp return (0); 2260996c772fSJohn Dyson } 2261996c772fSJohn Dyson 2262996c772fSJohn Dyson /* 2263996c772fSJohn Dyson * Unmount all filesystems. The list is traversed in reverse order 22647c1557c4SBruce Evans * of mounting to avoid dependencies. 2265996c772fSJohn Dyson */ 2266996c772fSJohn Dyson void 2267996c772fSJohn Dyson vfs_unmountall() 2268996c772fSJohn Dyson { 22690429e37aSPoul-Henning Kamp struct mount *mp; 2270cb87a87cSTor Egge struct proc *p; 2271996c772fSJohn Dyson int error; 2272996c772fSJohn Dyson 2273cb87a87cSTor Egge if (curproc != NULL) 2274cb87a87cSTor Egge p = curproc; 2275cb87a87cSTor Egge else 2276cb87a87cSTor Egge p = initproc; /* XXX XXX should this be proc0? */ 22777c1557c4SBruce Evans /* 22787c1557c4SBruce Evans * Since this only runs when rebooting, it is not interlocked. 22797c1557c4SBruce Evans */ 22800429e37aSPoul-Henning Kamp while(!TAILQ_EMPTY(&mountlist)) { 22810429e37aSPoul-Henning Kamp mp = TAILQ_LAST(&mountlist, mntlist); 22827c1557c4SBruce Evans error = dounmount(mp, MNT_FORCE, p); 2283996c772fSJohn Dyson if (error) { 22840429e37aSPoul-Henning Kamp TAILQ_REMOVE(&mountlist, mp, mnt_list); 22857c1557c4SBruce Evans printf("unmount of %s failed (", 22867c1557c4SBruce Evans mp->mnt_stat.f_mntonname); 2287996c772fSJohn Dyson if (error == EBUSY) 2288996c772fSJohn Dyson printf("BUSY)\n"); 2289996c772fSJohn Dyson else 2290996c772fSJohn Dyson printf("%d)\n", error); 22910429e37aSPoul-Henning Kamp } else { 22920429e37aSPoul-Henning Kamp /* The unmount has removed mp from the mountlist */ 2293996c772fSJohn Dyson } 2294996c772fSJohn Dyson } 2295df8bae1dSRodney W. Grimes } 2296df8bae1dSRodney W. Grimes 2297df8bae1dSRodney W. Grimes /* 2298df8bae1dSRodney W. Grimes * Build hash lists of net addresses and hang them off the mount point. 2299df8bae1dSRodney W. Grimes * Called by ufs_mount() to set up the lists of export addresses. 2300df8bae1dSRodney W. Grimes */ 2301df8bae1dSRodney W. Grimes static int 2302514ede09SBruce Evans vfs_hang_addrlist(mp, nep, argp) 2303514ede09SBruce Evans struct mount *mp; 2304514ede09SBruce Evans struct netexport *nep; 2305514ede09SBruce Evans struct export_args *argp; 2306df8bae1dSRodney W. Grimes { 2307df8bae1dSRodney W. Grimes register struct netcred *np; 2308df8bae1dSRodney W. Grimes register struct radix_node_head *rnh; 2309df8bae1dSRodney W. Grimes register int i; 2310df8bae1dSRodney W. Grimes struct radix_node *rn; 2311df8bae1dSRodney W. Grimes struct sockaddr *saddr, *smask = 0; 2312df8bae1dSRodney W. Grimes struct domain *dom; 2313df8bae1dSRodney W. Grimes int error; 2314df8bae1dSRodney W. Grimes 2315df8bae1dSRodney W. Grimes if (argp->ex_addrlen == 0) { 2316df8bae1dSRodney W. Grimes if (mp->mnt_flag & MNT_DEFEXPORTED) 2317df8bae1dSRodney W. Grimes return (EPERM); 2318df8bae1dSRodney W. 
Grimes np = &nep->ne_defexported; 2319df8bae1dSRodney W. Grimes np->netc_exflags = argp->ex_flags; 2320df8bae1dSRodney W. Grimes np->netc_anon = argp->ex_anon; 2321df8bae1dSRodney W. Grimes np->netc_anon.cr_ref = 1; 2322df8bae1dSRodney W. Grimes mp->mnt_flag |= MNT_DEFEXPORTED; 2323df8bae1dSRodney W. Grimes return (0); 2324df8bae1dSRodney W. Grimes } 2325df8bae1dSRodney W. Grimes i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2326df8bae1dSRodney W. Grimes np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2327df8bae1dSRodney W. Grimes bzero((caddr_t) np, i); 2328df8bae1dSRodney W. Grimes saddr = (struct sockaddr *) (np + 1); 2329bb56ec4aSPoul-Henning Kamp if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2330df8bae1dSRodney W. Grimes goto out; 2331df8bae1dSRodney W. Grimes if (saddr->sa_len > argp->ex_addrlen) 2332df8bae1dSRodney W. Grimes saddr->sa_len = argp->ex_addrlen; 2333df8bae1dSRodney W. Grimes if (argp->ex_masklen) { 2334df8bae1dSRodney W. Grimes smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 23355f61c81dSPeter Wemm error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2336df8bae1dSRodney W. Grimes if (error) 2337df8bae1dSRodney W. Grimes goto out; 2338df8bae1dSRodney W. Grimes if (smask->sa_len > argp->ex_masklen) 2339df8bae1dSRodney W. Grimes smask->sa_len = argp->ex_masklen; 2340df8bae1dSRodney W. Grimes } 2341df8bae1dSRodney W. Grimes i = saddr->sa_family; 2342df8bae1dSRodney W. Grimes if ((rnh = nep->ne_rtable[i]) == 0) { 2343df8bae1dSRodney W. Grimes /* 23440d94caffSDavid Greenman * Seems silly to initialize every AF when most are not used, 23450d94caffSDavid Greenman * do so on demand here 2346df8bae1dSRodney W. Grimes */ 2347df8bae1dSRodney W. Grimes for (dom = domains; dom; dom = dom->dom_next) 2348df8bae1dSRodney W. Grimes if (dom->dom_family == i && dom->dom_rtattach) { 2349df8bae1dSRodney W. Grimes dom->dom_rtattach((void **) &nep->ne_rtable[i], 2350df8bae1dSRodney W. Grimes dom->dom_rtoffset); 2351df8bae1dSRodney W. Grimes break; 2352df8bae1dSRodney W. Grimes } 2353df8bae1dSRodney W. Grimes if ((rnh = nep->ne_rtable[i]) == 0) { 2354df8bae1dSRodney W. Grimes error = ENOBUFS; 2355df8bae1dSRodney W. Grimes goto out; 2356df8bae1dSRodney W. Grimes } 2357df8bae1dSRodney W. Grimes } 2358df8bae1dSRodney W. Grimes rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2359df8bae1dSRodney W. Grimes np->netc_rnodes); 2360df8bae1dSRodney W. Grimes if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2361df8bae1dSRodney W. Grimes error = EPERM; 2362df8bae1dSRodney W. Grimes goto out; 2363df8bae1dSRodney W. Grimes } 2364df8bae1dSRodney W. Grimes np->netc_exflags = argp->ex_flags; 2365df8bae1dSRodney W. Grimes np->netc_anon = argp->ex_anon; 2366df8bae1dSRodney W. Grimes np->netc_anon.cr_ref = 1; 2367df8bae1dSRodney W. Grimes return (0); 2368df8bae1dSRodney W. Grimes out: 2369df8bae1dSRodney W. Grimes free(np, M_NETADDR); 2370df8bae1dSRodney W. Grimes return (error); 2371df8bae1dSRodney W. Grimes } 2372df8bae1dSRodney W. Grimes 2373a863c0fbSEivind Eklund /* Helper for vfs_free_addrlist. */ 2374df8bae1dSRodney W. Grimes /* ARGSUSED */ 2375df8bae1dSRodney W. Grimes static int 2376514ede09SBruce Evans vfs_free_netcred(rn, w) 2377514ede09SBruce Evans struct radix_node *rn; 2378514ede09SBruce Evans void *w; 2379df8bae1dSRodney W. Grimes { 2380df8bae1dSRodney W. Grimes register struct radix_node_head *rnh = (struct radix_node_head *) w; 2381df8bae1dSRodney W. Grimes 2382df8bae1dSRodney W. 
Grimes (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2383df8bae1dSRodney W. Grimes free((caddr_t) rn, M_NETADDR);
2384df8bae1dSRodney W. Grimes return (0);
2385df8bae1dSRodney W. Grimes }
2386df8bae1dSRodney W. Grimes 
2387df8bae1dSRodney W. Grimes /*
2388df8bae1dSRodney W. Grimes * Free the net address hash lists that are hanging off the mount points.
2389df8bae1dSRodney W. Grimes */
2390df8bae1dSRodney W. Grimes static void
2391514ede09SBruce Evans vfs_free_addrlist(nep)
2392514ede09SBruce Evans struct netexport *nep;
2393df8bae1dSRodney W. Grimes {
2394df8bae1dSRodney W. Grimes register int i;
2395df8bae1dSRodney W. Grimes register struct radix_node_head *rnh;
2396df8bae1dSRodney W. Grimes 
2397df8bae1dSRodney W. Grimes for (i = 0; i <= AF_MAX; i++)
2398bb56ec4aSPoul-Henning Kamp if ((rnh = nep->ne_rtable[i])) {
2399df8bae1dSRodney W. Grimes (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2400df8bae1dSRodney W. Grimes (caddr_t) rnh);
2401df8bae1dSRodney W. Grimes free((caddr_t) rnh, M_RTABLE);
2402df8bae1dSRodney W. Grimes nep->ne_rtable[i] = 0;
2403df8bae1dSRodney W. Grimes }
2404df8bae1dSRodney W. Grimes }
2405df8bae1dSRodney W. Grimes 
240621a90397SAlfred Perlstein /*
240721a90397SAlfred Perlstein * High-level function to manipulate export options on a mount point
240821a90397SAlfred Perlstein * and the passed-in netexport.
240921a90397SAlfred Perlstein * Struct export_args *argp is the variable used to twiddle options;
241021a90397SAlfred Perlstein * the structure is described in sys/mount.h.
241121a90397SAlfred Perlstein */
2412df8bae1dSRodney W. Grimes int
2413df8bae1dSRodney W. Grimes vfs_export(mp, nep, argp)
2414df8bae1dSRodney W. Grimes struct mount *mp;
2415df8bae1dSRodney W. Grimes struct netexport *nep;
2416df8bae1dSRodney W. Grimes struct export_args *argp;
2417df8bae1dSRodney W. Grimes {
2418df8bae1dSRodney W. Grimes int error;
2419df8bae1dSRodney W. Grimes 
2420df8bae1dSRodney W. Grimes if (argp->ex_flags & MNT_DELEXPORT) {
2421f6b4c285SDoug Rabson if (mp->mnt_flag & MNT_EXPUBLIC) {
2422f6b4c285SDoug Rabson vfs_setpublicfs(NULL, NULL, NULL);
2423f6b4c285SDoug Rabson mp->mnt_flag &= ~MNT_EXPUBLIC;
2424f6b4c285SDoug Rabson }
2425df8bae1dSRodney W. Grimes vfs_free_addrlist(nep);
2426df8bae1dSRodney W. Grimes mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2427df8bae1dSRodney W. Grimes }
2428df8bae1dSRodney W. Grimes if (argp->ex_flags & MNT_EXPORTED) {
2429f6b4c285SDoug Rabson if (argp->ex_flags & MNT_EXPUBLIC) {
2430f6b4c285SDoug Rabson if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2431f6b4c285SDoug Rabson return (error);
2432f6b4c285SDoug Rabson mp->mnt_flag |= MNT_EXPUBLIC;
2433f6b4c285SDoug Rabson }
2434bb56ec4aSPoul-Henning Kamp if ((error = vfs_hang_addrlist(mp, nep, argp)))
2435df8bae1dSRodney W. Grimes return (error);
2436df8bae1dSRodney W. Grimes mp->mnt_flag |= MNT_EXPORTED;
2437df8bae1dSRodney W. Grimes }
2438df8bae1dSRodney W. Grimes return (0);
2439df8bae1dSRodney W. Grimes }
2440df8bae1dSRodney W. Grimes 
2441f6b4c285SDoug Rabson /*
2442f6b4c285SDoug Rabson * Set the publicly exported filesystem (WebNFS).
Currently, only 2443f6b4c285SDoug Rabson * one public filesystem is possible in the spec (RFC 2054 and 2055) 2444f6b4c285SDoug Rabson */ 2445f6b4c285SDoug Rabson int 2446f6b4c285SDoug Rabson vfs_setpublicfs(mp, nep, argp) 2447f6b4c285SDoug Rabson struct mount *mp; 2448f6b4c285SDoug Rabson struct netexport *nep; 2449f6b4c285SDoug Rabson struct export_args *argp; 2450f6b4c285SDoug Rabson { 2451f6b4c285SDoug Rabson int error; 2452f6b4c285SDoug Rabson struct vnode *rvp; 2453f6b4c285SDoug Rabson char *cp; 2454f6b4c285SDoug Rabson 2455f6b4c285SDoug Rabson /* 2456f6b4c285SDoug Rabson * mp == NULL -> invalidate the current info, the FS is 2457f6b4c285SDoug Rabson * no longer exported. May be called from either vfs_export 2458f6b4c285SDoug Rabson * or unmount, so check if it hasn't already been done. 2459f6b4c285SDoug Rabson */ 2460f6b4c285SDoug Rabson if (mp == NULL) { 2461f6b4c285SDoug Rabson if (nfs_pub.np_valid) { 2462f6b4c285SDoug Rabson nfs_pub.np_valid = 0; 2463f6b4c285SDoug Rabson if (nfs_pub.np_index != NULL) { 2464f6b4c285SDoug Rabson FREE(nfs_pub.np_index, M_TEMP); 2465f6b4c285SDoug Rabson nfs_pub.np_index = NULL; 2466f6b4c285SDoug Rabson } 2467f6b4c285SDoug Rabson } 2468f6b4c285SDoug Rabson return (0); 2469f6b4c285SDoug Rabson } 2470f6b4c285SDoug Rabson 2471f6b4c285SDoug Rabson /* 2472f6b4c285SDoug Rabson * Only one allowed at a time. 2473f6b4c285SDoug Rabson */ 2474f6b4c285SDoug Rabson if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2475f6b4c285SDoug Rabson return (EBUSY); 2476f6b4c285SDoug Rabson 2477f6b4c285SDoug Rabson /* 2478f6b4c285SDoug Rabson * Get real filehandle for root of exported FS. 2479f6b4c285SDoug Rabson */ 2480f6b4c285SDoug Rabson bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2481f6b4c285SDoug Rabson nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2482f6b4c285SDoug Rabson 2483f6b4c285SDoug Rabson if ((error = VFS_ROOT(mp, &rvp))) 2484f6b4c285SDoug Rabson return (error); 2485f6b4c285SDoug Rabson 2486f6b4c285SDoug Rabson if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2487f6b4c285SDoug Rabson return (error); 2488f6b4c285SDoug Rabson 2489f6b4c285SDoug Rabson vput(rvp); 2490f6b4c285SDoug Rabson 2491f6b4c285SDoug Rabson /* 2492f6b4c285SDoug Rabson * If an indexfile was specified, pull it in. 2493f6b4c285SDoug Rabson */ 2494f6b4c285SDoug Rabson if (argp->ex_indexfile != NULL) { 2495f6b4c285SDoug Rabson MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2496f6b4c285SDoug Rabson M_WAITOK); 2497f6b4c285SDoug Rabson error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2498f6b4c285SDoug Rabson MAXNAMLEN, (size_t *)0); 2499f6b4c285SDoug Rabson if (!error) { 2500f6b4c285SDoug Rabson /* 2501f6b4c285SDoug Rabson * Check for illegal filenames. 
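 * (A '/' anywhere in the name is rejected with EINVAL below: the
 * index file must be a single path component, not a path.)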
2502f6b4c285SDoug Rabson */
2503f6b4c285SDoug Rabson for (cp = nfs_pub.np_index; *cp; cp++) {
2504f6b4c285SDoug Rabson if (*cp == '/') {
2505f6b4c285SDoug Rabson error = EINVAL;
2506f6b4c285SDoug Rabson break;
2507f6b4c285SDoug Rabson }
2508f6b4c285SDoug Rabson }
2509f6b4c285SDoug Rabson }
2510f6b4c285SDoug Rabson if (error) {
2511f6b4c285SDoug Rabson FREE(nfs_pub.np_index, M_TEMP);
2512f6b4c285SDoug Rabson return (error);
2513f6b4c285SDoug Rabson }
2514f6b4c285SDoug Rabson }
2515f6b4c285SDoug Rabson 
2516f6b4c285SDoug Rabson nfs_pub.np_mount = mp;
2517f6b4c285SDoug Rabson nfs_pub.np_valid = 1;
2518f6b4c285SDoug Rabson return (0);
2519f6b4c285SDoug Rabson }
2520f6b4c285SDoug Rabson 
252121a90397SAlfred Perlstein /*
252221a90397SAlfred Perlstein * Used by the filesystems to determine if a given network address
252321a90397SAlfred Perlstein * (passed in 'nam') is present in their exports list; returns a pointer
252421a90397SAlfred Perlstein * to struct netcred so that the filesystem can examine it for
252521a90397SAlfred Perlstein * access rights (read/write/etc).
252621a90397SAlfred Perlstein */
2527df8bae1dSRodney W. Grimes struct netcred *
2528df8bae1dSRodney W. Grimes vfs_export_lookup(mp, nep, nam)
2529df8bae1dSRodney W. Grimes register struct mount *mp;
2530df8bae1dSRodney W. Grimes struct netexport *nep;
253157bf258eSGarrett Wollman struct sockaddr *nam;
2532df8bae1dSRodney W. Grimes {
2533df8bae1dSRodney W. Grimes register struct netcred *np;
2534df8bae1dSRodney W. Grimes register struct radix_node_head *rnh;
2535df8bae1dSRodney W. Grimes struct sockaddr *saddr;
2536df8bae1dSRodney W. Grimes 
2537df8bae1dSRodney W. Grimes np = NULL;
2538df8bae1dSRodney W. Grimes if (mp->mnt_flag & MNT_EXPORTED) {
2539df8bae1dSRodney W. Grimes /*
2540df8bae1dSRodney W. Grimes * Look up in the export list first.
2541df8bae1dSRodney W. Grimes */
2542df8bae1dSRodney W. Grimes if (nam != NULL) {
254357bf258eSGarrett Wollman saddr = nam;
2544df8bae1dSRodney W. Grimes rnh = nep->ne_rtable[saddr->sa_family];
2545df8bae1dSRodney W. Grimes if (rnh != NULL) {
2546df8bae1dSRodney W. Grimes np = (struct netcred *)
2547df8bae1dSRodney W. Grimes (*rnh->rnh_matchaddr)((caddr_t)saddr,
2548df8bae1dSRodney W. Grimes rnh);
2549df8bae1dSRodney W. Grimes if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2550df8bae1dSRodney W. Grimes np = NULL;
2551df8bae1dSRodney W. Grimes }
2552df8bae1dSRodney W. Grimes }
2553df8bae1dSRodney W. Grimes /*
2554df8bae1dSRodney W. Grimes * If no address match, use the default if it exists.
2555df8bae1dSRodney W. Grimes */
2556df8bae1dSRodney W. Grimes if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2557df8bae1dSRodney W. Grimes np = &nep->ne_defexported;
2558df8bae1dSRodney W. Grimes }
2559df8bae1dSRodney W. Grimes return (np);
2560df8bae1dSRodney W. Grimes }
256161f5d510SDavid Greenman 
256261f5d510SDavid Greenman /*
256361f5d510SDavid Greenman * Perform msync on all vnodes under a mount point.
256461f5d510SDavid Greenman * The mount point must be locked.
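 *
 * A sketch of typical use, mirroring sync_fsync() later in this
 * file: push dirty mmap'ed pages without blocking, then sync the
 * filesystem proper:
 *
 *	vfs_msync(mp, MNT_NOWAIT);
 *	VFS_SYNC(mp, MNT_LAZY, cred, p);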
256561f5d510SDavid Greenman */ 256661f5d510SDavid Greenman void 256761f5d510SDavid Greenman vfs_msync(struct mount *mp, int flags) { 2568a316d390SJohn Dyson struct vnode *vp, *nvp; 256937b8ccd3SPeter Wemm struct vm_object *obj; 257095e5e988SJohn Dyson int anyio, tries; 257195e5e988SJohn Dyson 257295e5e988SJohn Dyson tries = 5; 257361f5d510SDavid Greenman loop: 257495e5e988SJohn Dyson anyio = 0; 25751b727751SPoul-Henning Kamp for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) { 257661f5d510SDavid Greenman 25771b727751SPoul-Henning Kamp nvp = LIST_NEXT(vp, v_mntvnodes); 257895e5e988SJohn Dyson 257995e5e988SJohn Dyson if (vp->v_mount != mp) { 258095e5e988SJohn Dyson goto loop; 258195e5e988SJohn Dyson } 258295e5e988SJohn Dyson 258337b8ccd3SPeter Wemm if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 258437b8ccd3SPeter Wemm continue; 258537b8ccd3SPeter Wemm 258637b8ccd3SPeter Wemm if (flags != MNT_WAIT) { 25879ff5ce6bSBoris Popov if (VOP_GETVOBJECT(vp, &obj) != 0 || 25889ff5ce6bSBoris Popov (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 258937b8ccd3SPeter Wemm continue; 25906bdfe06aSEivind Eklund if (VOP_ISLOCKED(vp, NULL)) 259161f5d510SDavid Greenman continue; 259295e5e988SJohn Dyson } 259395e5e988SJohn Dyson 2594a18b1f1dSJason Evans mtx_enter(&vp->v_interlock, MTX_DEF); 25959ff5ce6bSBoris Popov if (VOP_GETVOBJECT(vp, &obj) == 0 && 25969ff5ce6bSBoris Popov (obj->flags & OBJ_MIGHTBEDIRTY)) { 259795e5e988SJohn Dyson if (!vget(vp, 259895e5e988SJohn Dyson LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 25999ff5ce6bSBoris Popov if (VOP_GETVOBJECT(vp, &obj) == 0) { 26009ff5ce6bSBoris Popov vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); 260195e5e988SJohn Dyson anyio = 1; 260295e5e988SJohn Dyson } 260395e5e988SJohn Dyson vput(vp); 260495e5e988SJohn Dyson } 260595e5e988SJohn Dyson } else { 2606a18b1f1dSJason Evans mtx_exit(&vp->v_interlock, MTX_DEF); 260761f5d510SDavid Greenman } 260861f5d510SDavid Greenman } 260995e5e988SJohn Dyson if (anyio && (--tries > 0)) 261095e5e988SJohn Dyson goto loop; 261161f5d510SDavid Greenman } 26126476c0d2SJohn Dyson 26136476c0d2SJohn Dyson /* 26146476c0d2SJohn Dyson * Create the VM object needed for VMIO and mmap support. This 26156476c0d2SJohn Dyson * is done for all VREG files in the system. Some filesystems might 26166476c0d2SJohn Dyson * afford the additional metadata buffering capability of the 26176476c0d2SJohn Dyson * VMIO code by making the device node be VMIO mode also. 261895e5e988SJohn Dyson * 2619fb116777SEivind Eklund * vp must be locked when vfs_object_create is called. 26206476c0d2SJohn Dyson */ 26216476c0d2SJohn Dyson int 2622fb116777SEivind Eklund vfs_object_create(vp, p, cred) 26236476c0d2SJohn Dyson struct vnode *vp; 26246476c0d2SJohn Dyson struct proc *p; 26256476c0d2SJohn Dyson struct ucred *cred; 26266476c0d2SJohn Dyson { 26279ff5ce6bSBoris Popov return (VOP_CREATEVOBJECT(vp, cred, p)); 26286476c0d2SJohn Dyson } 2629b15a966eSPoul-Henning Kamp 2630453aaa0dSEivind Eklund /* 2631453aaa0dSEivind Eklund * Mark a vnode as free, putting it up for recycling. 
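 *
 * A vnode with VAGE set goes to the head of the free list so that it
 * is recycled first; others go to the tail.  The vnode keeps its
 * identity while on the list, and vbusy() below takes it back off if
 * it is referenced again before getnewvnode() recycles it.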
2632453aaa0dSEivind Eklund */
2633c904bbbdSKirk McKusick void
2634a051452aSPoul-Henning Kamp vfree(vp)
2635b15a966eSPoul-Henning Kamp struct vnode *vp;
2636b15a966eSPoul-Henning Kamp {
2637925a3a41SJohn Dyson int s;
2638925a3a41SJohn Dyson 
2639925a3a41SJohn Dyson s = splbio();
2640a051452aSPoul-Henning Kamp simple_lock(&vnode_free_list_slock);
2641c904bbbdSKirk McKusick KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free"));
2642a051452aSPoul-Henning Kamp if (vp->v_flag & VAGE) {
2643a051452aSPoul-Henning Kamp TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2644a051452aSPoul-Henning Kamp } else {
2645b15a966eSPoul-Henning Kamp TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
26468670684aSPoul-Henning Kamp }
2647a051452aSPoul-Henning Kamp freevnodes++;
2648b15a966eSPoul-Henning Kamp simple_unlock(&vnode_free_list_slock);
2649a051452aSPoul-Henning Kamp vp->v_flag &= ~VAGE;
2650a051452aSPoul-Henning Kamp vp->v_flag |= VFREE;
2651925a3a41SJohn Dyson splx(s);
2652b15a966eSPoul-Henning Kamp }
2653a051452aSPoul-Henning Kamp 
2654453aaa0dSEivind Eklund /*
2655453aaa0dSEivind Eklund * Opposite of vfree() - mark a vnode as in use.
2656453aaa0dSEivind Eklund */
265747221757SJohn Dyson void
2658a051452aSPoul-Henning Kamp vbusy(vp)
2659a051452aSPoul-Henning Kamp struct vnode *vp;
2660a051452aSPoul-Henning Kamp {
2661925a3a41SJohn Dyson int s;
2662925a3a41SJohn Dyson 
2663925a3a41SJohn Dyson s = splbio();
2664a051452aSPoul-Henning Kamp simple_lock(&vnode_free_list_slock);
2665c904bbbdSKirk McKusick KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free"));
2666a051452aSPoul-Henning Kamp TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2667a051452aSPoul-Henning Kamp freevnodes--;
2668a051452aSPoul-Henning Kamp simple_unlock(&vnode_free_list_slock);
266964d3c7e3SJohn Dyson vp->v_flag &= ~(VFREE|VAGE);
2670925a3a41SJohn Dyson splx(s);
2671b15a966eSPoul-Henning Kamp }
26721cbbd625SGarrett Wollman 
26731cbbd625SGarrett Wollman /*
26741cbbd625SGarrett Wollman * Record a process's interest in events which might happen to
26751cbbd625SGarrett Wollman * a vnode. Because poll uses the historic select-style interface
26761cbbd625SGarrett Wollman * internally, this routine serves as both the ``check for any
26771cbbd625SGarrett Wollman * pending events'' and the ``record my interest in future events''
26781cbbd625SGarrett Wollman * functions. (These are done together, while the lock is held,
26791cbbd625SGarrett Wollman * to avoid race conditions.)
26801cbbd625SGarrett Wollman */
26811cbbd625SGarrett Wollman int
26821cbbd625SGarrett Wollman vn_pollrecord(vp, p, events)
26831cbbd625SGarrett Wollman struct vnode *vp;
26841cbbd625SGarrett Wollman struct proc *p;
26851cbbd625SGarrett Wollman short events;
26861cbbd625SGarrett Wollman {
26871cbbd625SGarrett Wollman simple_lock(&vp->v_pollinfo.vpi_lock);
26881cbbd625SGarrett Wollman if (vp->v_pollinfo.vpi_revents & events) {
26891cbbd625SGarrett Wollman /*
26901cbbd625SGarrett Wollman * This leaves events we are not interested
26911cbbd625SGarrett Wollman * in available for the other process which
26921cbbd625SGarrett Wollman * presumably had requested them
26931cbbd625SGarrett Wollman * (otherwise they would never have been
26941cbbd625SGarrett Wollman * recorded).
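 *
 * For example, if POLLIN and POLLOUT were both pending and this
 * caller asked only for POLLIN, POLLIN is returned and cleared
 * here while POLLOUT is left in vpi_revents for the other poller.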
26951cbbd625SGarrett Wollman */ 26961cbbd625SGarrett Wollman events &= vp->v_pollinfo.vpi_revents; 26971cbbd625SGarrett Wollman vp->v_pollinfo.vpi_revents &= ~events; 26981cbbd625SGarrett Wollman 26991cbbd625SGarrett Wollman simple_unlock(&vp->v_pollinfo.vpi_lock); 27001cbbd625SGarrett Wollman return events; 27011cbbd625SGarrett Wollman } 27021cbbd625SGarrett Wollman vp->v_pollinfo.vpi_events |= events; 27031cbbd625SGarrett Wollman selrecord(p, &vp->v_pollinfo.vpi_selinfo); 27041cbbd625SGarrett Wollman simple_unlock(&vp->v_pollinfo.vpi_lock); 27051cbbd625SGarrett Wollman return 0; 27061cbbd625SGarrett Wollman } 27071cbbd625SGarrett Wollman 27081cbbd625SGarrett Wollman /* 27091cbbd625SGarrett Wollman * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 27101cbbd625SGarrett Wollman * it is possible for us to miss an event due to race conditions, but 27111cbbd625SGarrett Wollman * that condition is expected to be rare, so for the moment it is the 27121cbbd625SGarrett Wollman * preferred interface. 27131cbbd625SGarrett Wollman */ 27141cbbd625SGarrett Wollman void 27151cbbd625SGarrett Wollman vn_pollevent(vp, events) 27161cbbd625SGarrett Wollman struct vnode *vp; 27171cbbd625SGarrett Wollman short events; 27181cbbd625SGarrett Wollman { 27191cbbd625SGarrett Wollman simple_lock(&vp->v_pollinfo.vpi_lock); 27201cbbd625SGarrett Wollman if (vp->v_pollinfo.vpi_events & events) { 27211cbbd625SGarrett Wollman /* 27221cbbd625SGarrett Wollman * We clear vpi_events so that we don't 27231cbbd625SGarrett Wollman * call selwakeup() twice if two events are 27241cbbd625SGarrett Wollman * posted before the polling process(es) is 27251cbbd625SGarrett Wollman * awakened. This also ensures that we take at 27261cbbd625SGarrett Wollman * most one selwakeup() if the polling process 27271cbbd625SGarrett Wollman * is no longer interested. However, it does 27281cbbd625SGarrett Wollman * mean that only one event can be noticed at 27291cbbd625SGarrett Wollman * a time. (Perhaps we should only clear those 27301cbbd625SGarrett Wollman * event bits which we note?) XXX 27311cbbd625SGarrett Wollman */ 27321cbbd625SGarrett Wollman vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ 27331cbbd625SGarrett Wollman vp->v_pollinfo.vpi_revents |= events; 27341cbbd625SGarrett Wollman selwakeup(&vp->v_pollinfo.vpi_selinfo); 27351cbbd625SGarrett Wollman } 27361cbbd625SGarrett Wollman simple_unlock(&vp->v_pollinfo.vpi_lock); 27371cbbd625SGarrett Wollman } 27381cbbd625SGarrett Wollman 27391cbbd625SGarrett Wollman /* 27401cbbd625SGarrett Wollman * Wake up anyone polling on vp because it is being revoked. 27411cbbd625SGarrett Wollman * This depends on dead_poll() returning POLLHUP for correct 27421cbbd625SGarrett Wollman * behavior. 27431cbbd625SGarrett Wollman */ 27441cbbd625SGarrett Wollman void 27451cbbd625SGarrett Wollman vn_pollgone(vp) 27461cbbd625SGarrett Wollman struct vnode *vp; 27471cbbd625SGarrett Wollman { 27481cbbd625SGarrett Wollman simple_lock(&vp->v_pollinfo.vpi_lock); 27491cbbd625SGarrett Wollman if (vp->v_pollinfo.vpi_events) { 27501cbbd625SGarrett Wollman vp->v_pollinfo.vpi_events = 0; 27511cbbd625SGarrett Wollman selwakeup(&vp->v_pollinfo.vpi_selinfo); 27521cbbd625SGarrett Wollman } 27531cbbd625SGarrett Wollman simple_unlock(&vp->v_pollinfo.vpi_lock); 27541cbbd625SGarrett Wollman } 2755b1897c19SJulian Elischer 2756b1897c19SJulian Elischer 2757b1897c19SJulian Elischer 2758b1897c19SJulian Elischer /* 2759b1897c19SJulian Elischer * Routine to create and manage a filesystem syncer vnode. 
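 *
 * The syncer vnode carries no data of its own; it exists only to sit
 * on the syncer worklist so that the periodic syncer calls
 * VOP_FSYNC() on it, which sync_fsync() below turns into a
 * vfs_msync()/VFS_SYNC() pass over its mount point.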
2760b1897c19SJulian Elischer */ 2761b1897c19SJulian Elischer #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) 27620df45b5aSEivind Eklund static int sync_fsync __P((struct vop_fsync_args *)); 27630df45b5aSEivind Eklund static int sync_inactive __P((struct vop_inactive_args *)); 27640df45b5aSEivind Eklund static int sync_reclaim __P((struct vop_reclaim_args *)); 2765b1897c19SJulian Elischer #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) 2766b1897c19SJulian Elischer #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) 27670df45b5aSEivind Eklund static int sync_print __P((struct vop_print_args *)); 2768b1897c19SJulian Elischer #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) 2769b1897c19SJulian Elischer 2770db878ba4SEivind Eklund static vop_t **sync_vnodeop_p; 2771db878ba4SEivind Eklund static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2772b1897c19SJulian Elischer { &vop_default_desc, (vop_t *) vop_eopnotsupp }, 2773b1897c19SJulian Elischer { &vop_close_desc, (vop_t *) sync_close }, /* close */ 2774b1897c19SJulian Elischer { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ 2775b1897c19SJulian Elischer { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ 2776b1897c19SJulian Elischer { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ 2777b1897c19SJulian Elischer { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ 2778b1897c19SJulian Elischer { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ 2779b1897c19SJulian Elischer { &vop_print_desc, (vop_t *) sync_print }, /* print */ 2780b1897c19SJulian Elischer { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ 2781b1897c19SJulian Elischer { NULL, NULL } 2782b1897c19SJulian Elischer }; 2783db878ba4SEivind Eklund static struct vnodeopv_desc sync_vnodeop_opv_desc = 2784b1897c19SJulian Elischer { &sync_vnodeop_p, sync_vnodeop_entries }; 2785b1897c19SJulian Elischer 2786b1897c19SJulian Elischer VNODEOP_SET(sync_vnodeop_opv_desc); 2787b1897c19SJulian Elischer 2788b1897c19SJulian Elischer /* 2789b1897c19SJulian Elischer * Create a new filesystem syncer vnode for the specified mount point. 2790b1897c19SJulian Elischer */ 2791b1897c19SJulian Elischer int 2792b1897c19SJulian Elischer vfs_allocate_syncvnode(mp) 2793b1897c19SJulian Elischer struct mount *mp; 2794b1897c19SJulian Elischer { 2795b1897c19SJulian Elischer struct vnode *vp; 2796b1897c19SJulian Elischer static long start, incr, next; 2797b1897c19SJulian Elischer int error; 2798b1897c19SJulian Elischer 2799b1897c19SJulian Elischer /* Allocate a new vnode */ 2800b1897c19SJulian Elischer if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2801b1897c19SJulian Elischer mp->mnt_syncer = NULL; 2802b1897c19SJulian Elischer return (error); 2803b1897c19SJulian Elischer } 2804b1897c19SJulian Elischer vp->v_type = VNON; 2805b1897c19SJulian Elischer /* 2806b1897c19SJulian Elischer * Place the vnode onto the syncer worklist. We attempt to 2807b1897c19SJulian Elischer * scatter them about on the list so that they will go off 2808b1897c19SJulian Elischer * at evenly distributed times even if all the filesystems 2809b1897c19SJulian Elischer * are mounted at once. 
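 *
 * As an illustration (assuming syncer_maxdelay == 32 purely for the
 * example): the first allocation takes the "next == 0" path and is
 * assigned slot 16; later ones get 8, 24, then 4, 12, 20, 28, and so
 * on, halving the starting offset each time a pass over the range
 * completes, so the slots fill in evenly.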
2810b1897c19SJulian Elischer */
2811b1897c19SJulian Elischer next += incr;
2812b1897c19SJulian Elischer if (next == 0 || next > syncer_maxdelay) {
2813b1897c19SJulian Elischer start /= 2;
2814b1897c19SJulian Elischer incr /= 2;
2815b1897c19SJulian Elischer if (start == 0) {
2816b1897c19SJulian Elischer start = syncer_maxdelay / 2;
2817b1897c19SJulian Elischer incr = syncer_maxdelay;
2818b1897c19SJulian Elischer }
2819b1897c19SJulian Elischer next = start;
2820b1897c19SJulian Elischer }
2821b1897c19SJulian Elischer vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
2822b1897c19SJulian Elischer mp->mnt_syncer = vp;
2823b1897c19SJulian Elischer return (0);
2824b1897c19SJulian Elischer }
2825b1897c19SJulian Elischer 
2826b1897c19SJulian Elischer /*
2827b1897c19SJulian Elischer * Do a lazy sync of the filesystem.
2828b1897c19SJulian Elischer */
2829db878ba4SEivind Eklund static int
2830b1897c19SJulian Elischer sync_fsync(ap)
2831b1897c19SJulian Elischer struct vop_fsync_args /* {
2832b1897c19SJulian Elischer struct vnode *a_vp;
2833b1897c19SJulian Elischer struct ucred *a_cred;
2834b1897c19SJulian Elischer int a_waitfor;
2835b1897c19SJulian Elischer struct proc *a_p;
2836b1897c19SJulian Elischer } */ *ap;
2837b1897c19SJulian Elischer {
2838b1897c19SJulian Elischer struct vnode *syncvp = ap->a_vp;
2839b1897c19SJulian Elischer struct mount *mp = syncvp->v_mount;
2840b1897c19SJulian Elischer struct proc *p = ap->a_p;
2841b1897c19SJulian Elischer int asyncflag;
2842b1897c19SJulian Elischer 
2843b1897c19SJulian Elischer /*
2844b1897c19SJulian Elischer * We only need to do something if this is a lazy evaluation.
2845b1897c19SJulian Elischer */
2846b1897c19SJulian Elischer if (ap->a_waitfor != MNT_LAZY)
2847b1897c19SJulian Elischer return (0);
2848b1897c19SJulian Elischer 
2849b1897c19SJulian Elischer /*
2850b1897c19SJulian Elischer * Move ourselves to the back of the sync list.
2851b1897c19SJulian Elischer */
2852b1897c19SJulian Elischer vn_syncer_add_to_worklist(syncvp, syncdelay);
2853b1897c19SJulian Elischer 
2854b1897c19SJulian Elischer /*
2855b1897c19SJulian Elischer * Walk the list of vnodes pushing all that are dirty and
2856b1897c19SJulian Elischer * not already on the sync list.
2857b1897c19SJulian Elischer */
2858a18b1f1dSJason Evans mtx_enter(&mountlist_mtx, MTX_DEF);
2859a18b1f1dSJason Evans if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_mtx, p) != 0) {
2860a18b1f1dSJason Evans mtx_exit(&mountlist_mtx, MTX_DEF);
2861b1897c19SJulian Elischer return (0);
286271033a8cSTor Egge }
2863f2a2857bSKirk McKusick if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
2864f2a2857bSKirk McKusick vfs_unbusy(mp, p);
2865a18b1f1dSJason Evans mtx_exit(&mountlist_mtx, MTX_DEF);
2866f2a2857bSKirk McKusick return (0);
2867f2a2857bSKirk McKusick }
2868b1897c19SJulian Elischer asyncflag = mp->mnt_flag & MNT_ASYNC;
2869b1897c19SJulian Elischer mp->mnt_flag &= ~MNT_ASYNC;
2870efdc5523SPeter Wemm vfs_msync(mp, MNT_NOWAIT);
2871b1897c19SJulian Elischer VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
2872b1897c19SJulian Elischer if (asyncflag)
2873b1897c19SJulian Elischer mp->mnt_flag |= MNT_ASYNC;
2874f2a2857bSKirk McKusick vn_finished_write(mp);
2875b1897c19SJulian Elischer vfs_unbusy(mp, p);
2876b1897c19SJulian Elischer return (0);
2877b1897c19SJulian Elischer }
2878b1897c19SJulian Elischer 
2879b1897c19SJulian Elischer /*
2880b1897c19SJulian Elischer * The syncer vnode is no longer referenced.
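 *
 * There is nothing left to flush; vgone() below leads to
 * sync_reclaim(), which takes the vnode off the syncer worklist.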
2881b1897c19SJulian Elischer */ 2882db878ba4SEivind Eklund static int 2883b1897c19SJulian Elischer sync_inactive(ap) 2884b1897c19SJulian Elischer struct vop_inactive_args /* { 2885b1897c19SJulian Elischer struct vnode *a_vp; 2886b1897c19SJulian Elischer struct proc *a_p; 2887b1897c19SJulian Elischer } */ *ap; 2888b1897c19SJulian Elischer { 2889b1897c19SJulian Elischer 2890b1897c19SJulian Elischer vgone(ap->a_vp); 2891b1897c19SJulian Elischer return (0); 2892b1897c19SJulian Elischer } 2893b1897c19SJulian Elischer 2894b1897c19SJulian Elischer /* 2895b1897c19SJulian Elischer * The syncer vnode is no longer needed and is being decommissioned. 289642e26d47SMatthew Dillon * 289742e26d47SMatthew Dillon * Modifications to the worklist must be protected at splbio(). 2898b1897c19SJulian Elischer */ 2899db878ba4SEivind Eklund static int 2900b1897c19SJulian Elischer sync_reclaim(ap) 2901b1897c19SJulian Elischer struct vop_reclaim_args /* { 2902b1897c19SJulian Elischer struct vnode *a_vp; 2903b1897c19SJulian Elischer } */ *ap; 2904b1897c19SJulian Elischer { 2905b1897c19SJulian Elischer struct vnode *vp = ap->a_vp; 290642e26d47SMatthew Dillon int s; 2907b1897c19SJulian Elischer 290842e26d47SMatthew Dillon s = splbio(); 2909b1897c19SJulian Elischer vp->v_mount->mnt_syncer = NULL; 2910b1897c19SJulian Elischer if (vp->v_flag & VONWORKLST) { 2911b1897c19SJulian Elischer LIST_REMOVE(vp, v_synclist); 2912b1897c19SJulian Elischer vp->v_flag &= ~VONWORKLST; 2913b1897c19SJulian Elischer } 291442e26d47SMatthew Dillon splx(s); 2915b1897c19SJulian Elischer 2916b1897c19SJulian Elischer return (0); 2917b1897c19SJulian Elischer } 2918b1897c19SJulian Elischer 2919b1897c19SJulian Elischer /* 2920b1897c19SJulian Elischer * Print out a syncer vnode. 2921b1897c19SJulian Elischer */ 2922db878ba4SEivind Eklund static int 2923b1897c19SJulian Elischer sync_print(ap) 2924b1897c19SJulian Elischer struct vop_print_args /* { 2925b1897c19SJulian Elischer struct vnode *a_vp; 2926b1897c19SJulian Elischer } */ *ap; 2927b1897c19SJulian Elischer { 2928b1897c19SJulian Elischer struct vnode *vp = ap->a_vp; 2929b1897c19SJulian Elischer 2930b1897c19SJulian Elischer printf("syncer vnode"); 2931b1897c19SJulian Elischer if (vp->v_vnlock != NULL) 2932b1897c19SJulian Elischer lockmgr_printinfo(vp->v_vnlock); 2933b1897c19SJulian Elischer printf("\n"); 2934b1897c19SJulian Elischer return (0); 2935b1897c19SJulian Elischer } 29366ca54864SPoul-Henning Kamp 29376ca54864SPoul-Henning Kamp /* 29386ca54864SPoul-Henning Kamp * extract the dev_t from a VBLK or VCHR 29396ca54864SPoul-Henning Kamp */ 29406ca54864SPoul-Henning Kamp dev_t 29416ca54864SPoul-Henning Kamp vn_todev(vp) 29426ca54864SPoul-Henning Kamp struct vnode *vp; 29436ca54864SPoul-Henning Kamp { 29446ca54864SPoul-Henning Kamp if (vp->v_type != VBLK && vp->v_type != VCHR) 29456ca54864SPoul-Henning Kamp return (NODEV); 29466ca54864SPoul-Henning Kamp return (vp->v_rdev); 29476ca54864SPoul-Henning Kamp } 294841d2e3e0SPoul-Henning Kamp 294941d2e3e0SPoul-Henning Kamp /* 295041d2e3e0SPoul-Henning Kamp * Check if vnode represents a disk device 295141d2e3e0SPoul-Henning Kamp */ 295241d2e3e0SPoul-Henning Kamp int 2953ba4ad1fcSPoul-Henning Kamp vn_isdisk(vp, errp) 295441d2e3e0SPoul-Henning Kamp struct vnode *vp; 2955ba4ad1fcSPoul-Henning Kamp int *errp; 295641d2e3e0SPoul-Henning Kamp { 295764dc16dfSPoul-Henning Kamp struct cdevsw *cdevsw; 295864dc16dfSPoul-Henning Kamp 2959ba4ad1fcSPoul-Henning Kamp if (vp->v_type != VBLK && vp->v_type != VCHR) { 2960ba4ad1fcSPoul-Henning Kamp if (errp != NULL) 
2961ba4ad1fcSPoul-Henning Kamp *errp = ENOTBLK; 296241d2e3e0SPoul-Henning Kamp return (0); 2963ba4ad1fcSPoul-Henning Kamp } 2964b081a64aSChris Costello if (vp->v_rdev == NULL) { 2965b081a64aSChris Costello if (errp != NULL) 2966b081a64aSChris Costello *errp = ENXIO; 2967b081a64aSChris Costello return (0); 2968b081a64aSChris Costello } 296964dc16dfSPoul-Henning Kamp cdevsw = devsw(vp->v_rdev); 297064dc16dfSPoul-Henning Kamp if (cdevsw == NULL) { 2971ba4ad1fcSPoul-Henning Kamp if (errp != NULL) 2972ba4ad1fcSPoul-Henning Kamp *errp = ENXIO; 297341d2e3e0SPoul-Henning Kamp return (0); 2974ba4ad1fcSPoul-Henning Kamp } 297564dc16dfSPoul-Henning Kamp if (!(cdevsw->d_flags & D_DISK)) { 2976ba4ad1fcSPoul-Henning Kamp if (errp != NULL) 2977ba4ad1fcSPoul-Henning Kamp *errp = ENOTBLK; 297841d2e3e0SPoul-Henning Kamp return (0); 2979ba4ad1fcSPoul-Henning Kamp } 2980ba4ad1fcSPoul-Henning Kamp if (errp != NULL) 2981ba4ad1fcSPoul-Henning Kamp *errp = 0; 298241d2e3e0SPoul-Henning Kamp return (1); 298341d2e3e0SPoul-Henning Kamp } 298441d2e3e0SPoul-Henning Kamp 2985453aaa0dSEivind Eklund /* 2986a863c0fbSEivind Eklund * Free data allocated by namei(); see namei(9) for details. 2987453aaa0dSEivind Eklund */ 2988e12d97d2SEivind Eklund void 2989e12d97d2SEivind Eklund NDFREE(ndp, flags) 2990e12d97d2SEivind Eklund struct nameidata *ndp; 2991e12d97d2SEivind Eklund const uint flags; 2992e12d97d2SEivind Eklund { 2993e12d97d2SEivind Eklund if (!(flags & NDF_NO_FREE_PNBUF) && 2994e12d97d2SEivind Eklund (ndp->ni_cnd.cn_flags & HASBUF)) { 2995e12d97d2SEivind Eklund zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 2996e12d97d2SEivind Eklund ndp->ni_cnd.cn_flags &= ~HASBUF; 2997e12d97d2SEivind Eklund } 2998e12d97d2SEivind Eklund if (!(flags & NDF_NO_DVP_UNLOCK) && 2999e12d97d2SEivind Eklund (ndp->ni_cnd.cn_flags & LOCKPARENT) && 3000e12d97d2SEivind Eklund ndp->ni_dvp != ndp->ni_vp) 3001e12d97d2SEivind Eklund VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc); 3002e12d97d2SEivind Eklund if (!(flags & NDF_NO_DVP_RELE) && 3003e12d97d2SEivind Eklund (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 3004e12d97d2SEivind Eklund vrele(ndp->ni_dvp); 3005e12d97d2SEivind Eklund ndp->ni_dvp = NULL; 3006e12d97d2SEivind Eklund } 3007e12d97d2SEivind Eklund if (!(flags & NDF_NO_VP_UNLOCK) && 3008e12d97d2SEivind Eklund (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 3009e12d97d2SEivind Eklund VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc); 3010e12d97d2SEivind Eklund if (!(flags & NDF_NO_VP_RELE) && 3011e12d97d2SEivind Eklund ndp->ni_vp) { 3012e12d97d2SEivind Eklund vrele(ndp->ni_vp); 3013e12d97d2SEivind Eklund ndp->ni_vp = NULL; 3014e12d97d2SEivind Eklund } 3015e12d97d2SEivind Eklund if (!(flags & NDF_NO_STARTDIR_RELE) && 3016e12d97d2SEivind Eklund (ndp->ni_cnd.cn_flags & SAVESTART)) { 3017e12d97d2SEivind Eklund vrele(ndp->ni_startdir); 3018e12d97d2SEivind Eklund ndp->ni_startdir = NULL; 3019e12d97d2SEivind Eklund } 3020e12d97d2SEivind Eklund } 3021e39c53edSPoul-Henning Kamp 3022e0848358SRobert Watson /* 3023e0848358SRobert Watson * Common file system object access control check routine. Accepts a 3024e0848358SRobert Watson * vnode's type, "mode", uid and gid, requested access mode, credentials, 3025e0848358SRobert Watson * and optional call-by-reference privused argument allowing vaccess() 3026e0848358SRobert Watson * to indicate to the caller whether privilege was used to satisfy the 3027e0848358SRobert Watson * request. Returns 0 on success, or an errno on failure. 
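 *
 * A sketch of a typical call from a filesystem's VOP_ACCESS()
 * routine, with "ip" standing in for that filesystem's in-core
 * inode (illustrative only):
 *
 *	return (vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
 *	    ap->a_mode, ap->a_cred, NULL));
 *
 * A NULL privused means the caller does not care whether privilege
 * was needed to grant the access.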
3028e0848358SRobert Watson */ 3029e39c53edSPoul-Henning Kamp int 3030012c643dSRobert Watson vaccess(type, file_mode, file_uid, file_gid, acc_mode, cred, privused) 3031e39c53edSPoul-Henning Kamp enum vtype type; 3032e39c53edSPoul-Henning Kamp mode_t file_mode; 3033012c643dSRobert Watson uid_t file_uid; 3034012c643dSRobert Watson gid_t file_gid; 3035e39c53edSPoul-Henning Kamp mode_t acc_mode; 3036e39c53edSPoul-Henning Kamp struct ucred *cred; 3037012c643dSRobert Watson int *privused; 3038e39c53edSPoul-Henning Kamp { 3039012c643dSRobert Watson mode_t dac_granted; 3040012c643dSRobert Watson #ifdef CAPABILITIES 3041012c643dSRobert Watson mode_t cap_granted; 3042012c643dSRobert Watson #endif 3043e39c53edSPoul-Henning Kamp 3044e39c53edSPoul-Henning Kamp /* 3045012c643dSRobert Watson * Look for a normal, non-privileged way to access the file/directory 3046012c643dSRobert Watson * as requested. If it exists, go with that. 3047e39c53edSPoul-Henning Kamp */ 3048012c643dSRobert Watson 3049012c643dSRobert Watson if (privused != NULL) 3050012c643dSRobert Watson *privused = 0; 3051012c643dSRobert Watson 3052012c643dSRobert Watson dac_granted = 0; 3053012c643dSRobert Watson 3054012c643dSRobert Watson /* Check the owner. */ 3055012c643dSRobert Watson if (cred->cr_uid == file_uid) { 3056012c643dSRobert Watson if (file_mode & S_IXUSR) 3057012c643dSRobert Watson dac_granted |= VEXEC; 3058012c643dSRobert Watson if (file_mode & S_IRUSR) 3059012c643dSRobert Watson dac_granted |= VREAD; 3060012c643dSRobert Watson if (file_mode & S_IWUSR) 3061012c643dSRobert Watson dac_granted |= VWRITE; 3062012c643dSRobert Watson 3063012c643dSRobert Watson if ((acc_mode & dac_granted) == acc_mode) 3064e39c53edSPoul-Henning Kamp return (0); 3065e39c53edSPoul-Henning Kamp 3066012c643dSRobert Watson goto privcheck; 3067e39c53edSPoul-Henning Kamp } 3068e39c53edSPoul-Henning Kamp 3069012c643dSRobert Watson /* Otherwise, check the groups (first match) */ 3070012c643dSRobert Watson if (groupmember(file_gid, cred)) { 3071012c643dSRobert Watson if (file_mode & S_IXGRP) 3072012c643dSRobert Watson dac_granted |= VEXEC; 3073012c643dSRobert Watson if (file_mode & S_IRGRP) 3074012c643dSRobert Watson dac_granted |= VREAD; 3075012c643dSRobert Watson if (file_mode & S_IWGRP) 3076012c643dSRobert Watson dac_granted |= VWRITE; 3077012c643dSRobert Watson 3078012c643dSRobert Watson if ((acc_mode & dac_granted) == acc_mode) 3079012c643dSRobert Watson return (0); 3080012c643dSRobert Watson 3081012c643dSRobert Watson goto privcheck; 3082e39c53edSPoul-Henning Kamp } 3083e39c53edSPoul-Henning Kamp 3084e39c53edSPoul-Henning Kamp /* Otherwise, check everyone else. 
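 * If the "other" bits do not cover acc_mode either, fall through
 * to the privilege checks below.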
*/ 3085012c643dSRobert Watson if (file_mode & S_IXOTH) 3086012c643dSRobert Watson dac_granted |= VEXEC; 3087012c643dSRobert Watson if (file_mode & S_IROTH) 3088012c643dSRobert Watson dac_granted |= VREAD; 3089012c643dSRobert Watson if (file_mode & S_IWOTH) 3090012c643dSRobert Watson dac_granted |= VWRITE; 3091012c643dSRobert Watson if ((acc_mode & dac_granted) == acc_mode) 3092012c643dSRobert Watson return (0); 3093012c643dSRobert Watson 3094012c643dSRobert Watson privcheck: 3095012c643dSRobert Watson if (!suser_xxx(cred, NULL, PRISON_ROOT)) { 3096012c643dSRobert Watson /* XXX audit: privilege used */ 3097012c643dSRobert Watson if (privused != NULL) 3098012c643dSRobert Watson *privused = 1; 3099012c643dSRobert Watson return (0); 3100012c643dSRobert Watson } 3101012c643dSRobert Watson 3102012c643dSRobert Watson #ifdef CAPABILITIES 3103012c643dSRobert Watson /* 3104012c643dSRobert Watson * Build a capability mask to determine if the set of capabilities 3105012c643dSRobert Watson * satisfies the requirements when combined with the granted mask 3106012c643dSRobert Watson * from above. 3107012c643dSRobert Watson * For each capability, if the capability is required, bitwise 3108012c643dSRobert Watson * or the request type onto the cap_granted mask. 3109012c643dSRobert Watson */ 3110012c643dSRobert Watson cap_granted = 0; 3111012c643dSRobert Watson if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) && 3112728783c2SRobert Watson !cap_check_xxx(cred, NULL, CAP_DAC_EXECUTE, PRISON_ROOT)) 3113012c643dSRobert Watson cap_granted |= VEXEC; 3114012c643dSRobert Watson 3115012c643dSRobert Watson if ((acc_mode & VREAD) && ((dac_granted & VREAD) == 0) && 3116728783c2SRobert Watson !cap_check_xxx(cred, NULL, CAP_DAC_READ_SEARCH, PRISON_ROOT)) 3117012c643dSRobert Watson cap_granted |= VREAD; 3118012c643dSRobert Watson 3119012c643dSRobert Watson if ((acc_mode & VWRITE) && ((dac_granted & VWRITE) == 0) && 3120728783c2SRobert Watson !cap_check_xxx(cred, NULL, CAP_DAC_WRITE, PRISON_ROOT)) 3121012c643dSRobert Watson cap_granted |= VWRITE; 3122012c643dSRobert Watson 3123728783c2SRobert Watson if ((acc_mode & (cap_granted | dac_granted)) == acc_mode) { 3124012c643dSRobert Watson /* XXX audit: privilege used */ 3125012c643dSRobert Watson if (privused != NULL) 3126012c643dSRobert Watson *privused = 1; 3127012c643dSRobert Watson return (0); 3128012c643dSRobert Watson } 3129012c643dSRobert Watson #endif 3130012c643dSRobert Watson 3131012c643dSRobert Watson return (EACCES); 3132e39c53edSPoul-Henning Kamp } 3133