/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
Grimes * 26df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36df8bae1dSRodney W. Grimes * SUCH DAMAGE. 37df8bae1dSRodney W. Grimes * 38996c772fSJohn Dyson * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39c3aac50fSPeter Wemm * $FreeBSD$ 40df8bae1dSRodney W. Grimes */ 41df8bae1dSRodney W. Grimes 42df8bae1dSRodney W. Grimes /* 43df8bae1dSRodney W. Grimes * External virtual filesystem routines 44df8bae1dSRodney W. Grimes */ 450e41ee30SGarrett Wollman #include "opt_ddb.h" 463275cf73SPoul-Henning Kamp #include "opt_ffs.h" 47df8bae1dSRodney W. Grimes 48df8bae1dSRodney W. Grimes #include <sys/param.h> 49df8bae1dSRodney W. 
Grimes #include <sys/systm.h> 509626b608SPoul-Henning Kamp #include <sys/bio.h> 515e950839SLuoqi Chen #include <sys/buf.h> 525e950839SLuoqi Chen #include <sys/conf.h> 535e950839SLuoqi Chen #include <sys/dirent.h> 545e950839SLuoqi Chen #include <sys/domain.h> 555e950839SLuoqi Chen #include <sys/eventhandler.h> 564d948813SBruce Evans #include <sys/fcntl.h> 57986f4ce7SBruce Evans #include <sys/kernel.h> 589c8b8baaSPeter Wemm #include <sys/kthread.h> 590384fff8SJason Evans #include <sys/ktr.h> 60a1c995b6SPoul-Henning Kamp #include <sys/malloc.h> 61df8bae1dSRodney W. Grimes #include <sys/mount.h> 62e12d97d2SEivind Eklund #include <sys/namei.h> 635e950839SLuoqi Chen #include <sys/proc.h> 645e950839SLuoqi Chen #include <sys/reboot.h> 65771b51efSBruce Evans #include <sys/socket.h> 66df8bae1dSRodney W. Grimes #include <sys/stat.h> 675e950839SLuoqi Chen #include <sys/sysctl.h> 682be70f79SJohn Dyson #include <sys/vmmeter.h> 695e950839SLuoqi Chen #include <sys/vnode.h> 70df8bae1dSRodney W. Grimes 71d3114049SBruce Evans #include <machine/limits.h> 720384fff8SJason Evans #include <machine/mutex.h> 73d3114049SBruce Evans 74df8bae1dSRodney W. Grimes #include <vm/vm.h> 75efeaf95aSDavid Greenman #include <vm/vm_object.h> 76efeaf95aSDavid Greenman #include <vm/vm_extern.h> 771efb74fbSJohn Dyson #include <vm/pmap.h> 781efb74fbSJohn Dyson #include <vm/vm_map.h> 791c7c3c6aSMatthew Dillon #include <vm/vm_page.h> 8047221757SJohn Dyson #include <vm/vm_pager.h> 816476c0d2SJohn Dyson #include <vm/vnode_pager.h> 822d8acc0fSJohn Dyson #include <vm/vm_zone.h> 83df8bae1dSRodney W. 
Grimes 84a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 8555166637SPoul-Henning Kamp 86cb451ebdSBruce Evans static void insmntque __P((struct vnode *vp, struct mount *mp)); 87996c772fSJohn Dyson static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 88cb451ebdSBruce Evans static unsigned long numvnodes; 89b15a966eSPoul-Henning Kamp SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 9098d93822SBruce Evans 91df8bae1dSRodney W. Grimes enum vtype iftovt_tab[16] = { 92df8bae1dSRodney W. Grimes VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 93df8bae1dSRodney W. Grimes VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 94df8bae1dSRodney W. Grimes }; 95df8bae1dSRodney W. Grimes int vttoif_tab[9] = { 96df8bae1dSRodney W. Grimes 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 97df8bae1dSRodney W. Grimes S_IFSOCK, S_IFIFO, S_IFMT, 98df8bae1dSRodney W. Grimes }; 99df8bae1dSRodney W. Grimes 100e3975643SJake Burkholder static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 101925a3a41SJohn Dyson 10287b1940aSPoul-Henning Kamp static u_long wantfreevnodes = 25; 10300544193SPoul-Henning Kamp SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 104cba2a7c6SBruce Evans static u_long freevnodes = 0; 105a051452aSPoul-Henning Kamp SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 106fbd6e6c9SPoul-Henning Kamp 107e929c00dSKirk McKusick static int reassignbufcalls; 108e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); 109e929c00dSKirk McKusick static int reassignbufloops; 110e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, ""); 111e929c00dSKirk McKusick static int reassignbufsortgood; 112e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, ""); 113e929c00dSKirk 
McKusick static int reassignbufsortbad; 114e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, ""); 115e929c00dSKirk McKusick static int reassignbufmethod = 1; 116e929c00dSKirk McKusick SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, ""); 117e929c00dSKirk McKusick 118bef608bdSJohn Dyson #ifdef ENABLE_VFS_IOOPT 119ad8ac923SKirk McKusick int vfs_ioopt = 0; 12060f8d464SJohn Dyson SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 12126300b34SJohn Dyson #endif 12260f8d464SJohn Dyson 1230429e37aSPoul-Henning Kamp struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */ 124996c772fSJohn Dyson struct simplelock mountlist_slock; 125996c772fSJohn Dyson struct simplelock mntvnode_slock; 126500b04a2SBruce Evans int nfs_mount_type = -1; 127289bdf33SBruce Evans #ifndef NULL_SIMPLELOCKS 128289bdf33SBruce Evans static struct simplelock mntid_slock; 129303b270bSEivind Eklund static struct simplelock vnode_free_list_slock; 130996c772fSJohn Dyson static struct simplelock spechash_slock; 131289bdf33SBruce Evans #endif 132f6b4c285SDoug Rabson struct nfs_public nfs_pub; /* publicly exported FS */ 1332d8acc0fSJohn Dyson static vm_zone_t vnode_zone; 134a8b1f9d2SPoul-Henning Kamp int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 135df8bae1dSRodney W. Grimes 136b1897c19SJulian Elischer /* 137b1897c19SJulian Elischer * The workitem queue. 
138b1897c19SJulian Elischer */ 139b1897c19SJulian Elischer #define SYNCER_MAXDELAY 32 140db878ba4SEivind Eklund static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 141e4ab40bcSKirk McKusick time_t syncdelay = 30; /* max time to delay syncing data */ 142e4ab40bcSKirk McKusick time_t filedelay = 30; /* time to delay syncing files */ 143e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); 14467812eacSKirk McKusick time_t dirdelay = 29; /* time to delay syncing directories */ 145e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); 14667812eacSKirk McKusick time_t metadelay = 28; /* time to delay syncing metadata */ 147e4ab40bcSKirk McKusick SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); 148e4ab40bcSKirk McKusick static int rushjob; /* number of slots to run ASAP */ 149e4ab40bcSKirk McKusick static int stat_rush_requests; /* number of times I/O speeded up */ 150e4ab40bcSKirk McKusick SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); 151b1897c19SJulian Elischer 152b1897c19SJulian Elischer static int syncer_delayno = 0; 153b1897c19SJulian Elischer static long syncer_mask; 154e3975643SJake Burkholder LIST_HEAD(synclist, vnode); 155b1897c19SJulian Elischer static struct synclist *syncer_workitem_pending; 156b1897c19SJulian Elischer 1570d94caffSDavid Greenman int desiredvnodes; 1583d177f46SBill Fumerola SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, 1593d177f46SBill Fumerola &desiredvnodes, 0, "Maximum number of vnodes"); 1600d94caffSDavid Greenman 16198d93822SBruce Evans static void vfs_free_addrlist __P((struct netexport *nep)); 16298d93822SBruce Evans static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 16398d93822SBruce Evans static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 16498d93822SBruce Evans struct export_args *argp)); 16598d93822SBruce Evans 166df8bae1dSRodney 
W. Grimes /* 167df8bae1dSRodney W. Grimes * Initialize the vnode management data structures. 168df8bae1dSRodney W. Grimes */ 16926f9a767SRodney W. Grimes void 170df8bae1dSRodney W. Grimes vntblinit() 171df8bae1dSRodney W. Grimes { 172df8bae1dSRodney W. Grimes 1732be70f79SJohn Dyson desiredvnodes = maxproc + cnt.v_page_count / 4; 174996c772fSJohn Dyson simple_lock_init(&mntvnode_slock); 175996c772fSJohn Dyson simple_lock_init(&mntid_slock); 176996c772fSJohn Dyson simple_lock_init(&spechash_slock); 177df8bae1dSRodney W. Grimes TAILQ_INIT(&vnode_free_list); 178996c772fSJohn Dyson simple_lock_init(&vnode_free_list_slock); 1792d8acc0fSJohn Dyson vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 180b1897c19SJulian Elischer /* 181b1897c19SJulian Elischer * Initialize the filesystem syncer. 182b1897c19SJulian Elischer */ 183b1897c19SJulian Elischer syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 184b1897c19SJulian Elischer &syncer_mask); 185b1897c19SJulian Elischer syncer_maxdelay = syncer_mask + 1; 186df8bae1dSRodney W. Grimes } 187df8bae1dSRodney W. Grimes 188df8bae1dSRodney W. Grimes /* 189996c772fSJohn Dyson * Mark a mount point as busy. Used to synchronize access and to delay 190996c772fSJohn Dyson * unmounting. Interlock is not released on failure. 191df8bae1dSRodney W. Grimes */ 19226f9a767SRodney W. Grimes int 193996c772fSJohn Dyson vfs_busy(mp, flags, interlkp, p) 194996c772fSJohn Dyson struct mount *mp; 195996c772fSJohn Dyson int flags; 196996c772fSJohn Dyson struct simplelock *interlkp; 197996c772fSJohn Dyson struct proc *p; 198df8bae1dSRodney W. Grimes { 199996c772fSJohn Dyson int lkflags; 200df8bae1dSRodney W. Grimes 201b1f4a44bSJulian Elischer if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 202996c772fSJohn Dyson if (flags & LK_NOWAIT) 203996c772fSJohn Dyson return (ENOENT); 204b1f4a44bSJulian Elischer mp->mnt_kern_flag |= MNTK_MWAIT; 205996c772fSJohn Dyson if (interlkp) { 206996c772fSJohn Dyson simple_unlock(interlkp); 207df8bae1dSRodney W. 
Grimes } 208df8bae1dSRodney W. Grimes /* 209996c772fSJohn Dyson * Since all busy locks are shared except the exclusive 210996c772fSJohn Dyson * lock granted when unmounting, the only place that a 211996c772fSJohn Dyson * wakeup needs to be done is at the release of the 212996c772fSJohn Dyson * exclusive lock at the end of dounmount. 213df8bae1dSRodney W. Grimes */ 214996c772fSJohn Dyson tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 215996c772fSJohn Dyson if (interlkp) { 216996c772fSJohn Dyson simple_lock(interlkp); 217df8bae1dSRodney W. Grimes } 218996c772fSJohn Dyson return (ENOENT); 219df8bae1dSRodney W. Grimes } 2208f9110f6SJohn Dyson lkflags = LK_SHARED | LK_NOPAUSE; 221996c772fSJohn Dyson if (interlkp) 222996c772fSJohn Dyson lkflags |= LK_INTERLOCK; 223996c772fSJohn Dyson if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 224996c772fSJohn Dyson panic("vfs_busy: unexpected lock failure"); 225df8bae1dSRodney W. Grimes return (0); 226df8bae1dSRodney W. Grimes } 227df8bae1dSRodney W. Grimes 228df8bae1dSRodney W. Grimes /* 229df8bae1dSRodney W. Grimes * Free a busy filesystem. 230df8bae1dSRodney W. Grimes */ 23126f9a767SRodney W. Grimes void 232996c772fSJohn Dyson vfs_unbusy(mp, p) 233996c772fSJohn Dyson struct mount *mp; 234996c772fSJohn Dyson struct proc *p; 235df8bae1dSRodney W. Grimes { 236df8bae1dSRodney W. Grimes 237996c772fSJohn Dyson lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 238e0e9c421SDavid Greenman } 239e0e9c421SDavid Greenman 240e0e9c421SDavid Greenman /* 241996c772fSJohn Dyson * Lookup a filesystem type, and if found allocate and initialize 242996c772fSJohn Dyson * a mount structure for it. 243996c772fSJohn Dyson * 244996c772fSJohn Dyson * Devname is usually updated by mount(8) after booting. 
245e0e9c421SDavid Greenman */ 246996c772fSJohn Dyson int 247996c772fSJohn Dyson vfs_rootmountalloc(fstypename, devname, mpp) 248996c772fSJohn Dyson char *fstypename; 249996c772fSJohn Dyson char *devname; 250996c772fSJohn Dyson struct mount **mpp; 251e0e9c421SDavid Greenman { 252996c772fSJohn Dyson struct proc *p = curproc; /* XXX */ 253996c772fSJohn Dyson struct vfsconf *vfsp; 254996c772fSJohn Dyson struct mount *mp; 255996c772fSJohn Dyson 256ecbb00a2SDoug Rabson if (fstypename == NULL) 257ecbb00a2SDoug Rabson return (ENODEV); 258996c772fSJohn Dyson for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 259996c772fSJohn Dyson if (!strcmp(vfsp->vfc_name, fstypename)) 260996c772fSJohn Dyson break; 261996c772fSJohn Dyson if (vfsp == NULL) 262996c772fSJohn Dyson return (ENODEV); 263996c772fSJohn Dyson mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 264996c772fSJohn Dyson bzero((char *)mp, (u_long)sizeof(struct mount)); 2658f9110f6SJohn Dyson lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 266996c772fSJohn Dyson (void)vfs_busy(mp, LK_NOWAIT, 0, p); 267996c772fSJohn Dyson LIST_INIT(&mp->mnt_vnodelist); 268996c772fSJohn Dyson mp->mnt_vfc = vfsp; 269996c772fSJohn Dyson mp->mnt_op = vfsp->vfc_vfsops; 270996c772fSJohn Dyson mp->mnt_flag = MNT_RDONLY; 271996c772fSJohn Dyson mp->mnt_vnodecovered = NULLVP; 272996c772fSJohn Dyson vfsp->vfc_refcount++; 2731b5464efSPoul-Henning Kamp mp->mnt_iosize_max = DFLTPHYS; 274996c772fSJohn Dyson mp->mnt_stat.f_type = vfsp->vfc_typenum; 275996c772fSJohn Dyson mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 276996c772fSJohn Dyson strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 277996c772fSJohn Dyson mp->mnt_stat.f_mntonname[0] = '/'; 278996c772fSJohn Dyson mp->mnt_stat.f_mntonname[1] = 0; 279996c772fSJohn Dyson (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 280996c772fSJohn Dyson *mpp = mp; 281996c772fSJohn Dyson return (0); 282996c772fSJohn Dyson } 283996c772fSJohn Dyson 
284996c772fSJohn Dyson /* 285996c772fSJohn Dyson * Find an appropriate filesystem to use for the root. If a filesystem 286996c772fSJohn Dyson * has not been preselected, walk through the list of known filesystems 287996c772fSJohn Dyson * trying those that have mountroot routines, and try them until one 288996c772fSJohn Dyson * works or we have tried them all. 289996c772fSJohn Dyson */ 290996c772fSJohn Dyson #ifdef notdef /* XXX JH */ 291996c772fSJohn Dyson int 292514ede09SBruce Evans lite2_vfs_mountroot() 293996c772fSJohn Dyson { 294996c772fSJohn Dyson struct vfsconf *vfsp; 295514ede09SBruce Evans extern int (*lite2_mountroot) __P((void)); 296e0e9c421SDavid Greenman int error; 297e0e9c421SDavid Greenman 298996c772fSJohn Dyson if (lite2_mountroot != NULL) 299996c772fSJohn Dyson return ((*lite2_mountroot)()); 300996c772fSJohn Dyson for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 301996c772fSJohn Dyson if (vfsp->vfc_mountroot == NULL) 302e0e9c421SDavid Greenman continue; 303996c772fSJohn Dyson if ((error = (*vfsp->vfc_mountroot)()) == 0) 304996c772fSJohn Dyson return (0); 305996c772fSJohn Dyson printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 306e0e9c421SDavid Greenman } 307996c772fSJohn Dyson return (ENODEV); 308e0e9c421SDavid Greenman } 309996c772fSJohn Dyson #endif 310e0e9c421SDavid Greenman 311df8bae1dSRodney W. Grimes /* 312df8bae1dSRodney W. Grimes * Lookup a mount point by filesystem identifier. 313df8bae1dSRodney W. Grimes */ 314df8bae1dSRodney W. Grimes struct mount * 315996c772fSJohn Dyson vfs_getvfs(fsid) 316df8bae1dSRodney W. Grimes fsid_t *fsid; 317df8bae1dSRodney W. Grimes { 318df8bae1dSRodney W. Grimes register struct mount *mp; 319df8bae1dSRodney W. Grimes 320996c772fSJohn Dyson simple_lock(&mountlist_slock); 3210429e37aSPoul-Henning Kamp TAILQ_FOREACH(mp, &mountlist, mnt_list) { 322df8bae1dSRodney W. 
Grimes if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 323996c772fSJohn Dyson mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 324996c772fSJohn Dyson simple_unlock(&mountlist_slock); 325df8bae1dSRodney W. Grimes return (mp); 326df8bae1dSRodney W. Grimes } 327996c772fSJohn Dyson } 328996c772fSJohn Dyson simple_unlock(&mountlist_slock); 329df8bae1dSRodney W. Grimes return ((struct mount *) 0); 330df8bae1dSRodney W. Grimes } 331df8bae1dSRodney W. Grimes 332df8bae1dSRodney W. Grimes /* 33305ecdd70SBruce Evans * Get a new unique fsid. Try to make its val[0] unique, since this value 33405ecdd70SBruce Evans * will be used to create fake device numbers for stat(). Also try (but 33505ecdd70SBruce Evans * not so hard) make its val[0] unique mod 2^16, since some emulators only 33605ecdd70SBruce Evans * support 16-bit device numbers. We end up with unique val[0]'s for the 33705ecdd70SBruce Evans * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. 338e6f71111SMatthew Dillon * 33961214975SBruce Evans * Keep in mind that several mounts may be running in parallel. Starting 34005ecdd70SBruce Evans * the search one past where the previous search terminated is both a 34105ecdd70SBruce Evans * micro-optimization and a defense against returning the same fsid to 34205ecdd70SBruce Evans * different mounts. 343df8bae1dSRodney W. Grimes */ 344df8bae1dSRodney W. Grimes void 345996c772fSJohn Dyson vfs_getnewfsid(mp) 346df8bae1dSRodney W. Grimes struct mount *mp; 347df8bae1dSRodney W. Grimes { 34805ecdd70SBruce Evans static u_int16_t mntid_base; 349df8bae1dSRodney W. Grimes fsid_t tfsid; 35005ecdd70SBruce Evans int mtype; 351df8bae1dSRodney W. Grimes 352996c772fSJohn Dyson simple_lock(&mntid_slock); 353996c772fSJohn Dyson mtype = mp->mnt_vfc->vfc_typenum; 354df8bae1dSRodney W. 
Grimes tfsid.val[1] = mtype; 3553660ebc2SBoris Popov mtype = (mtype & 0xFF) << 24; 35605ecdd70SBruce Evans for (;;) { 3573660ebc2SBoris Popov tfsid.val[0] = makeudev(255, 3583660ebc2SBoris Popov mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); 3593660ebc2SBoris Popov mntid_base++; 360e6f71111SMatthew Dillon if (vfs_getvfs(&tfsid) == NULL) 361e6f71111SMatthew Dillon break; 362df8bae1dSRodney W. Grimes } 363df8bae1dSRodney W. Grimes mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 364e6f71111SMatthew Dillon mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; 365996c772fSJohn Dyson simple_unlock(&mntid_slock); 366df8bae1dSRodney W. Grimes } 367df8bae1dSRodney W. Grimes 368df8bae1dSRodney W. Grimes /* 369a2801b77SJohn Polstra * Knob to control the precision of file timestamps: 370a2801b77SJohn Polstra * 371a2801b77SJohn Polstra * 0 = seconds only; nanoseconds zeroed. 372a2801b77SJohn Polstra * 1 = seconds and nanoseconds, accurate within 1/HZ. 373a2801b77SJohn Polstra * 2 = seconds and nanoseconds, truncated to microseconds. 374a2801b77SJohn Polstra * >=3 = seconds and nanoseconds, maximum precision. 375a2801b77SJohn Polstra */ 376a2801b77SJohn Polstra enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; 377a2801b77SJohn Polstra 378a2801b77SJohn Polstra static int timestamp_precision = TSP_SEC; 379a2801b77SJohn Polstra SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, 380a2801b77SJohn Polstra ×tamp_precision, 0, ""); 381a2801b77SJohn Polstra 382a2801b77SJohn Polstra /* 383a2801b77SJohn Polstra * Get a current timestamp. 
384a2801b77SJohn Polstra */ 385a2801b77SJohn Polstra void 386a2801b77SJohn Polstra vfs_timestamp(tsp) 387a2801b77SJohn Polstra struct timespec *tsp; 388a2801b77SJohn Polstra { 389a2801b77SJohn Polstra struct timeval tv; 390a2801b77SJohn Polstra 391a2801b77SJohn Polstra switch (timestamp_precision) { 392a2801b77SJohn Polstra case TSP_SEC: 393a2801b77SJohn Polstra tsp->tv_sec = time_second; 394a2801b77SJohn Polstra tsp->tv_nsec = 0; 395a2801b77SJohn Polstra break; 396a2801b77SJohn Polstra case TSP_HZ: 397a2801b77SJohn Polstra getnanotime(tsp); 398a2801b77SJohn Polstra break; 399a2801b77SJohn Polstra case TSP_USEC: 400a2801b77SJohn Polstra microtime(&tv); 401a2801b77SJohn Polstra TIMEVAL_TO_TIMESPEC(&tv, tsp); 402a2801b77SJohn Polstra break; 403a2801b77SJohn Polstra case TSP_NSEC: 404a2801b77SJohn Polstra default: 405a2801b77SJohn Polstra nanotime(tsp); 406a2801b77SJohn Polstra break; 407a2801b77SJohn Polstra } 408a2801b77SJohn Polstra } 409a2801b77SJohn Polstra 410a2801b77SJohn Polstra /* 411df8bae1dSRodney W. Grimes * Set vnode attributes to VNOVAL 412df8bae1dSRodney W. Grimes */ 41326f9a767SRodney W. Grimes void 41426f9a767SRodney W. Grimes vattr_null(vap) 415df8bae1dSRodney W. Grimes register struct vattr *vap; 416df8bae1dSRodney W. Grimes { 417df8bae1dSRodney W. Grimes 418df8bae1dSRodney W. Grimes vap->va_type = VNON; 41926f9a767SRodney W. Grimes vap->va_size = VNOVAL; 42026f9a767SRodney W. 
Grimes vap->va_bytes = VNOVAL; 4217a6c46b5SDoug Rabson vap->va_mode = VNOVAL; 4227a6c46b5SDoug Rabson vap->va_nlink = VNOVAL; 4237a6c46b5SDoug Rabson vap->va_uid = VNOVAL; 4247a6c46b5SDoug Rabson vap->va_gid = VNOVAL; 4257a6c46b5SDoug Rabson vap->va_fsid = VNOVAL; 4267a6c46b5SDoug Rabson vap->va_fileid = VNOVAL; 4277a6c46b5SDoug Rabson vap->va_blocksize = VNOVAL; 4287a6c46b5SDoug Rabson vap->va_rdev = VNOVAL; 4297a6c46b5SDoug Rabson vap->va_atime.tv_sec = VNOVAL; 4307a6c46b5SDoug Rabson vap->va_atime.tv_nsec = VNOVAL; 4317a6c46b5SDoug Rabson vap->va_mtime.tv_sec = VNOVAL; 4327a6c46b5SDoug Rabson vap->va_mtime.tv_nsec = VNOVAL; 4337a6c46b5SDoug Rabson vap->va_ctime.tv_sec = VNOVAL; 4347a6c46b5SDoug Rabson vap->va_ctime.tv_nsec = VNOVAL; 4357a6c46b5SDoug Rabson vap->va_flags = VNOVAL; 4367a6c46b5SDoug Rabson vap->va_gen = VNOVAL; 437df8bae1dSRodney W. Grimes vap->va_vaflags = 0; 438df8bae1dSRodney W. Grimes } 439df8bae1dSRodney W. Grimes 440df8bae1dSRodney W. Grimes /* 441df8bae1dSRodney W. Grimes * Routines having to do with the management of the vnode table. 442df8bae1dSRodney W. Grimes */ 443df8bae1dSRodney W. Grimes 444df8bae1dSRodney W. Grimes /* 445df8bae1dSRodney W. Grimes * Return the next vnode from the free list. 446df8bae1dSRodney W. Grimes */ 44726f9a767SRodney W. Grimes int 448df8bae1dSRodney W. Grimes getnewvnode(tag, mp, vops, vpp) 449df8bae1dSRodney W. Grimes enum vtagtype tag; 450df8bae1dSRodney W. Grimes struct mount *mp; 451f57e6547SBruce Evans vop_t **vops; 452df8bae1dSRodney W. Grimes struct vnode **vpp; 453df8bae1dSRodney W. Grimes { 454c904bbbdSKirk McKusick int s, count; 455996c772fSJohn Dyson struct proc *p = curproc; /* XXX */ 456c904bbbdSKirk McKusick struct vnode *vp = NULL; 457f2a2857bSKirk McKusick struct mount *vnmp; 45895e5e988SJohn Dyson vm_object_t object; 459df8bae1dSRodney W. 
Grimes 460b15a966eSPoul-Henning Kamp /* 461b15a966eSPoul-Henning Kamp * We take the least recently used vnode from the freelist 462b15a966eSPoul-Henning Kamp * if we can get it and it has no cached pages, and no 463b15a966eSPoul-Henning Kamp * namecache entries are relative to it. 464b15a966eSPoul-Henning Kamp * Otherwise we allocate a new vnode 465b15a966eSPoul-Henning Kamp */ 466b15a966eSPoul-Henning Kamp 467925a3a41SJohn Dyson s = splbio(); 468996c772fSJohn Dyson simple_lock(&vnode_free_list_slock); 469925a3a41SJohn Dyson 47000544193SPoul-Henning Kamp if (wantfreevnodes && freevnodes < wantfreevnodes) { 47100544193SPoul-Henning Kamp vp = NULL; 472d047b580SPoul-Henning Kamp } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 473d047b580SPoul-Henning Kamp /* 474d047b580SPoul-Henning Kamp * XXX: this is only here to be backwards compatible 475d047b580SPoul-Henning Kamp */ 47600544193SPoul-Henning Kamp vp = NULL; 477c904bbbdSKirk McKusick } else for (count = 0; count < freevnodes; count++) { 478c904bbbdSKirk McKusick vp = TAILQ_FIRST(&vnode_free_list); 479c904bbbdSKirk McKusick if (vp == NULL || vp->v_usecount) 480c904bbbdSKirk McKusick panic("getnewvnode: free vnode isn't"); 48195e5e988SJohn Dyson TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 482c904bbbdSKirk McKusick /* 483c904bbbdSKirk McKusick * Don't recycle if active in the namecache or 484c904bbbdSKirk McKusick * if it still has cached pages or we cannot get 485c904bbbdSKirk McKusick * its interlock. 
486c904bbbdSKirk McKusick */ 487c904bbbdSKirk McKusick if (LIST_FIRST(&vp->v_cache_src) != NULL || 4889ff5ce6bSBoris Popov (VOP_GETVOBJECT(vp, &object) == 0 && 4899ff5ce6bSBoris Popov (object->resident_page_count || object->ref_count)) || 490c904bbbdSKirk McKusick !simple_lock_try(&vp->v_interlock)) { 491c904bbbdSKirk McKusick TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 492c904bbbdSKirk McKusick vp = NULL; 493b15a966eSPoul-Henning Kamp continue; 494c904bbbdSKirk McKusick } 495f2a2857bSKirk McKusick /* 496f2a2857bSKirk McKusick * Skip over it if its filesystem is being suspended. 497f2a2857bSKirk McKusick */ 498f2a2857bSKirk McKusick if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0) 499b15a966eSPoul-Henning Kamp break; 500f2a2857bSKirk McKusick simple_unlock(&vp->v_interlock); 501f2a2857bSKirk McKusick TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 502f2a2857bSKirk McKusick vp = NULL; 503b15a966eSPoul-Henning Kamp } 504b15a966eSPoul-Henning Kamp if (vp) { 505a051452aSPoul-Henning Kamp vp->v_flag |= VDOOMED; 506b15a966eSPoul-Henning Kamp freevnodes--; 507996c772fSJohn Dyson simple_unlock(&vnode_free_list_slock); 508a051452aSPoul-Henning Kamp cache_purge(vp); 509df8bae1dSRodney W. Grimes vp->v_lease = NULL; 5102be70f79SJohn Dyson if (vp->v_type != VBAD) { 511996c772fSJohn Dyson vgonel(vp, p); 5122be70f79SJohn Dyson } else { 513996c772fSJohn Dyson simple_unlock(&vp->v_interlock); 514996c772fSJohn Dyson } 515f2a2857bSKirk McKusick vn_finished_write(vnmp); 516bd7e5f99SJohn Dyson 5175526d2d9SEivind Eklund #ifdef INVARIANTS 518797f2d22SPoul-Henning Kamp { 519797f2d22SPoul-Henning Kamp int s; 5200d94caffSDavid Greenman 521df8bae1dSRodney W. Grimes if (vp->v_data) 522df8bae1dSRodney W. Grimes panic("cleaned vnode isn't"); 523df8bae1dSRodney W. Grimes s = splbio(); 524df8bae1dSRodney W. Grimes if (vp->v_numoutput) 525df8bae1dSRodney W. Grimes panic("Clean vnode has pending I/O's"); 526df8bae1dSRodney W. 
Grimes splx(s); 527f2a2857bSKirk McKusick if (vp->v_writecount != 0) 528f2a2857bSKirk McKusick panic("Non-zero write count"); 529797f2d22SPoul-Henning Kamp } 530df8bae1dSRodney W. Grimes #endif 531df8bae1dSRodney W. Grimes vp->v_flag = 0; 532df8bae1dSRodney W. Grimes vp->v_lastw = 0; 533df8bae1dSRodney W. Grimes vp->v_lasta = 0; 534df8bae1dSRodney W. Grimes vp->v_cstart = 0; 535df8bae1dSRodney W. Grimes vp->v_clen = 0; 536df8bae1dSRodney W. Grimes vp->v_socket = 0; 537b15a966eSPoul-Henning Kamp } else { 538b15a966eSPoul-Henning Kamp simple_unlock(&vnode_free_list_slock); 5392d8acc0fSJohn Dyson vp = (struct vnode *) zalloc(vnode_zone); 540b15a966eSPoul-Henning Kamp bzero((char *) vp, sizeof *vp); 54195e5e988SJohn Dyson simple_lock_init(&vp->v_interlock); 542b15a966eSPoul-Henning Kamp vp->v_dd = vp; 543a051452aSPoul-Henning Kamp cache_purge(vp); 544b15a966eSPoul-Henning Kamp LIST_INIT(&vp->v_cache_src); 545b15a966eSPoul-Henning Kamp TAILQ_INIT(&vp->v_cache_dst); 546b15a966eSPoul-Henning Kamp numvnodes++; 547df8bae1dSRodney W. Grimes } 548b15a966eSPoul-Henning Kamp 54916e9e530SPeter Wemm TAILQ_INIT(&vp->v_cleanblkhd); 55016e9e530SPeter Wemm TAILQ_INIT(&vp->v_dirtyblkhd); 551f9ceb7c7SDavid Greenman vp->v_type = VNON; 552df8bae1dSRodney W. Grimes vp->v_tag = tag; 553df8bae1dSRodney W. Grimes vp->v_op = vops; 554df8bae1dSRodney W. Grimes insmntque(vp, mp); 555df8bae1dSRodney W. Grimes *vpp = vp; 556df8bae1dSRodney W. Grimes vp->v_usecount = 1; 557df8bae1dSRodney W. Grimes vp->v_data = 0; 558925a3a41SJohn Dyson splx(s); 55964d3c7e3SJohn Dyson 560fb116777SEivind Eklund vfs_object_create(vp, p, p->p_ucred); 561df8bae1dSRodney W. Grimes return (0); 562df8bae1dSRodney W. Grimes } 563df8bae1dSRodney W. Grimes 564df8bae1dSRodney W. Grimes /* 565df8bae1dSRodney W. Grimes * Move a vnode from one mount queue to another. 566df8bae1dSRodney W. Grimes */ 567cb451ebdSBruce Evans static void 568df8bae1dSRodney W. Grimes insmntque(vp, mp) 569df8bae1dSRodney W. 
Grimes register struct vnode *vp; 570df8bae1dSRodney W. Grimes register struct mount *mp; 571df8bae1dSRodney W. Grimes { 572df8bae1dSRodney W. Grimes 573996c772fSJohn Dyson simple_lock(&mntvnode_slock); 574df8bae1dSRodney W. Grimes /* 575df8bae1dSRodney W. Grimes * Delete from old mount point vnode list, if on one. 576df8bae1dSRodney W. Grimes */ 577df8bae1dSRodney W. Grimes if (vp->v_mount != NULL) 578df8bae1dSRodney W. Grimes LIST_REMOVE(vp, v_mntvnodes); 579df8bae1dSRodney W. Grimes /* 580df8bae1dSRodney W. Grimes * Insert into list of vnodes for the new mount point, if available. 581df8bae1dSRodney W. Grimes */ 582996c772fSJohn Dyson if ((vp->v_mount = mp) == NULL) { 583996c772fSJohn Dyson simple_unlock(&mntvnode_slock); 584df8bae1dSRodney W. Grimes return; 585996c772fSJohn Dyson } 586df8bae1dSRodney W. Grimes LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 587996c772fSJohn Dyson simple_unlock(&mntvnode_slock); 588df8bae1dSRodney W. Grimes } 589df8bae1dSRodney W. Grimes 590df8bae1dSRodney W. Grimes /* 591df8bae1dSRodney W. Grimes * Update outstanding I/O count and do wakeup if requested. 592df8bae1dSRodney W. Grimes */ 59326f9a767SRodney W. Grimes void 594df8bae1dSRodney W. Grimes vwakeup(bp) 595df8bae1dSRodney W. Grimes register struct buf *bp; 596df8bae1dSRodney W. Grimes { 597df8bae1dSRodney W. Grimes register struct vnode *vp; 598df8bae1dSRodney W. Grimes 599df8bae1dSRodney W. Grimes bp->b_flags &= ~B_WRITEINPROG; 600bb56ec4aSPoul-Henning Kamp if ((vp = bp->b_vp)) { 601df8bae1dSRodney W. Grimes vp->v_numoutput--; 602df8bae1dSRodney W. Grimes if (vp->v_numoutput < 0) 603df8bae1dSRodney W. Grimes panic("vwakeup: neg numoutput"); 604a3a8bb29SDavid Greenman if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 605df8bae1dSRodney W. Grimes vp->v_flag &= ~VBWAIT; 606df8bae1dSRodney W. Grimes wakeup((caddr_t) &vp->v_numoutput); 607df8bae1dSRodney W. Grimes } 608df8bae1dSRodney W. Grimes } 609df8bae1dSRodney W. Grimes } 610df8bae1dSRodney W. 
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * If V_SAVE is set in flags, dirty buffers are first written out via
 * VOP_FSYNC (after waiting for in-progress writes to drain) before
 * everything is invalidated.  slpflag/slptimeo are passed through to
 * tsleep()/BUF_TIMELOCK() so callers control interruptibility and
 * timeout of the sleeps.  Returns 0 on success or a tsleep/lock error.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		s = splbio();
		/* Wait for all writes in progress to complete. */
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			/*
			 * After a synchronous fsync nothing may remain
			 * dirty or in flight.
			 */
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	s = splbio();
	/* Repeatedly sweep the clean and dirty lists until both are empty. */
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
				/*
				 * Buffer is busy: sleep for it.  ENOLCK
				 * (from LK_SLEEPFAIL) means the list may
				 * have changed under us -- rescan.
				 */
				error = BUF_TIMELOCK(bp,
				    LK_EXCLUSIVE | LK_SLEEPFAIL,
				    "vinvalbuf", slpflag, slptimeo);
				if (error == ENOLCK)
					break;
				splx(s);
				return (error);
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						BUF_UNLOCK(bp);
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= B_ASYNC;
						BUF_WRITE(bp);
					}
				} else {
					bremfree(bp);
					(void) BUF_WRITE(bp);
				}
				/* List changed by the write -- rescan. */
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	/* Drain any writes started by the flushing above. */
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	if (VOP_GETVOBJECT(vp, &object) == 0) {
		vm_object_page_remove(object, 0, 0,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	/*
	 * Invalidate every clean and dirty buffer at or beyond the
	 * truncation point.  Any sleep (BUF_LOCK with LK_SLEEPFAIL) or
	 * any sign that the list changed under us forces a full restart.
	 */
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				/* Successor moved off this vnode's clean list? */
				if (nbp &&
				    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				/* Successor moved off this vnode's dirty list? */
				if (nbp &&
				    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
		/*
		 * Push out any remaining dirty metadata buffers
		 * (negative lblkno = indirect blocks).
		 */
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					BUF_WRITE(bp);
				}
				goto restartsync;
			}

		}
	}

	/* Drain writes started above before reporting completion. */
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}

/*
 * Associate a buffer with a vnode.
 *
 * Takes a hold reference on the vnode (vhold) and places the buffer
 * on the vnode's clean list.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));

	vhold(vp);
	bp->b_vp = vp;
	bp->b_dev = vn_todev(vp);
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= BX_VNCLEAN;
	bp->b_xflags &= ~BX_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 *
 * Removes the buffer from whichever block list it is on, takes the
 * vnode off the syncer worklist if its dirty list just emptied, and
 * drops the hold reference taken by bgetvp().
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
		if (bp->b_xflags & BX_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, mounted on block devices
 * are delayed only about a half the time that file data is delayed.
 * Similarly, directory updates are more critical, so are only delayed
 * about a third the time that file data is delayed. Thus, there are
 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
 * one each second (driven off the filesystem syncer process). The
 * syncer_delayno variable indicates the next queue that is to be processed.
 * Items that need to be processed soon are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 *
 * Moves the vnode to the worklist slot "delay" seconds in the future
 * (clamped to syncer_maxdelay - 2), removing it from any slot it
 * already occupies.  Protected by splbio() against interrupt-time
 * list manipulation.
 */
static void
vn_syncer_add_to_worklist(struct vnode *vp, int delay)
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}

/* The syncer kernel process, started at SI_SUB_KTHREAD_UPDATE time. */
struct proc *updateproc;
static void sched_sync __P((void));
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	struct mount *mp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	mtx_enter(&Giant, MTX_DEF);

	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
	    SHUTDOWN_PRI_LAST);

	for (;;) {
		kproc_suspend_loop(p);

		starttime = time_second;

		/*
		 * Push files whose dirty time has expired. Be careful
		 * of interrupt race on slp queue.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			/* Skip vnodes that are locked or on a suspended fs. */
			if (VOP_ISLOCKED(vp, NULL) == 0 &&
			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
				VOP_UNLOCK(vp, 0, p);
				vn_finished_write(mp);
			}
			s = splbio();
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: v_tag VT_VFS vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves it to a different
				 * slot we are safe.
				 */
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    !vn_isdisk(vp, NULL))
					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
				/*
				 * Put us back on the worklist. The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 */
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
			splx(s);
		}

		/*
		 * Do soft update processing.
		 */
#ifdef SOFTUPDATES
		softdep_process_worklist(NULL);
#endif

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process. A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP. Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait. Otherwise start right over
		 * again. We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Request the syncer daemon to speed up its work.
 * We never push it to speed up more than half of its
 * normal turn time, otherwise it could take over the cpu.
 *
 * Returns 1 if the request was accepted (rushjob bumped), 0 otherwise.
 */
int
speedup_syncer()
{
	int s;

	s = splhigh();
	/* Kick the syncer out of its lbolt sleep if it is waiting. */
	if (updateproc->p_wchan == &lbolt)
		setrunnable(updateproc);
	splx(s);
	if (rushjob < syncdelay / 2) {
		rushjob += 1;
		stat_rush_requests += 1;
		return (1);
	}
	return(0);
}

/*
 * Associate a p-buffer with a vnode.
 *
 * Also sets B_PAGING flag to indicate that vnode is not fully associated
 * with the buffer. i.e. the bp has not been linked into the vnode or
 * ref-counted.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));

	bp->b_vp = vp;
	bp->b_flags |= B_PAGING;
	bp->b_dev = vn_todev(vp);
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));

	/* XXX REMOVE ME */
	if (bp->b_vnbufs.tqe_next != NULL) {
		panic(
		    "relpbuf(): b_vp was probably reassignbuf()d %p %x",
		    bp,
		    (int)bp->b_flags
		);
	}
	bp->b_vp = (struct vnode *) 0;
	bp->b_flags &= ~B_PAGING;
}

/*
 * Reassign a p-buffer to a new vnode.  Only legal for B_PAGING
 * buffers, which are never linked into a vnode's block lists.
 */
void
pbreassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	if ((bp->b_flags & B_PAGING) == 0) {
		panic(
		    "pbreassignbuf() on non phys bp %p",
		    bp
		);
	}
	bp->b_vp = newvp;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	++reassignbufcalls;

	/*
	 * B_PAGING flagged buffers cannot be reassigned because their vp
	 * is not fully linked in.
	 */
	if (bp->b_flags & B_PAGING)
		panic("cannot reassign paging buffer");

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
		if (bp->b_xflags & BX_VNDIRTY)
			listheadp = &bp->b_vp->v_dirtyblkhd;
		else
			listheadp = &bp->b_vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
		if (bp->b_vp != newvp) {
			vdrop(bp->b_vp);
			bp->b_vp = NULL;	/* for clarification */
		}
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			/*
			 * First dirty buffer: schedule the vnode on the
			 * syncer worklist with a delay depending on its
			 * type (directories and device metadata sooner).
			 */
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VCHR:
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= BX_VNDIRTY;
		tbp = TAILQ_FIRST(listheadp);
		/*
		 * Keep the dirty list sorted by lblkno, with metadata
		 * (negative lblkno) at the tail.
		 */
		if (tbp == NULL ||
		    bp->b_lblkno == 0 ||
		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (bp->b_lblkno < 0) {
			TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (reassignbufmethod == 1) {
			/*
			 * New sorting algorithm, only handle sequential case,
			 * otherwise append to end (but before metadata)
			 */
			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
			    (tbp->b_xflags & BX_VNDIRTY)) {
				/*
				 * Found the best place to insert the buffer
				 */
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortgood;
			} else {
				/*
				 * Missed, append to end, but before meta-data.
				 * We know that the head buffer in the list is
				 * not meta-data due to prior conditionals.
				 *
				 * Indirect effects: NFS second stage write
				 * tends to wind up here, giving maximum
				 * distance between the unstable write and the
				 * commit rpc.
				 */
				tbp = TAILQ_LAST(listheadp, buflists);
				while (tbp && tbp->b_lblkno < 0)
					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortbad;
			}
		} else {
			/*
			 * Old sorting algorithm, scan queue and insert
			 */
			struct buf *ttbp;
			while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
			    (ttbp->b_lblkno < bp->b_lblkno)) {
				++reassignbufloops;
				tbp = ttbp;
			}
			TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
		}
	} else {
		bp->b_xflags |= BX_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		/* Dirty list drained: take the vnode off the syncer worklist. */
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	if (bp->b_vp != newvp) {
		bp->b_vp = newvp;
		vhold(bp->b_vp);
	}
	splx(s);
}

/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 * XXX: This now changed to a VCHR due to the block/char merging.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VCHR;
	addalias(vp, dev);
	*vpp = vp;
	return (0);
}

/*
 * Add vnode to the alias list hung off the dev_t.
 *
 * The reason for this gunk is that multiple vnodes can reference
 * the same physical device, so checking vp->v_usecount to see
 * how many users there are is inadequate; the v_usecount for
 * the vnodes need to be accumulated.  vcount() does that.
 *
 * Returns the vnode that callers should use: either nvp itself, or
 * an existing bdevvp vnode for the same device that absorbed nvp's
 * filesystem-specific state (nvp is then released and vgone'd).
 */
struct vnode *
addaliasu(nvp, nvp_rdev)
	struct vnode *nvp;
	udev_t nvp_rdev;
{
	struct vnode *ovp;
	vop_t **ops;
	dev_t dev;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addaliasu on non-special vnode");
	dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0);
	/*
	 * Check to see if we have a bdevvp vnode with no associated
	 * filesystem. If so, we want to associate the filesystem of
	 * the new newly instigated vnode with the bdevvp vnode and
	 * discard the newly created vnode rather than leaving the
	 * bdevvp vnode lying around with no associated filesystem.
	 */
	if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) {
		addalias(nvp, dev);
		return (nvp);
	}
	/*
	 * Discard unneeded vnode, but save its node specific data.
	 * Note that if there is a lock, it is carried over in the
	 * node specific data to the replacement vnode.
	 */
	vref(ovp);
	ovp->v_data = nvp->v_data;
	ovp->v_tag = nvp->v_tag;
	nvp->v_data = NULL;
	ops = nvp->v_op;
	nvp->v_op = ovp->v_op;
	ovp->v_op = ops;
	insmntque(ovp, nvp->v_mount);
	vrele(nvp);
	vgone(nvp);
	return (ovp);
}

/*
 * Link nvp onto the alias list of the given dev_t under the
 * spechash lock.  nvp must be a VBLK or VCHR vnode.
 */
void
addalias(nvp, dev)
	struct vnode *nvp;
	dev_t dev;
{

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addalias on non-special vnode");

	nvp->v_rdev = dev;
	simple_lock(&spechash_slock);
	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
	simple_unlock(&spechash_slock);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.
Cleaning is determined by checking that 1376996c772fSJohn Dyson * the VXLOCK flag is set. 1377df8bae1dSRodney W. Grimes */ 1378996c772fSJohn Dyson if ((flags & LK_INTERLOCK) == 0) { 1379996c772fSJohn Dyson simple_lock(&vp->v_interlock); 1380996c772fSJohn Dyson } 1381996c772fSJohn Dyson if (vp->v_flag & VXLOCK) { 1382df8bae1dSRodney W. Grimes vp->v_flag |= VXWANT; 1383996c772fSJohn Dyson simple_unlock(&vp->v_interlock); 1384996c772fSJohn Dyson tsleep((caddr_t)vp, PINOD, "vget", 0); 1385996c772fSJohn Dyson return (ENOENT); 1386df8bae1dSRodney W. Grimes } 13872be70f79SJohn Dyson 1388df8bae1dSRodney W. Grimes vp->v_usecount++; 13892be70f79SJohn Dyson 1390a051452aSPoul-Henning Kamp if (VSHOULDBUSY(vp)) 1391a051452aSPoul-Henning Kamp vbusy(vp); 1392996c772fSJohn Dyson if (flags & LK_TYPE_MASK) { 139364d3c7e3SJohn Dyson if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 139464d3c7e3SJohn Dyson /* 139564d3c7e3SJohn Dyson * must expand vrele here because we do not want 139664d3c7e3SJohn Dyson * to call VOP_INACTIVE if the reference count 139764d3c7e3SJohn Dyson * drops back to zero since it was never really 139864d3c7e3SJohn Dyson * active. We must remove it from the free list 139964d3c7e3SJohn Dyson * before sleeping so that multiple processes do 140064d3c7e3SJohn Dyson * not try to recycle it. 140164d3c7e3SJohn Dyson */ 140264d3c7e3SJohn Dyson simple_lock(&vp->v_interlock); 140364d3c7e3SJohn Dyson vp->v_usecount--; 140464d3c7e3SJohn Dyson if (VSHOULDFREE(vp)) 140564d3c7e3SJohn Dyson vfree(vp); 140664d3c7e3SJohn Dyson simple_unlock(&vp->v_interlock); 140764d3c7e3SJohn Dyson } 1408996c772fSJohn Dyson return (error); 1409996c772fSJohn Dyson } 1410996c772fSJohn Dyson simple_unlock(&vp->v_interlock); 1411df8bae1dSRodney W. Grimes return (0); 1412df8bae1dSRodney W. Grimes } 1413df8bae1dSRodney W. 
/*
 * Take an additional reference on a vnode, under the interlock.
 */
void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vrele: null vp"));
	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * Unlike vput(), the caller of vrele() does not hold the
		 * vnode lock, so we must explicitly lock the vnode before
		 * calling VOP_INACTIVE (which expects a locked vnode and
		 * unlocks it).
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

/*
 * Release a reference on a locked vnode: like vrele(), but the caller
 * already holds the vnode lock, which is dropped on the way out.
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vput: null vp"));
	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * Since we are doing a vput, the node is already locked,
		 * and VOP_INACTIVE must be called with the node locked
		 * (it will unlock it).
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}

/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();		/* hold count is also touched from bio level */
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 *
 * Returns 0 on success, EBUSY if any vnode could not be flushed.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;		/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode.  For block
		 * or character devices, revert to an anonymous device.  For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * Called with the vnode interlock held; the interlock is released
 * by the VOP_LOCK(LK_DRAIN | LK_INTERLOCK) below.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.  If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 * If the flush fails, just toss the buffers.
	 */
	if (flags & DOCLOSE) {
		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
			(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
	}

	VOP_DESTROYVOBJECT(vp);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			vfree(vp);
		}
		simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);
	if (vp->v_vnlock) {
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 * Called with the vnode interlock held (released here).
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	dev_t dev;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

	vp = ap->a_vp;
	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
		return (0);
	}
	dev = vp->v_rdev;
	/* vgone() every alias on the device until the hash list is empty. */
	for (;;) {
		simple_lock(&spechash_slock);
		vq = SLIST_FIRST(&dev->si_hlist);
		simple_unlock(&spechash_slock);
		if (!vq)
			break;
		vgone(vq);
	}
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 * Returns 1 if the vnode was recycled, 0 if it was in use.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
		simple_lock(&spechash_slock);
		SLIST_REMOVE(&vp->v_rdev->si_hlist, vp, vnode, v_specnext);
		freedev(vp->v_rdev);
		simple_unlock(&spechash_slock);
		vp->v_rdev = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the
	 * VDOOMED flag and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE)
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}
/*
 * Lookup a vnode by device number.
 * Returns 1 and the vnode in *vpp on success, 0 if no vnode of the
 * requested type is on the device's alias list.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;

	simple_lock(&spechash_slock);
	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
		if (type == vp->v_type) {
			*vpp = vp;
			simple_unlock(&spechash_slock);
			return (1);
		}
	}
	simple_unlock(&spechash_slock);
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 * Sums v_usecount over every vnode aliased to vp's device.
 * Caller presumably guarantees vp->v_rdev is valid -- not checked here.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int count;

	count = 0;
	simple_lock(&spechash_slock);
	SLIST_FOREACH(vq, &vp->v_rdev->si_hlist, v_specnext)
		count += vq->v_usecount;
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Same as above, but using the dev_t as argument.
 * Returns 0 if the device has no aliased vnodes at all.
 */

int
count_dev(dev)
	dev_t dev;
{
	struct vnode *vp;

	vp = SLIST_FIRST(&dev->si_hlist);
	if (vp == NULL)
		return (0);
	return(vcount(vp));
}

/*
 * Print out a description of a vnode.
 */
/* Names indexed by enum vtype; must stay in sync with it. */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];		/* large enough for all flag names below */

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);	/* skip leading '|' */
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;
Grimes printf("Locked vnodes\n"); 2016c35e283aSBruce Evans simple_lock(&mountlist_slock); 20170429e37aSPoul-Henning Kamp for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 2018c35e283aSBruce Evans if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 20190429e37aSPoul-Henning Kamp nmp = TAILQ_NEXT(mp, mnt_list); 2020c35e283aSBruce Evans continue; 2021c35e283aSBruce Evans } 20221b727751SPoul-Henning Kamp LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 20236bdfe06aSEivind Eklund if (VOP_ISLOCKED(vp, NULL)) 2024df8bae1dSRodney W. Grimes vprint((char *)0, vp); 2025df8bae1dSRodney W. Grimes } 2026c35e283aSBruce Evans simple_lock(&mountlist_slock); 20270429e37aSPoul-Henning Kamp nmp = TAILQ_NEXT(mp, mnt_list); 2028c35e283aSBruce Evans vfs_unbusy(mp, p); 2029c35e283aSBruce Evans } 2030c35e283aSBruce Evans simple_unlock(&mountlist_slock); 2031df8bae1dSRodney W. Grimes } 2032df8bae1dSRodney W. Grimes #endif 2033df8bae1dSRodney W. Grimes 20343a76a594SBruce Evans /* 20353a76a594SBruce Evans * Top level filesystem related information gathering. 20363a76a594SBruce Evans */ 203782d9ae4eSPoul-Henning Kamp static int sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS)); 20383a76a594SBruce Evans 20394a8b9660SBruce Evans static int 204082d9ae4eSPoul-Henning Kamp vfs_sysctl(SYSCTL_HANDLER_ARGS) 2041a896f025SBruce Evans { 20424a8b9660SBruce Evans int *name = (int *)arg1 - 1; /* XXX */ 20434a8b9660SBruce Evans u_int namelen = arg2 + 1; /* XXX */ 2044a896f025SBruce Evans struct vfsconf *vfsp; 2045a896f025SBruce Evans 2046f5ce6752SBruce Evans #if 1 || defined(COMPAT_PRELITE2) 20473a76a594SBruce Evans /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
*/ 20484a8b9660SBruce Evans if (namelen == 1) 20493a76a594SBruce Evans return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 2050dc91a89eSBruce Evans #endif 2051a896f025SBruce Evans 20524a8b9660SBruce Evans #ifdef notyet 20533a76a594SBruce Evans /* all sysctl names at this level are at least name and field */ 20543a76a594SBruce Evans if (namelen < 2) 20553a76a594SBruce Evans return (ENOTDIR); /* overloaded */ 20563a76a594SBruce Evans if (name[0] != VFS_GENERIC) { 20573a76a594SBruce Evans for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 20583a76a594SBruce Evans if (vfsp->vfc_typenum == name[0]) 20593a76a594SBruce Evans break; 20603a76a594SBruce Evans if (vfsp == NULL) 20613a76a594SBruce Evans return (EOPNOTSUPP); 20623a76a594SBruce Evans return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 20633a76a594SBruce Evans oldp, oldlenp, newp, newlen, p)); 20643a76a594SBruce Evans } 20654a8b9660SBruce Evans #endif 20663a76a594SBruce Evans switch (name[1]) { 20673a76a594SBruce Evans case VFS_MAXTYPENUM: 20683a76a594SBruce Evans if (namelen != 2) 20693a76a594SBruce Evans return (ENOTDIR); 20703a76a594SBruce Evans return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 20713a76a594SBruce Evans case VFS_CONF: 20723a76a594SBruce Evans if (namelen != 3) 20733a76a594SBruce Evans return (ENOTDIR); /* overloaded */ 20743a76a594SBruce Evans for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 20753a76a594SBruce Evans if (vfsp->vfc_typenum == name[2]) 20763a76a594SBruce Evans break; 20773a76a594SBruce Evans if (vfsp == NULL) 20783a76a594SBruce Evans return (EOPNOTSUPP); 20793a76a594SBruce Evans return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 20803a76a594SBruce Evans } 20813a76a594SBruce Evans return (EOPNOTSUPP); 20823a76a594SBruce Evans } 20833a76a594SBruce Evans 20844a8b9660SBruce Evans SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 20854a8b9660SBruce Evans "Generic filesystem"); 20864a8b9660SBruce Evans 2087f5ce6752SBruce Evans #if 1 || defined(COMPAT_PRELITE2) 
2088a896f025SBruce Evans 2089a896f025SBruce Evans static int 209082d9ae4eSPoul-Henning Kamp sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) 2091a896f025SBruce Evans { 2092a896f025SBruce Evans int error; 2093a896f025SBruce Evans struct vfsconf *vfsp; 2094a896f025SBruce Evans struct ovfsconf ovfs; 20953a76a594SBruce Evans 20963a76a594SBruce Evans for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2097a896f025SBruce Evans ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2098a896f025SBruce Evans strcpy(ovfs.vfc_name, vfsp->vfc_name); 2099a896f025SBruce Evans ovfs.vfc_index = vfsp->vfc_typenum; 2100a896f025SBruce Evans ovfs.vfc_refcount = vfsp->vfc_refcount; 2101a896f025SBruce Evans ovfs.vfc_flags = vfsp->vfc_flags; 2102a896f025SBruce Evans error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2103a896f025SBruce Evans if (error) 2104a896f025SBruce Evans return error; 2105a896f025SBruce Evans } 2106a896f025SBruce Evans return 0; 2107a896f025SBruce Evans } 2108a896f025SBruce Evans 2109f5ce6752SBruce Evans #endif /* 1 || COMPAT_PRELITE2 */ 2110a896f025SBruce Evans 21114a11ca4eSPoul-Henning Kamp #if 0 2112df8bae1dSRodney W. Grimes #define KINFO_VNODESLOP 10 2113df8bae1dSRodney W. Grimes /* 2114df8bae1dSRodney W. Grimes * Dump vnode list (via sysctl). 2115df8bae1dSRodney W. Grimes * Copyout address of vnode followed by vnode. 2116df8bae1dSRodney W. Grimes */ 2117df8bae1dSRodney W. Grimes /* ARGSUSED */ 21184b2af45fSPoul-Henning Kamp static int 211982d9ae4eSPoul-Henning Kamp sysctl_vnode(SYSCTL_HANDLER_ARGS) 2120df8bae1dSRodney W. Grimes { 2121996c772fSJohn Dyson struct proc *p = curproc; /* XXX */ 2122c35e283aSBruce Evans struct mount *mp, *nmp; 2123c35e283aSBruce Evans struct vnode *nvp, *vp; 2124df8bae1dSRodney W. Grimes int error; 2125df8bae1dSRodney W. Grimes 2126df8bae1dSRodney W. Grimes #define VPTRSZ sizeof (struct vnode *) 2127df8bae1dSRodney W. 
Grimes #define VNODESZ sizeof (struct vnode) 21284b2af45fSPoul-Henning Kamp 21294b2af45fSPoul-Henning Kamp req->lock = 0; 21302d0b1d70SPoul-Henning Kamp if (!req->oldptr) /* Make an estimate */ 21314b2af45fSPoul-Henning Kamp return (SYSCTL_OUT(req, 0, 21324b2af45fSPoul-Henning Kamp (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2133df8bae1dSRodney W. Grimes 2134c35e283aSBruce Evans simple_lock(&mountlist_slock); 21350429e37aSPoul-Henning Kamp for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 2136c35e283aSBruce Evans if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 21370429e37aSPoul-Henning Kamp nmp = TAILQ_NEXT(mp, mnt_list); 2138df8bae1dSRodney W. Grimes continue; 2139c35e283aSBruce Evans } 2140df8bae1dSRodney W. Grimes again: 2141c35e283aSBruce Evans simple_lock(&mntvnode_slock); 21421b727751SPoul-Henning Kamp for (vp = LIST_FIRST(&mp->mnt_vnodelist); 2143df8bae1dSRodney W. Grimes vp != NULL; 2144c35e283aSBruce Evans vp = nvp) { 2145df8bae1dSRodney W. Grimes /* 2146c35e283aSBruce Evans * Check that the vp is still associated with 2147c35e283aSBruce Evans * this filesystem. RACE: could have been 2148c35e283aSBruce Evans * recycled onto the same filesystem. 2149df8bae1dSRodney W. Grimes */ 2150df8bae1dSRodney W. Grimes if (vp->v_mount != mp) { 2151c35e283aSBruce Evans simple_unlock(&mntvnode_slock); 2152df8bae1dSRodney W. Grimes goto again; 2153df8bae1dSRodney W. Grimes } 21541b727751SPoul-Henning Kamp nvp = LIST_NEXT(vp, v_mntvnodes); 2155c35e283aSBruce Evans simple_unlock(&mntvnode_slock); 21564b2af45fSPoul-Henning Kamp if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 2157c35e283aSBruce Evans (error = SYSCTL_OUT(req, vp, VNODESZ))) 2158df8bae1dSRodney W. 
Grimes return (error); 2159c35e283aSBruce Evans simple_lock(&mntvnode_slock); 2160e887950aSBruce Evans } 2161c35e283aSBruce Evans simple_unlock(&mntvnode_slock); 2162c35e283aSBruce Evans simple_lock(&mountlist_slock); 21630429e37aSPoul-Henning Kamp nmp = TAILQ_NEXT(mp, mnt_list); 2164996c772fSJohn Dyson vfs_unbusy(mp, p); 2165df8bae1dSRodney W. Grimes } 2166c35e283aSBruce Evans simple_unlock(&mountlist_slock); 2167df8bae1dSRodney W. Grimes 2168df8bae1dSRodney W. Grimes return (0); 2169df8bae1dSRodney W. Grimes } 21704a11ca4eSPoul-Henning Kamp #endif 2171df8bae1dSRodney W. Grimes 21722e58c0f8SDavid Greenman /* 21732e58c0f8SDavid Greenman * XXX 21742e58c0f8SDavid Greenman * Exporting the vnode list on large systems causes them to crash. 21752e58c0f8SDavid Greenman * Exporting the vnode list on medium systems causes sysctl to coredump. 21762e58c0f8SDavid Greenman */ 21772e58c0f8SDavid Greenman #if 0 217865d0bc13SPoul-Henning Kamp SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 217965d0bc13SPoul-Henning Kamp 0, 0, sysctl_vnode, "S,vnode", ""); 21802e58c0f8SDavid Greenman #endif 21814b2af45fSPoul-Henning Kamp 2182df8bae1dSRodney W. Grimes /* 2183df8bae1dSRodney W. Grimes * Check to see if a filesystem is mounted on a block device. 2184df8bae1dSRodney W. Grimes */ 2185df8bae1dSRodney W. Grimes int 2186df8bae1dSRodney W. Grimes vfs_mountedon(vp) 2187996c772fSJohn Dyson struct vnode *vp; 2188df8bae1dSRodney W. Grimes { 2189df8bae1dSRodney W. Grimes 2190b1897c19SJulian Elischer if (vp->v_specmountpoint != NULL) 2191df8bae1dSRodney W. Grimes return (EBUSY); 2192dbafb366SPoul-Henning Kamp return (0); 2193996c772fSJohn Dyson } 2194996c772fSJohn Dyson 2195996c772fSJohn Dyson /* 2196996c772fSJohn Dyson * Unmount all filesystems. The list is traversed in reverse order 21977c1557c4SBruce Evans * of mounting to avoid dependencies. 
2198996c772fSJohn Dyson */ 2199996c772fSJohn Dyson void 2200996c772fSJohn Dyson vfs_unmountall() 2201996c772fSJohn Dyson { 22020429e37aSPoul-Henning Kamp struct mount *mp; 2203cb87a87cSTor Egge struct proc *p; 2204996c772fSJohn Dyson int error; 2205996c772fSJohn Dyson 2206cb87a87cSTor Egge if (curproc != NULL) 2207cb87a87cSTor Egge p = curproc; 2208cb87a87cSTor Egge else 2209cb87a87cSTor Egge p = initproc; /* XXX XXX should this be proc0? */ 22107c1557c4SBruce Evans /* 22117c1557c4SBruce Evans * Since this only runs when rebooting, it is not interlocked. 22127c1557c4SBruce Evans */ 22130429e37aSPoul-Henning Kamp while(!TAILQ_EMPTY(&mountlist)) { 22140429e37aSPoul-Henning Kamp mp = TAILQ_LAST(&mountlist, mntlist); 22157c1557c4SBruce Evans error = dounmount(mp, MNT_FORCE, p); 2216996c772fSJohn Dyson if (error) { 22170429e37aSPoul-Henning Kamp TAILQ_REMOVE(&mountlist, mp, mnt_list); 22187c1557c4SBruce Evans printf("unmount of %s failed (", 22197c1557c4SBruce Evans mp->mnt_stat.f_mntonname); 2220996c772fSJohn Dyson if (error == EBUSY) 2221996c772fSJohn Dyson printf("BUSY)\n"); 2222996c772fSJohn Dyson else 2223996c772fSJohn Dyson printf("%d)\n", error); 22240429e37aSPoul-Henning Kamp } else { 22250429e37aSPoul-Henning Kamp /* The unmount has removed mp from the mountlist */ 2226996c772fSJohn Dyson } 2227996c772fSJohn Dyson } 2228df8bae1dSRodney W. Grimes } 2229df8bae1dSRodney W. Grimes 2230df8bae1dSRodney W. Grimes /* 2231df8bae1dSRodney W. Grimes * Build hash lists of net addresses and hang them off the mount point. 2232df8bae1dSRodney W. Grimes * Called by ufs_mount() to set up the lists of export addresses. 2233df8bae1dSRodney W. Grimes */ 2234df8bae1dSRodney W. Grimes static int 2235514ede09SBruce Evans vfs_hang_addrlist(mp, nep, argp) 2236514ede09SBruce Evans struct mount *mp; 2237514ede09SBruce Evans struct netexport *nep; 2238514ede09SBruce Evans struct export_args *argp; 2239df8bae1dSRodney W. Grimes { 2240df8bae1dSRodney W. 
Grimes register struct netcred *np; 2241df8bae1dSRodney W. Grimes register struct radix_node_head *rnh; 2242df8bae1dSRodney W. Grimes register int i; 2243df8bae1dSRodney W. Grimes struct radix_node *rn; 2244df8bae1dSRodney W. Grimes struct sockaddr *saddr, *smask = 0; 2245df8bae1dSRodney W. Grimes struct domain *dom; 2246df8bae1dSRodney W. Grimes int error; 2247df8bae1dSRodney W. Grimes 2248df8bae1dSRodney W. Grimes if (argp->ex_addrlen == 0) { 2249df8bae1dSRodney W. Grimes if (mp->mnt_flag & MNT_DEFEXPORTED) 2250df8bae1dSRodney W. Grimes return (EPERM); 2251df8bae1dSRodney W. Grimes np = &nep->ne_defexported; 2252df8bae1dSRodney W. Grimes np->netc_exflags = argp->ex_flags; 2253df8bae1dSRodney W. Grimes np->netc_anon = argp->ex_anon; 2254df8bae1dSRodney W. Grimes np->netc_anon.cr_ref = 1; 2255df8bae1dSRodney W. Grimes mp->mnt_flag |= MNT_DEFEXPORTED; 2256df8bae1dSRodney W. Grimes return (0); 2257df8bae1dSRodney W. Grimes } 2258df8bae1dSRodney W. Grimes i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2259df8bae1dSRodney W. Grimes np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2260df8bae1dSRodney W. Grimes bzero((caddr_t) np, i); 2261df8bae1dSRodney W. Grimes saddr = (struct sockaddr *) (np + 1); 2262bb56ec4aSPoul-Henning Kamp if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2263df8bae1dSRodney W. Grimes goto out; 2264df8bae1dSRodney W. Grimes if (saddr->sa_len > argp->ex_addrlen) 2265df8bae1dSRodney W. Grimes saddr->sa_len = argp->ex_addrlen; 2266df8bae1dSRodney W. Grimes if (argp->ex_masklen) { 2267df8bae1dSRodney W. Grimes smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 22685f61c81dSPeter Wemm error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2269df8bae1dSRodney W. Grimes if (error) 2270df8bae1dSRodney W. Grimes goto out; 2271df8bae1dSRodney W. Grimes if (smask->sa_len > argp->ex_masklen) 2272df8bae1dSRodney W. Grimes smask->sa_len = argp->ex_masklen; 2273df8bae1dSRodney W. 
Grimes } 2274df8bae1dSRodney W. Grimes i = saddr->sa_family; 2275df8bae1dSRodney W. Grimes if ((rnh = nep->ne_rtable[i]) == 0) { 2276df8bae1dSRodney W. Grimes /* 22770d94caffSDavid Greenman * Seems silly to initialize every AF when most are not used, 22780d94caffSDavid Greenman * do so on demand here 2279df8bae1dSRodney W. Grimes */ 2280df8bae1dSRodney W. Grimes for (dom = domains; dom; dom = dom->dom_next) 2281df8bae1dSRodney W. Grimes if (dom->dom_family == i && dom->dom_rtattach) { 2282df8bae1dSRodney W. Grimes dom->dom_rtattach((void **) &nep->ne_rtable[i], 2283df8bae1dSRodney W. Grimes dom->dom_rtoffset); 2284df8bae1dSRodney W. Grimes break; 2285df8bae1dSRodney W. Grimes } 2286df8bae1dSRodney W. Grimes if ((rnh = nep->ne_rtable[i]) == 0) { 2287df8bae1dSRodney W. Grimes error = ENOBUFS; 2288df8bae1dSRodney W. Grimes goto out; 2289df8bae1dSRodney W. Grimes } 2290df8bae1dSRodney W. Grimes } 2291df8bae1dSRodney W. Grimes rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2292df8bae1dSRodney W. Grimes np->netc_rnodes); 2293df8bae1dSRodney W. Grimes if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2294df8bae1dSRodney W. Grimes error = EPERM; 2295df8bae1dSRodney W. Grimes goto out; 2296df8bae1dSRodney W. Grimes } 2297df8bae1dSRodney W. Grimes np->netc_exflags = argp->ex_flags; 2298df8bae1dSRodney W. Grimes np->netc_anon = argp->ex_anon; 2299df8bae1dSRodney W. Grimes np->netc_anon.cr_ref = 1; 2300df8bae1dSRodney W. Grimes return (0); 2301df8bae1dSRodney W. Grimes out: 2302df8bae1dSRodney W. Grimes free(np, M_NETADDR); 2303df8bae1dSRodney W. Grimes return (error); 2304df8bae1dSRodney W. Grimes } 2305df8bae1dSRodney W. Grimes 2306df8bae1dSRodney W. Grimes /* ARGSUSED */ 2307df8bae1dSRodney W. Grimes static int 2308514ede09SBruce Evans vfs_free_netcred(rn, w) 2309514ede09SBruce Evans struct radix_node *rn; 2310514ede09SBruce Evans void *w; 2311df8bae1dSRodney W. Grimes { 2312df8bae1dSRodney W. 
Grimes register struct radix_node_head *rnh = (struct radix_node_head *) w; 2313df8bae1dSRodney W. Grimes 2314df8bae1dSRodney W. Grimes (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2315df8bae1dSRodney W. Grimes free((caddr_t) rn, M_NETADDR); 2316df8bae1dSRodney W. Grimes return (0); 2317df8bae1dSRodney W. Grimes } 2318df8bae1dSRodney W. Grimes 2319df8bae1dSRodney W. Grimes /* 2320df8bae1dSRodney W. Grimes * Free the net address hash lists that are hanging off the mount points. 2321df8bae1dSRodney W. Grimes */ 2322df8bae1dSRodney W. Grimes static void 2323514ede09SBruce Evans vfs_free_addrlist(nep) 2324514ede09SBruce Evans struct netexport *nep; 2325df8bae1dSRodney W. Grimes { 2326df8bae1dSRodney W. Grimes register int i; 2327df8bae1dSRodney W. Grimes register struct radix_node_head *rnh; 2328df8bae1dSRodney W. Grimes 2329df8bae1dSRodney W. Grimes for (i = 0; i <= AF_MAX; i++) 2330bb56ec4aSPoul-Henning Kamp if ((rnh = nep->ne_rtable[i])) { 2331df8bae1dSRodney W. Grimes (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2332df8bae1dSRodney W. Grimes (caddr_t) rnh); 2333df8bae1dSRodney W. Grimes free((caddr_t) rnh, M_RTABLE); 2334df8bae1dSRodney W. Grimes nep->ne_rtable[i] = 0; 2335df8bae1dSRodney W. Grimes } 2336df8bae1dSRodney W. Grimes } 2337df8bae1dSRodney W. Grimes 233821a90397SAlfred Perlstein /* 233921a90397SAlfred Perlstein * High level function to manipulate export options on a mount point 234021a90397SAlfred Perlstein * and the passed in netexport. 234121a90397SAlfred Perlstein * Struct export_args *argp is the variable used to twiddle options, 234221a90397SAlfred Perlstein * the structure is described in sys/mount.h 234321a90397SAlfred Perlstein */ 2344df8bae1dSRodney W. Grimes int 2345df8bae1dSRodney W. Grimes vfs_export(mp, nep, argp) 2346df8bae1dSRodney W. Grimes struct mount *mp; 2347df8bae1dSRodney W. Grimes struct netexport *nep; 2348df8bae1dSRodney W. Grimes struct export_args *argp; 2349df8bae1dSRodney W. Grimes { 2350df8bae1dSRodney W. 
Grimes int error; 2351df8bae1dSRodney W. Grimes 2352df8bae1dSRodney W. Grimes if (argp->ex_flags & MNT_DELEXPORT) { 2353f6b4c285SDoug Rabson if (mp->mnt_flag & MNT_EXPUBLIC) { 2354f6b4c285SDoug Rabson vfs_setpublicfs(NULL, NULL, NULL); 2355f6b4c285SDoug Rabson mp->mnt_flag &= ~MNT_EXPUBLIC; 2356f6b4c285SDoug Rabson } 2357df8bae1dSRodney W. Grimes vfs_free_addrlist(nep); 2358df8bae1dSRodney W. Grimes mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2359df8bae1dSRodney W. Grimes } 2360df8bae1dSRodney W. Grimes if (argp->ex_flags & MNT_EXPORTED) { 2361f6b4c285SDoug Rabson if (argp->ex_flags & MNT_EXPUBLIC) { 2362f6b4c285SDoug Rabson if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2363f6b4c285SDoug Rabson return (error); 2364f6b4c285SDoug Rabson mp->mnt_flag |= MNT_EXPUBLIC; 2365f6b4c285SDoug Rabson } 2366bb56ec4aSPoul-Henning Kamp if ((error = vfs_hang_addrlist(mp, nep, argp))) 2367df8bae1dSRodney W. Grimes return (error); 2368df8bae1dSRodney W. Grimes mp->mnt_flag |= MNT_EXPORTED; 2369df8bae1dSRodney W. Grimes } 2370df8bae1dSRodney W. Grimes return (0); 2371df8bae1dSRodney W. Grimes } 2372df8bae1dSRodney W. Grimes 2373f6b4c285SDoug Rabson /* 2374f6b4c285SDoug Rabson * Set the publicly exported filesystem (WebNFS). Currently, only 2375f6b4c285SDoug Rabson * one public filesystem is possible in the spec (RFC 2054 and 2055) 2376f6b4c285SDoug Rabson */ 2377f6b4c285SDoug Rabson int 2378f6b4c285SDoug Rabson vfs_setpublicfs(mp, nep, argp) 2379f6b4c285SDoug Rabson struct mount *mp; 2380f6b4c285SDoug Rabson struct netexport *nep; 2381f6b4c285SDoug Rabson struct export_args *argp; 2382f6b4c285SDoug Rabson { 2383f6b4c285SDoug Rabson int error; 2384f6b4c285SDoug Rabson struct vnode *rvp; 2385f6b4c285SDoug Rabson char *cp; 2386f6b4c285SDoug Rabson 2387f6b4c285SDoug Rabson /* 2388f6b4c285SDoug Rabson * mp == NULL -> invalidate the current info, the FS is 2389f6b4c285SDoug Rabson * no longer exported. 
May be called from either vfs_export 2390f6b4c285SDoug Rabson * or unmount, so check if it hasn't already been done. 2391f6b4c285SDoug Rabson */ 2392f6b4c285SDoug Rabson if (mp == NULL) { 2393f6b4c285SDoug Rabson if (nfs_pub.np_valid) { 2394f6b4c285SDoug Rabson nfs_pub.np_valid = 0; 2395f6b4c285SDoug Rabson if (nfs_pub.np_index != NULL) { 2396f6b4c285SDoug Rabson FREE(nfs_pub.np_index, M_TEMP); 2397f6b4c285SDoug Rabson nfs_pub.np_index = NULL; 2398f6b4c285SDoug Rabson } 2399f6b4c285SDoug Rabson } 2400f6b4c285SDoug Rabson return (0); 2401f6b4c285SDoug Rabson } 2402f6b4c285SDoug Rabson 2403f6b4c285SDoug Rabson /* 2404f6b4c285SDoug Rabson * Only one allowed at a time. 2405f6b4c285SDoug Rabson */ 2406f6b4c285SDoug Rabson if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2407f6b4c285SDoug Rabson return (EBUSY); 2408f6b4c285SDoug Rabson 2409f6b4c285SDoug Rabson /* 2410f6b4c285SDoug Rabson * Get real filehandle for root of exported FS. 2411f6b4c285SDoug Rabson */ 2412f6b4c285SDoug Rabson bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2413f6b4c285SDoug Rabson nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2414f6b4c285SDoug Rabson 2415f6b4c285SDoug Rabson if ((error = VFS_ROOT(mp, &rvp))) 2416f6b4c285SDoug Rabson return (error); 2417f6b4c285SDoug Rabson 2418f6b4c285SDoug Rabson if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2419f6b4c285SDoug Rabson return (error); 2420f6b4c285SDoug Rabson 2421f6b4c285SDoug Rabson vput(rvp); 2422f6b4c285SDoug Rabson 2423f6b4c285SDoug Rabson /* 2424f6b4c285SDoug Rabson * If an indexfile was specified, pull it in. 
2425f6b4c285SDoug Rabson */ 2426f6b4c285SDoug Rabson if (argp->ex_indexfile != NULL) { 2427f6b4c285SDoug Rabson MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2428f6b4c285SDoug Rabson M_WAITOK); 2429f6b4c285SDoug Rabson error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2430f6b4c285SDoug Rabson MAXNAMLEN, (size_t *)0); 2431f6b4c285SDoug Rabson if (!error) { 2432f6b4c285SDoug Rabson /* 2433f6b4c285SDoug Rabson * Check for illegal filenames. 2434f6b4c285SDoug Rabson */ 2435f6b4c285SDoug Rabson for (cp = nfs_pub.np_index; *cp; cp++) { 2436f6b4c285SDoug Rabson if (*cp == '/') { 2437f6b4c285SDoug Rabson error = EINVAL; 2438f6b4c285SDoug Rabson break; 2439f6b4c285SDoug Rabson } 2440f6b4c285SDoug Rabson } 2441f6b4c285SDoug Rabson } 2442f6b4c285SDoug Rabson if (error) { 2443f6b4c285SDoug Rabson FREE(nfs_pub.np_index, M_TEMP); 2444f6b4c285SDoug Rabson return (error); 2445f6b4c285SDoug Rabson } 2446f6b4c285SDoug Rabson } 2447f6b4c285SDoug Rabson 2448f6b4c285SDoug Rabson nfs_pub.np_mount = mp; 2449f6b4c285SDoug Rabson nfs_pub.np_valid = 1; 2450f6b4c285SDoug Rabson return (0); 2451f6b4c285SDoug Rabson } 2452f6b4c285SDoug Rabson 245321a90397SAlfred Perlstein /* 245421a90397SAlfred Perlstein * Used by the filesystems to determine if a given network address 245521a90397SAlfred Perlstein * (passed in 'nam') is present in thier exports list, returns a pointer 245621a90397SAlfred Perlstein * to struct netcred so that the filesystem can examine it for 245721a90397SAlfred Perlstein * access rights (read/write/etc). 245821a90397SAlfred Perlstein */ 2459df8bae1dSRodney W. Grimes struct netcred * 2460df8bae1dSRodney W. Grimes vfs_export_lookup(mp, nep, nam) 2461df8bae1dSRodney W. Grimes register struct mount *mp; 2462df8bae1dSRodney W. Grimes struct netexport *nep; 246357bf258eSGarrett Wollman struct sockaddr *nam; 2464df8bae1dSRodney W. Grimes { 2465df8bae1dSRodney W. Grimes register struct netcred *np; 2466df8bae1dSRodney W. 
Grimes register struct radix_node_head *rnh; 2467df8bae1dSRodney W. Grimes struct sockaddr *saddr; 2468df8bae1dSRodney W. Grimes 2469df8bae1dSRodney W. Grimes np = NULL; 2470df8bae1dSRodney W. Grimes if (mp->mnt_flag & MNT_EXPORTED) { 2471df8bae1dSRodney W. Grimes /* 2472df8bae1dSRodney W. Grimes * Lookup in the export list first. 2473df8bae1dSRodney W. Grimes */ 2474df8bae1dSRodney W. Grimes if (nam != NULL) { 247557bf258eSGarrett Wollman saddr = nam; 2476df8bae1dSRodney W. Grimes rnh = nep->ne_rtable[saddr->sa_family]; 2477df8bae1dSRodney W. Grimes if (rnh != NULL) { 2478df8bae1dSRodney W. Grimes np = (struct netcred *) 2479df8bae1dSRodney W. Grimes (*rnh->rnh_matchaddr)((caddr_t)saddr, 2480df8bae1dSRodney W. Grimes rnh); 2481df8bae1dSRodney W. Grimes if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2482df8bae1dSRodney W. Grimes np = NULL; 2483df8bae1dSRodney W. Grimes } 2484df8bae1dSRodney W. Grimes } 2485df8bae1dSRodney W. Grimes /* 2486df8bae1dSRodney W. Grimes * If no address match, use the default if it exists. 2487df8bae1dSRodney W. Grimes */ 2488df8bae1dSRodney W. Grimes if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2489df8bae1dSRodney W. Grimes np = &nep->ne_defexported; 2490df8bae1dSRodney W. Grimes } 2491df8bae1dSRodney W. Grimes return (np); 2492df8bae1dSRodney W. Grimes } 249361f5d510SDavid Greenman 249461f5d510SDavid Greenman /* 249561f5d510SDavid Greenman * perform msync on all vnodes under a mount point 249661f5d510SDavid Greenman * the mount point must be locked. 
249761f5d510SDavid Greenman */ 249861f5d510SDavid Greenman void 249961f5d510SDavid Greenman vfs_msync(struct mount *mp, int flags) { 2500a316d390SJohn Dyson struct vnode *vp, *nvp; 250137b8ccd3SPeter Wemm struct vm_object *obj; 250295e5e988SJohn Dyson int anyio, tries; 250395e5e988SJohn Dyson 250495e5e988SJohn Dyson tries = 5; 250561f5d510SDavid Greenman loop: 250695e5e988SJohn Dyson anyio = 0; 25071b727751SPoul-Henning Kamp for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) { 250861f5d510SDavid Greenman 25091b727751SPoul-Henning Kamp nvp = LIST_NEXT(vp, v_mntvnodes); 251095e5e988SJohn Dyson 251195e5e988SJohn Dyson if (vp->v_mount != mp) { 251295e5e988SJohn Dyson goto loop; 251395e5e988SJohn Dyson } 251495e5e988SJohn Dyson 251537b8ccd3SPeter Wemm if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 251637b8ccd3SPeter Wemm continue; 251737b8ccd3SPeter Wemm 251837b8ccd3SPeter Wemm if (flags != MNT_WAIT) { 25199ff5ce6bSBoris Popov if (VOP_GETVOBJECT(vp, &obj) != 0 || 25209ff5ce6bSBoris Popov (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 252137b8ccd3SPeter Wemm continue; 25226bdfe06aSEivind Eklund if (VOP_ISLOCKED(vp, NULL)) 252361f5d510SDavid Greenman continue; 252495e5e988SJohn Dyson } 252595e5e988SJohn Dyson 252695e5e988SJohn Dyson simple_lock(&vp->v_interlock); 25279ff5ce6bSBoris Popov if (VOP_GETVOBJECT(vp, &obj) == 0 && 25289ff5ce6bSBoris Popov (obj->flags & OBJ_MIGHTBEDIRTY)) { 252995e5e988SJohn Dyson if (!vget(vp, 253095e5e988SJohn Dyson LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 25319ff5ce6bSBoris Popov if (VOP_GETVOBJECT(vp, &obj) == 0) { 25329ff5ce6bSBoris Popov vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? 
OBJPC_SYNC : OBJPC_NOSYNC); 253395e5e988SJohn Dyson anyio = 1; 253495e5e988SJohn Dyson } 253595e5e988SJohn Dyson vput(vp); 253695e5e988SJohn Dyson } 253795e5e988SJohn Dyson } else { 253895e5e988SJohn Dyson simple_unlock(&vp->v_interlock); 253961f5d510SDavid Greenman } 254061f5d510SDavid Greenman } 254195e5e988SJohn Dyson if (anyio && (--tries > 0)) 254295e5e988SJohn Dyson goto loop; 254361f5d510SDavid Greenman } 25446476c0d2SJohn Dyson 25456476c0d2SJohn Dyson /* 25466476c0d2SJohn Dyson * Create the VM object needed for VMIO and mmap support. This 25476476c0d2SJohn Dyson * is done for all VREG files in the system. Some filesystems might 25486476c0d2SJohn Dyson * afford the additional metadata buffering capability of the 25496476c0d2SJohn Dyson * VMIO code by making the device node be VMIO mode also. 255095e5e988SJohn Dyson * 2551fb116777SEivind Eklund * vp must be locked when vfs_object_create is called. 25526476c0d2SJohn Dyson */ 25536476c0d2SJohn Dyson int 2554fb116777SEivind Eklund vfs_object_create(vp, p, cred) 25556476c0d2SJohn Dyson struct vnode *vp; 25566476c0d2SJohn Dyson struct proc *p; 25576476c0d2SJohn Dyson struct ucred *cred; 25586476c0d2SJohn Dyson { 25599ff5ce6bSBoris Popov return (VOP_CREATEVOBJECT(vp, cred, p)); 25606476c0d2SJohn Dyson } 2561b15a966eSPoul-Henning Kamp 2562c904bbbdSKirk McKusick void 2563a051452aSPoul-Henning Kamp vfree(vp) 2564b15a966eSPoul-Henning Kamp struct vnode *vp; 2565b15a966eSPoul-Henning Kamp { 2566925a3a41SJohn Dyson int s; 2567925a3a41SJohn Dyson 2568925a3a41SJohn Dyson s = splbio(); 2569a051452aSPoul-Henning Kamp simple_lock(&vnode_free_list_slock); 2570c904bbbdSKirk McKusick KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free")); 2571a051452aSPoul-Henning Kamp if (vp->v_flag & VAGE) { 2572a051452aSPoul-Henning Kamp TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2573a051452aSPoul-Henning Kamp } else { 2574b15a966eSPoul-Henning Kamp TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 25758670684aSPoul-Henning 
Kamp } 2576a051452aSPoul-Henning Kamp freevnodes++; 2577b15a966eSPoul-Henning Kamp simple_unlock(&vnode_free_list_slock); 2578a051452aSPoul-Henning Kamp vp->v_flag &= ~VAGE; 2579a051452aSPoul-Henning Kamp vp->v_flag |= VFREE; 2580925a3a41SJohn Dyson splx(s); 2581b15a966eSPoul-Henning Kamp } 2582a051452aSPoul-Henning Kamp 258347221757SJohn Dyson void 2584a051452aSPoul-Henning Kamp vbusy(vp) 2585a051452aSPoul-Henning Kamp struct vnode *vp; 2586a051452aSPoul-Henning Kamp { 2587925a3a41SJohn Dyson int s; 2588925a3a41SJohn Dyson 2589925a3a41SJohn Dyson s = splbio(); 2590a051452aSPoul-Henning Kamp simple_lock(&vnode_free_list_slock); 2591c904bbbdSKirk McKusick KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free")); 2592a051452aSPoul-Henning Kamp TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2593a051452aSPoul-Henning Kamp freevnodes--; 2594a051452aSPoul-Henning Kamp simple_unlock(&vnode_free_list_slock); 259564d3c7e3SJohn Dyson vp->v_flag &= ~(VFREE|VAGE); 2596925a3a41SJohn Dyson splx(s); 2597b15a966eSPoul-Henning Kamp } 25981cbbd625SGarrett Wollman 25991cbbd625SGarrett Wollman /* 26001cbbd625SGarrett Wollman * Record a process's interest in events which might happen to 26011cbbd625SGarrett Wollman * a vnode. Because poll uses the historic select-style interface 26021cbbd625SGarrett Wollman * internally, this routine serves as both the ``check for any 26031cbbd625SGarrett Wollman * pending events'' and the ``record my interest in future events'' 26041cbbd625SGarrett Wollman * functions. (These are done together, while the lock is held, 26051cbbd625SGarrett Wollman * to avoid race conditions.) 
26061cbbd625SGarrett Wollman */ 26071cbbd625SGarrett Wollman int 26081cbbd625SGarrett Wollman vn_pollrecord(vp, p, events) 26091cbbd625SGarrett Wollman struct vnode *vp; 26101cbbd625SGarrett Wollman struct proc *p; 26111cbbd625SGarrett Wollman short events; 26121cbbd625SGarrett Wollman { 26131cbbd625SGarrett Wollman simple_lock(&vp->v_pollinfo.vpi_lock); 26141cbbd625SGarrett Wollman if (vp->v_pollinfo.vpi_revents & events) { 26151cbbd625SGarrett Wollman /* 26161cbbd625SGarrett Wollman * This leaves events we are not interested 26171cbbd625SGarrett Wollman * in available for the other process which 26181cbbd625SGarrett Wollman * which presumably had requested them 26191cbbd625SGarrett Wollman * (otherwise they would never have been 26201cbbd625SGarrett Wollman * recorded). 26211cbbd625SGarrett Wollman */ 26221cbbd625SGarrett Wollman events &= vp->v_pollinfo.vpi_revents; 26231cbbd625SGarrett Wollman vp->v_pollinfo.vpi_revents &= ~events; 26241cbbd625SGarrett Wollman 26251cbbd625SGarrett Wollman simple_unlock(&vp->v_pollinfo.vpi_lock); 26261cbbd625SGarrett Wollman return events; 26271cbbd625SGarrett Wollman } 26281cbbd625SGarrett Wollman vp->v_pollinfo.vpi_events |= events; 26291cbbd625SGarrett Wollman selrecord(p, &vp->v_pollinfo.vpi_selinfo); 26301cbbd625SGarrett Wollman simple_unlock(&vp->v_pollinfo.vpi_lock); 26311cbbd625SGarrett Wollman return 0; 26321cbbd625SGarrett Wollman } 26331cbbd625SGarrett Wollman 26341cbbd625SGarrett Wollman /* 26351cbbd625SGarrett Wollman * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 26361cbbd625SGarrett Wollman * it is possible for us to miss an event due to race conditions, but 26371cbbd625SGarrett Wollman * that condition is expected to be rare, so for the moment it is the 26381cbbd625SGarrett Wollman * preferred interface. 
 */
void
vn_pollevent(vp, events)
	struct vnode *vp;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
		vp->v_pollinfo.vpi_revents |= events;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events) {
		/* Discard all recorded interest and wake the pollers. */
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}



/*
 * Routine to create and manage a filesystem syncer vnode.
 *
 * The syncer uses a private vnode-operations vector; any operation
 * not listed in the table below resolves to vop_eopnotsupp via the
 * default descriptor.
 */
#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
static int	sync_fsync __P((struct vop_fsync_args *));
static int	sync_inactive __P((struct vop_inactive_args *));
static int	sync_reclaim __P((struct vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
static int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point
 * and hang it off mp->mnt_syncer.  On failure mnt_syncer is cleared
 * and the getnewvnode() error is returned.
 */
int
vfs_allocate_syncvnode(mp)
	struct mount *mp;
{
	struct vnode *vp;
	/* Persistent state used to spread syncer vnodes over the worklist. */
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;
	/*
	 * Place the vnode onto the syncer worklist.
	 * We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	/*
	 * "next" advances by "incr" for each syncer created; when it
	 * wraps past syncer_maxdelay, both "start" and "incr" are
	 * halved so the new round of slots interleaves between the
	 * slots handed out previously (reseeding from syncer_maxdelay
	 * once "start" reaches zero).
	 */
	next += incr;
	if (next == 0 || next > syncer_maxdelay) {
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = syncer_maxdelay / 2;
			incr = syncer_maxdelay;
		}
		next = start;
	}
	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	mp->mnt_syncer = vp;
	return (0);
}

/*
 * Do a lazy sync of the filesystem.
 */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
2771b1897c19SJulian Elischer */ 2772b1897c19SJulian Elischer if (ap->a_waitfor != MNT_LAZY) 2773b1897c19SJulian Elischer return (0); 2774b1897c19SJulian Elischer 2775b1897c19SJulian Elischer /* 2776b1897c19SJulian Elischer * Move ourselves to the back of the sync list. 2777b1897c19SJulian Elischer */ 2778b1897c19SJulian Elischer vn_syncer_add_to_worklist(syncvp, syncdelay); 2779b1897c19SJulian Elischer 2780b1897c19SJulian Elischer /* 2781b1897c19SJulian Elischer * Walk the list of vnodes pushing all that are dirty and 2782b1897c19SJulian Elischer * not already on the sync list. 2783b1897c19SJulian Elischer */ 2784b1897c19SJulian Elischer simple_lock(&mountlist_slock); 278571033a8cSTor Egge if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { 278671033a8cSTor Egge simple_unlock(&mountlist_slock); 2787b1897c19SJulian Elischer return (0); 278871033a8cSTor Egge } 2789f2a2857bSKirk McKusick if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { 2790f2a2857bSKirk McKusick vfs_unbusy(mp, p); 2791f2a2857bSKirk McKusick simple_unlock(&mountlist_slock); 2792f2a2857bSKirk McKusick return (0); 2793f2a2857bSKirk McKusick } 2794b1897c19SJulian Elischer asyncflag = mp->mnt_flag & MNT_ASYNC; 2795b1897c19SJulian Elischer mp->mnt_flag &= ~MNT_ASYNC; 2796efdc5523SPeter Wemm vfs_msync(mp, MNT_NOWAIT); 2797b1897c19SJulian Elischer VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2798b1897c19SJulian Elischer if (asyncflag) 2799b1897c19SJulian Elischer mp->mnt_flag |= MNT_ASYNC; 2800f2a2857bSKirk McKusick vn_finished_write(mp); 2801b1897c19SJulian Elischer vfs_unbusy(mp, p); 2802b1897c19SJulian Elischer return (0); 2803b1897c19SJulian Elischer } 2804b1897c19SJulian Elischer 2805b1897c19SJulian Elischer /* 2806b1897c19SJulian Elischer * The syncer vnode is no referenced. 
 */
static int
sync_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	/* Recycle the syncer vnode as soon as it goes inactive. */
	vgone(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 *
 * Modifications to the worklist must be protected at splbio().
 */
static int
sync_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	int s;

	s = splbio();
	/* Detach from the mount and pull ourselves off the worklist. */
	vp->v_mount->mnt_syncer = NULL;
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		vp->v_flag &= ~VONWORKLST;
	}
	splx(s);

	return (0);
}

/*
 * Print out a syncer vnode.
 */
static int
sync_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("syncer vnode");
	if (vp->v_vnlock != NULL)
		lockmgr_printinfo(vp->v_vnlock);
	printf("\n");
	return (0);
}

/*
 * Extract the dev_t from a VBLK or VCHR vnode; NODEV for any other
 * vnode type.
 */
dev_t
vn_todev(vp)
	struct vnode *vp;
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (NODEV);
	return (vp->v_rdev);
}

/*
 * Check if vnode represents a disk device.  Returns 1 if it does,
 * 0 otherwise; when errp is non-NULL it receives 0 on success or an
 * errno describing why the vnode is not a disk.
 */
int
vn_isdisk(vp, errp)
	struct vnode *vp;
	int *errp;
{
	struct cdevsw *cdevsw;

	/* Not a device vnode at all. */
	if (vp->v_type != VBLK && vp->v_type != VCHR) {
		if (errp != NULL)
			*errp = ENOTBLK;
		return (0);
	}
	/* Device vnode without an attached device. */
	if (vp->v_rdev == NULL) {
		if (errp != NULL)
			*errp = ENXIO;
		return (0);
	}
	/* No driver registered for this device. */
	cdevsw = devsw(vp->v_rdev);
	if (cdevsw == NULL) {
		if (errp != NULL)
			*errp = ENXIO;
		return (0);
	}
	/* Driver present but not flagged as a disk. */
	if (!(cdevsw->d_flags & D_DISK)) {
		if (errp != NULL)
			*errp = ENOTBLK;
		return (0);
	}
	if (errp != NULL)
		*errp = 0;
	return (1);
}

/*
 * Release the nameidata state selected by "flags": the pathname
 * buffer plus the locks/references on the parent directory, the leaf
 * vnode and the saved start directory.  Each NDF_NO_* bit in "flags"
 * suppresses one piece of cleanup.
 */
void
NDFREE(ndp, flags)
	struct nameidata *ndp;
	const uint flags;
{
	/* Free the pathname buffer if one is attached (HASBUF). */
	if (!(flags & NDF_NO_FREE_PNBUF) &&
	    (ndp->ni_cnd.cn_flags & HASBUF)) {
		zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
		ndp->ni_cnd.cn_flags &= ~HASBUF;
	}
	/* Unlock the parent directory, unless it is also the leaf. */
	if (!(flags & NDF_NO_DVP_UNLOCK) &&
	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
	    ndp->ni_dvp != ndp->ni_vp)
		VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc);
	/* Drop the reference held on the parent directory. */
	if (!(flags & NDF_NO_DVP_RELE) &&
	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
		vrele(ndp->ni_dvp);
		ndp->ni_dvp = NULL;
	}
	/* Unlock the leaf vnode if LOCKLEAF left it locked. */
	if (!(flags & NDF_NO_VP_UNLOCK) &&
	    (ndp->ni_cnd.cn_flags & LOCKLEAF) &&
	    ndp->ni_vp)
		VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc);
	/* Drop the reference held on the leaf vnode. */
	if (!(flags & NDF_NO_VP_RELE) &&
	    ndp->ni_vp) {
		vrele(ndp->ni_vp);
		ndp->ni_vp = NULL;
	}
	/* Release the start directory saved under SAVESTART. */
	if (!(flags & NDF_NO_STARTDIR_RELE) &&
	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
		vrele(ndp->ni_startdir);
		ndp->ni_startdir = NULL;
	}
}

/*
 * Common file system object access control check routine.  Accepts a
 * vnode's type, "mode", uid and gid, requested access mode, credentials,
 * and optional call-by-reference privused argument allowing vaccess()
 * to indicate to the caller whether privilege was used to satisfy the
 * request.  Returns 0 on success, or an errno on failure.
2951e0848358SRobert Watson */ 2952e39c53edSPoul-Henning Kamp int 2953012c643dSRobert Watson vaccess(type, file_mode, file_uid, file_gid, acc_mode, cred, privused) 2954e39c53edSPoul-Henning Kamp enum vtype type; 2955e39c53edSPoul-Henning Kamp mode_t file_mode; 2956012c643dSRobert Watson uid_t file_uid; 2957012c643dSRobert Watson gid_t file_gid; 2958e39c53edSPoul-Henning Kamp mode_t acc_mode; 2959e39c53edSPoul-Henning Kamp struct ucred *cred; 2960012c643dSRobert Watson int *privused; 2961e39c53edSPoul-Henning Kamp { 2962012c643dSRobert Watson mode_t dac_granted; 2963012c643dSRobert Watson #ifdef CAPABILITIES 2964012c643dSRobert Watson mode_t cap_granted; 2965012c643dSRobert Watson #endif 2966e39c53edSPoul-Henning Kamp 2967e39c53edSPoul-Henning Kamp /* 2968012c643dSRobert Watson * Look for a normal, non-privileged way to access the file/directory 2969012c643dSRobert Watson * as requested. If it exists, go with that. 2970e39c53edSPoul-Henning Kamp */ 2971012c643dSRobert Watson 2972012c643dSRobert Watson if (privused != NULL) 2973012c643dSRobert Watson *privused = 0; 2974012c643dSRobert Watson 2975012c643dSRobert Watson dac_granted = 0; 2976012c643dSRobert Watson 2977012c643dSRobert Watson /* Check the owner. 
*/ 2978012c643dSRobert Watson if (cred->cr_uid == file_uid) { 2979012c643dSRobert Watson if (file_mode & S_IXUSR) 2980012c643dSRobert Watson dac_granted |= VEXEC; 2981012c643dSRobert Watson if (file_mode & S_IRUSR) 2982012c643dSRobert Watson dac_granted |= VREAD; 2983012c643dSRobert Watson if (file_mode & S_IWUSR) 2984012c643dSRobert Watson dac_granted |= VWRITE; 2985012c643dSRobert Watson 2986012c643dSRobert Watson if ((acc_mode & dac_granted) == acc_mode) 2987e39c53edSPoul-Henning Kamp return (0); 2988e39c53edSPoul-Henning Kamp 2989012c643dSRobert Watson goto privcheck; 2990e39c53edSPoul-Henning Kamp } 2991e39c53edSPoul-Henning Kamp 2992012c643dSRobert Watson /* Otherwise, check the groups (first match) */ 2993012c643dSRobert Watson if (groupmember(file_gid, cred)) { 2994012c643dSRobert Watson if (file_mode & S_IXGRP) 2995012c643dSRobert Watson dac_granted |= VEXEC; 2996012c643dSRobert Watson if (file_mode & S_IRGRP) 2997012c643dSRobert Watson dac_granted |= VREAD; 2998012c643dSRobert Watson if (file_mode & S_IWGRP) 2999012c643dSRobert Watson dac_granted |= VWRITE; 3000012c643dSRobert Watson 3001012c643dSRobert Watson if ((acc_mode & dac_granted) == acc_mode) 3002012c643dSRobert Watson return (0); 3003012c643dSRobert Watson 3004012c643dSRobert Watson goto privcheck; 3005e39c53edSPoul-Henning Kamp } 3006e39c53edSPoul-Henning Kamp 3007e39c53edSPoul-Henning Kamp /* Otherwise, check everyone else. 
*/ 3008012c643dSRobert Watson if (file_mode & S_IXOTH) 3009012c643dSRobert Watson dac_granted |= VEXEC; 3010012c643dSRobert Watson if (file_mode & S_IROTH) 3011012c643dSRobert Watson dac_granted |= VREAD; 3012012c643dSRobert Watson if (file_mode & S_IWOTH) 3013012c643dSRobert Watson dac_granted |= VWRITE; 3014012c643dSRobert Watson if ((acc_mode & dac_granted) == acc_mode) 3015012c643dSRobert Watson return (0); 3016012c643dSRobert Watson 3017012c643dSRobert Watson privcheck: 3018012c643dSRobert Watson if (!suser_xxx(cred, NULL, PRISON_ROOT)) { 3019012c643dSRobert Watson /* XXX audit: privilege used */ 3020012c643dSRobert Watson if (privused != NULL) 3021012c643dSRobert Watson *privused = 1; 3022012c643dSRobert Watson return (0); 3023012c643dSRobert Watson } 3024012c643dSRobert Watson 3025012c643dSRobert Watson #ifdef CAPABILITIES 3026012c643dSRobert Watson /* 3027012c643dSRobert Watson * Build a capability mask to determine if the set of capabilities 3028012c643dSRobert Watson * satisfies the requirements when combined with the granted mask 3029012c643dSRobert Watson * from above. 3030012c643dSRobert Watson * For each capability, if the capability is required, bitwise 3031012c643dSRobert Watson * or the request type onto the cap_granted mask. 
3032012c643dSRobert Watson */ 3033012c643dSRobert Watson cap_granted = 0; 3034012c643dSRobert Watson if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) && 3035728783c2SRobert Watson !cap_check_xxx(cred, NULL, CAP_DAC_EXECUTE, PRISON_ROOT)) 3036012c643dSRobert Watson cap_granted |= VEXEC; 3037012c643dSRobert Watson 3038012c643dSRobert Watson if ((acc_mode & VREAD) && ((dac_granted & VREAD) == 0) && 3039728783c2SRobert Watson !cap_check_xxx(cred, NULL, CAP_DAC_READ_SEARCH, PRISON_ROOT)) 3040012c643dSRobert Watson cap_granted |= VREAD; 3041012c643dSRobert Watson 3042012c643dSRobert Watson if ((acc_mode & VWRITE) && ((dac_granted & VWRITE) == 0) && 3043728783c2SRobert Watson !cap_check_xxx(cred, NULL, CAP_DAC_WRITE, PRISON_ROOT)) 3044012c643dSRobert Watson cap_granted |= VWRITE; 3045012c643dSRobert Watson 3046728783c2SRobert Watson if ((acc_mode & (cap_granted | dac_granted)) == acc_mode) { 3047012c643dSRobert Watson /* XXX audit: privilege used */ 3048012c643dSRobert Watson if (privused != NULL) 3049012c643dSRobert Watson *privused = 1; 3050012c643dSRobert Watson return (0); 3051012c643dSRobert Watson } 3052012c643dSRobert Watson #endif 3053012c643dSRobert Watson 3054012c643dSRobert Watson return (EACCES); 3055e39c53edSPoul-Henning Kamp } 3056