1d167cf6fSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1992, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * This code is derived from software donated to Berkeley by 6df8bae1dSRodney W. Grimes * Jan-Simon Pendry. 7df8bae1dSRodney W. Grimes * 8df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 9df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 10df8bae1dSRodney W. Grimes * are met: 11df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 12df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 13df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 15df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 16df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 17df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 18df8bae1dSRodney W. Grimes * without specific prior written permission. 19df8bae1dSRodney W. Grimes * 20df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30df8bae1dSRodney W. Grimes * SUCH DAMAGE. 31df8bae1dSRodney W. Grimes * 32996c772fSJohn Dyson * @(#)null_subr.c 8.7 (Berkeley) 5/14/95 33df8bae1dSRodney W. Grimes * 34c3aac50fSPeter Wemm * $FreeBSD$ 35df8bae1dSRodney W. Grimes */ 36df8bae1dSRodney W. Grimes 37df8bae1dSRodney W. Grimes #include <sys/param.h> 38df8bae1dSRodney W. Grimes #include <sys/systm.h> 398da80660SBoris Popov #include <sys/kernel.h> 40fb919e4dSMark Murray #include <sys/lock.h> 4115420031SSemen Ustimenko #include <sys/mutex.h> 42df8bae1dSRodney W. Grimes #include <sys/malloc.h> 43fb919e4dSMark Murray #include <sys/mount.h> 44fb919e4dSMark Murray #include <sys/proc.h> 45fb919e4dSMark Murray #include <sys/vnode.h> 46fb919e4dSMark Murray 4799d300a1SRuslan Ermilov #include <fs/nullfs/null.h> 48df8bae1dSRodney W. Grimes 4954939875STim J. Robbins #define LOG2_SIZEVNODE 8 /* log2(sizeof struct vnode) */ 50df8bae1dSRodney W. Grimes #define NNULLNODECACHE 16 51df8bae1dSRodney W. Grimes 52df8bae1dSRodney W. Grimes /* 53df8bae1dSRodney W. Grimes * Null layer cache: 54df8bae1dSRodney W. Grimes * Each cache entry holds a reference to the lower vnode 55df8bae1dSRodney W. Grimes * along with a pointer to the alias vnode. When an 56df8bae1dSRodney W. Grimes * entry is added the lower vnode is VREF'd. When the 57df8bae1dSRodney W. Grimes * alias is removed the lower vnode is vrele'd. 58df8bae1dSRodney W. Grimes */ 59df8bae1dSRodney W. Grimes 60996c772fSJohn Dyson #define NULL_NHASH(vp) \ 61a23d65bfSBruce Evans (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash]) 628da80660SBoris Popov 63e3975643SJake Burkholder static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; 64303b270bSEivind Eklund static u_long null_node_hash; 6515420031SSemen Ustimenko struct mtx null_hashmtx; 668da80660SBoris Popov 675bb84bc8SRobert Watson static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table"); 685bb84bc8SRobert Watson MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part"); 69df8bae1dSRodney W. Grimes 7054939875STim J. Robbins static struct vnode * null_hashget(struct mount *, struct vnode *); 7154939875STim J. Robbins static struct vnode * null_hashins(struct mount *, struct null_node *); 729b5e8b3aSBruce Evans 73df8bae1dSRodney W. Grimes /* 74df8bae1dSRodney W. Grimes * Initialise cache headers 75df8bae1dSRodney W. Grimes */ 7626f9a767SRodney W. Grimes int 77996c772fSJohn Dyson nullfs_init(vfsp) 78996c772fSJohn Dyson struct vfsconf *vfsp; 79df8bae1dSRodney W. Grimes { 80996c772fSJohn Dyson 818da80660SBoris Popov NULLFSDEBUG("nullfs_init\n"); /* printed during system boot */ 828da80660SBoris Popov null_node_hashtbl = hashinit(NNULLNODECACHE, M_NULLFSHASH, &null_node_hash); 8315420031SSemen Ustimenko mtx_init(&null_hashmtx, "nullhs", NULL, MTX_DEF); 848da80660SBoris Popov return (0); 858da80660SBoris Popov } 868da80660SBoris Popov 878da80660SBoris Popov int 888da80660SBoris Popov nullfs_uninit(vfsp) 898da80660SBoris Popov struct vfsconf *vfsp; 908da80660SBoris Popov { 918da80660SBoris Popov 9215420031SSemen Ustimenko mtx_destroy(&null_hashmtx); 938da80660SBoris Popov free(null_node_hashtbl, M_NULLFSHASH); 9426f9a767SRodney W. Grimes return (0); 95df8bae1dSRodney W. Grimes } 96df8bae1dSRodney W. Grimes 97df8bae1dSRodney W. Grimes /* 98df8bae1dSRodney W. Grimes * Return a VREF'ed alias for lower vnode if already exists, else 0. 994451405fSBoris Popov * Lower vnode should be locked on entry and will be left locked on exit. 100df8bae1dSRodney W. Grimes */ 101df8bae1dSRodney W. Grimes static struct vnode * 10254939875STim J. Robbins null_hashget(mp, lowervp) 10354939875STim J. Robbins struct mount *mp; 104df8bae1dSRodney W. Grimes struct vnode *lowervp; 105df8bae1dSRodney W. Grimes { 106b40ce416SJulian Elischer struct thread *td = curthread; /* XXX */ 107996c772fSJohn Dyson struct null_node_hashhead *hd; 108df8bae1dSRodney W. Grimes struct null_node *a; 109df8bae1dSRodney W. Grimes struct vnode *vp; 1109c12e631SJeff Roberson int error; 1119c12e631SJeff Roberson 1129c12e631SJeff Roberson ASSERT_VOP_LOCKED(lowervp, "null_hashget"); 113df8bae1dSRodney W. Grimes 114df8bae1dSRodney W. Grimes /* 115df8bae1dSRodney W. Grimes * Find hash base, and then search the (two-way) linked 116df8bae1dSRodney W. Grimes * list looking for a null_node structure which is referencing 117df8bae1dSRodney W. Grimes * the lower vnode. If found, the increment the null_node 118df8bae1dSRodney W. Grimes * reference count (but NOT the lower vnode's VREF counter). 119df8bae1dSRodney W. Grimes */ 120996c772fSJohn Dyson hd = NULL_NHASH(lowervp); 12115420031SSemen Ustimenko mtx_lock(&null_hashmtx); 122fc2ffbe6SPoul-Henning Kamp LIST_FOREACH(a, hd, null_hash) { 12354939875STim J. Robbins if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) { 124df8bae1dSRodney W. Grimes vp = NULLTOV(a); 125bc855512SJeff Roberson VI_LOCK(vp); 12615420031SSemen Ustimenko mtx_unlock(&null_hashmtx); 1278e82c4cdSJeff Roberson /* 1288e82c4cdSJeff Roberson * We need to clear the OWEINACT flag here as this 1298e82c4cdSJeff Roberson * may lead vget() to try to lock our vnode which 1308e82c4cdSJeff Roberson * is already locked via lowervp. 1318e82c4cdSJeff Roberson */ 1328e82c4cdSJeff Roberson vp->v_iflag &= ~VI_OWEINACT; 1339c12e631SJeff Roberson error = vget(vp, LK_INTERLOCK, td); 1349c12e631SJeff Roberson /* 1359c12e631SJeff Roberson * Since we have the lower node locked the nullfs 1369c12e631SJeff Roberson * node can not be in the process of recycling. If 1379c12e631SJeff Roberson * it had been recycled before we grabed the lower 1389c12e631SJeff Roberson * lock it would not have been found on the hash. 1399c12e631SJeff Roberson */ 1409c12e631SJeff Roberson if (error) 1419c12e631SJeff Roberson panic("null_hashget: vget error %d", error); 142df8bae1dSRodney W. Grimes return (vp); 143df8bae1dSRodney W. Grimes } 144df8bae1dSRodney W. Grimes } 14515420031SSemen Ustimenko mtx_unlock(&null_hashmtx); 1461cfdefbbSSemen Ustimenko return (NULLVP); 147df8bae1dSRodney W. Grimes } 148df8bae1dSRodney W. Grimes 1491cfdefbbSSemen Ustimenko /* 1501cfdefbbSSemen Ustimenko * Act like null_hashget, but add passed null_node to hash if no existing 1511cfdefbbSSemen Ustimenko * node found. 1521cfdefbbSSemen Ustimenko */ 1531cfdefbbSSemen Ustimenko static struct vnode * 15454939875STim J. Robbins null_hashins(mp, xp) 15554939875STim J. Robbins struct mount *mp; 1561cfdefbbSSemen Ustimenko struct null_node *xp; 1571cfdefbbSSemen Ustimenko { 1581cfdefbbSSemen Ustimenko struct thread *td = curthread; /* XXX */ 1591cfdefbbSSemen Ustimenko struct null_node_hashhead *hd; 1601cfdefbbSSemen Ustimenko struct null_node *oxp; 1611cfdefbbSSemen Ustimenko struct vnode *ovp; 1629c12e631SJeff Roberson int error; 1631cfdefbbSSemen Ustimenko 1641cfdefbbSSemen Ustimenko hd = NULL_NHASH(xp->null_lowervp); 1651cfdefbbSSemen Ustimenko mtx_lock(&null_hashmtx); 1661cfdefbbSSemen Ustimenko LIST_FOREACH(oxp, hd, null_hash) { 16754939875STim J. Robbins if (oxp->null_lowervp == xp->null_lowervp && 16854939875STim J. Robbins NULLTOV(oxp)->v_mount == mp) { 1699c12e631SJeff Roberson /* 1709c12e631SJeff Roberson * See null_hashget for a description of this 1719c12e631SJeff Roberson * operation. 1729c12e631SJeff Roberson */ 1731cfdefbbSSemen Ustimenko ovp = NULLTOV(oxp); 174bc855512SJeff Roberson VI_LOCK(ovp); 1751cfdefbbSSemen Ustimenko mtx_unlock(&null_hashmtx); 1769c12e631SJeff Roberson ovp->v_iflag &= ~VI_OWEINACT; 1779c12e631SJeff Roberson error = vget(ovp, LK_INTERLOCK, td); 1789c12e631SJeff Roberson if (error) 1799c12e631SJeff Roberson panic("null_hashins: vget error %d", error); 1801cfdefbbSSemen Ustimenko return (ovp); 1811cfdefbbSSemen Ustimenko } 1821cfdefbbSSemen Ustimenko } 1831cfdefbbSSemen Ustimenko LIST_INSERT_HEAD(hd, xp, null_hash); 1841cfdefbbSSemen Ustimenko mtx_unlock(&null_hashmtx); 1851cfdefbbSSemen Ustimenko return (NULLVP); 1861cfdefbbSSemen Ustimenko } 187df8bae1dSRodney W. Grimes 18861b9d89fSTor Egge static void 18961b9d89fSTor Egge null_insmntque_dtr(struct vnode *vp, void *xp) 19061b9d89fSTor Egge { 19161b9d89fSTor Egge vp->v_data = NULL; 19261b9d89fSTor Egge vp->v_vnlock = &vp->v_lock; 19361b9d89fSTor Egge FREE(xp, M_NULLFSNODE); 19461b9d89fSTor Egge vp->v_op = &dead_vnodeops; 19561b9d89fSTor Egge (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); 19661b9d89fSTor Egge vgone(vp); 19761b9d89fSTor Egge vput(vp); 19861b9d89fSTor Egge } 19961b9d89fSTor Egge 200df8bae1dSRodney W. Grimes /* 2011cfdefbbSSemen Ustimenko * Make a new or get existing nullfs node. 2021cfdefbbSSemen Ustimenko * Vp is the alias vnode, lowervp is the lower vnode. 2031cfdefbbSSemen Ustimenko * 2041cfdefbbSSemen Ustimenko * The lowervp assumed to be locked and having "spare" reference. This routine 2051cfdefbbSSemen Ustimenko * vrele lowervp if nullfs node was taken from hash. Otherwise it "transfers" 2061cfdefbbSSemen Ustimenko * the caller's "spare" reference to created nullfs vnode. 207df8bae1dSRodney W. Grimes */ 2081cfdefbbSSemen Ustimenko int 2091cfdefbbSSemen Ustimenko null_nodeget(mp, lowervp, vpp) 210df8bae1dSRodney W. Grimes struct mount *mp; 211df8bae1dSRodney W. Grimes struct vnode *lowervp; 212df8bae1dSRodney W. Grimes struct vnode **vpp; 213df8bae1dSRodney W. Grimes { 214df8bae1dSRodney W. Grimes struct null_node *xp; 2151cfdefbbSSemen Ustimenko struct vnode *vp; 216df8bae1dSRodney W. Grimes int error; 217df8bae1dSRodney W. Grimes 2181cfdefbbSSemen Ustimenko /* Lookup the hash firstly */ 21954939875STim J. Robbins *vpp = null_hashget(mp, lowervp); 2201cfdefbbSSemen Ustimenko if (*vpp != NULL) { 2211cfdefbbSSemen Ustimenko vrele(lowervp); 2221cfdefbbSSemen Ustimenko return (0); 2231cfdefbbSSemen Ustimenko } 2241cfdefbbSSemen Ustimenko 2251cfdefbbSSemen Ustimenko /* 2261cfdefbbSSemen Ustimenko * We do not serialize vnode creation, instead we will check for 2271cfdefbbSSemen Ustimenko * duplicates later, when adding new vnode to hash. 2281cfdefbbSSemen Ustimenko * 2291cfdefbbSSemen Ustimenko * Note that duplicate can only appear in hash if the lowervp is 2301cfdefbbSSemen Ustimenko * locked LK_SHARED. 2311cfdefbbSSemen Ustimenko */ 2321cfdefbbSSemen Ustimenko 2332f9bae59SDavid Greenman /* 2342f9bae59SDavid Greenman * Do the MALLOC before the getnewvnode since doing so afterward 2352f9bae59SDavid Greenman * might cause a bogus v_data pointer to get dereferenced 2362f9bae59SDavid Greenman * elsewhere if MALLOC should block. 2372f9bae59SDavid Greenman */ 2388da80660SBoris Popov MALLOC(xp, struct null_node *, sizeof(struct null_node), 239a163d034SWarner Losh M_NULLFSNODE, M_WAITOK); 2402f9bae59SDavid Greenman 241aec0fb7bSPoul-Henning Kamp error = getnewvnode("null", mp, &null_vnodeops, &vp); 2422f9bae59SDavid Greenman if (error) { 2438da80660SBoris Popov FREE(xp, M_NULLFSNODE); 244df8bae1dSRodney W. Grimes return (error); 2452f9bae59SDavid Greenman } 246df8bae1dSRodney W. Grimes 247df8bae1dSRodney W. Grimes xp->null_vnode = vp; 248df8bae1dSRodney W. Grimes xp->null_lowervp = lowervp; 24908720e34SSemen Ustimenko vp->v_type = lowervp->v_type; 25008720e34SSemen Ustimenko vp->v_data = xp; 2514451405fSBoris Popov vp->v_vnlock = lowervp->v_vnlock; 252bc855512SJeff Roberson if (vp->v_vnlock == NULL) 253bc855512SJeff Roberson panic("null_nodeget: Passed a NULL vnlock.\n"); 25461b9d89fSTor Egge error = insmntque1(vp, mp, null_insmntque_dtr, xp); 25561b9d89fSTor Egge if (error != 0) 25661b9d89fSTor Egge return (error); 257df8bae1dSRodney W. Grimes /* 2581cfdefbbSSemen Ustimenko * Atomically insert our new node into the hash or vget existing 2591cfdefbbSSemen Ustimenko * if someone else has beaten us to it. 260df8bae1dSRodney W. Grimes */ 26154939875STim J. Robbins *vpp = null_hashins(mp, xp); 2621cfdefbbSSemen Ustimenko if (*vpp != NULL) { 2634451405fSBoris Popov vrele(lowervp); 264bc855512SJeff Roberson vp->v_vnlock = &vp->v_lock; 2651cfdefbbSSemen Ustimenko xp->null_lowervp = NULL; 2661cfdefbbSSemen Ustimenko vrele(vp); 267df8bae1dSRodney W. Grimes return (0); 268df8bae1dSRodney W. Grimes } 2691cfdefbbSSemen Ustimenko *vpp = vp; 2701cfdefbbSSemen Ustimenko 2711cfdefbbSSemen Ustimenko return (0); 2721cfdefbbSSemen Ustimenko } 2731cfdefbbSSemen Ustimenko 2741cfdefbbSSemen Ustimenko /* 2751cfdefbbSSemen Ustimenko * Remove node from hash. 2761cfdefbbSSemen Ustimenko */ 27708720e34SSemen Ustimenko void 27808720e34SSemen Ustimenko null_hashrem(xp) 27908720e34SSemen Ustimenko struct null_node *xp; 28008720e34SSemen Ustimenko { 28108720e34SSemen Ustimenko 28215420031SSemen Ustimenko mtx_lock(&null_hashmtx); 28308720e34SSemen Ustimenko LIST_REMOVE(xp, null_hash); 28415420031SSemen Ustimenko mtx_unlock(&null_hashmtx); 28508720e34SSemen Ustimenko } 28608720e34SSemen Ustimenko 287a0f40f54SBruce Evans #ifdef DIAGNOSTIC 2881bf978ceSKATO Takenori 2894ea4f1f9SMarcel Moolenaar #ifdef KDB 290e958d078SKATO Takenori #define null_checkvp_barrier 1 291e958d078SKATO Takenori #else 292e958d078SKATO Takenori #define null_checkvp_barrier 0 293e958d078SKATO Takenori #endif 294e958d078SKATO Takenori 295df8bae1dSRodney W. Grimes struct vnode * 296df8bae1dSRodney W. Grimes null_checkvp(vp, fil, lno) 297df8bae1dSRodney W. Grimes struct vnode *vp; 298df8bae1dSRodney W. Grimes char *fil; 299df8bae1dSRodney W. Grimes int lno; 300df8bae1dSRodney W. Grimes { 301df8bae1dSRodney W. Grimes struct null_node *a = VTONULL(vp); 302df8bae1dSRodney W. Grimes #ifdef notyet 303df8bae1dSRodney W. Grimes /* 304df8bae1dSRodney W. Grimes * Can't do this check because vop_reclaim runs 305df8bae1dSRodney W. Grimes * with a funny vop vector. 306df8bae1dSRodney W. Grimes */ 307df8bae1dSRodney W. Grimes if (vp->v_op != null_vnodeop_p) { 308df8bae1dSRodney W. Grimes printf ("null_checkvp: on non-null-node\n"); 309df8bae1dSRodney W. Grimes while (null_checkvp_barrier) /*WAIT*/ ; 310df8bae1dSRodney W. Grimes panic("null_checkvp"); 311df8bae1dSRodney W. Grimes }; 312df8bae1dSRodney W. Grimes #endif 313c5e17d9eSKATO Takenori if (a->null_lowervp == NULLVP) { 314df8bae1dSRodney W. Grimes /* Should never happen */ 315df8bae1dSRodney W. Grimes int i; u_long *p; 31689785a16SBruce Evans printf("vp = %p, ZERO ptr\n", (void *)vp); 317df8bae1dSRodney W. Grimes for (p = (u_long *) a, i = 0; i < 8; i++) 31889785a16SBruce Evans printf(" %lx", p[i]); 319df8bae1dSRodney W. Grimes printf("\n"); 320df8bae1dSRodney W. Grimes /* wait for debugger */ 321df8bae1dSRodney W. Grimes while (null_checkvp_barrier) /*WAIT*/ ; 322df8bae1dSRodney W. Grimes panic("null_checkvp"); 323df8bae1dSRodney W. Grimes } 3244d93c0beSJeff Roberson if (vrefcnt(a->null_lowervp) < 1) { 325df8bae1dSRodney W. Grimes int i; u_long *p; 32689785a16SBruce Evans printf("vp = %p, unref'ed lowervp\n", (void *)vp); 327df8bae1dSRodney W. Grimes for (p = (u_long *) a, i = 0; i < 8; i++) 32889785a16SBruce Evans printf(" %lx", p[i]); 329df8bae1dSRodney W. Grimes printf("\n"); 330df8bae1dSRodney W. Grimes /* wait for debugger */ 331df8bae1dSRodney W. Grimes while (null_checkvp_barrier) /*WAIT*/ ; 332df8bae1dSRodney W. Grimes panic ("null with unref'ed lowervp"); 333df8bae1dSRodney W. Grimes }; 334df8bae1dSRodney W. Grimes #ifdef notyet 335df8bae1dSRodney W. Grimes printf("null %x/%d -> %x/%d [%s, %d]\n", 3364d93c0beSJeff Roberson NULLTOV(a), vrefcnt(NULLTOV(a)), 3374d93c0beSJeff Roberson a->null_lowervp, vrefcnt(a->null_lowervp), 338df8bae1dSRodney W. Grimes fil, lno); 339df8bae1dSRodney W. Grimes #endif 340df8bae1dSRodney W. Grimes return a->null_lowervp; 341df8bae1dSRodney W. Grimes } 342df8bae1dSRodney W. Grimes #endif 343