/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software donated to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>

#include <fs/nullfs/null.h>

#define LOG2_SIZEVNODE	7	/* log2(sizeof struct vnode) */
#define NNULLNODECACHE	16

/*
 * Null layer cache:
 * Each cache entry holds a reference to the lower vnode
 * along with a pointer to the alias vnode.  When an
 * entry is added the lower vnode is VREF'd.  When the
 * alias is removed the lower vnode is vrele'd.
 */
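
/*
 * Entries are hashed on the address of the lower vnode.  The low bits
 * of a vnode address carry little information, so NULL_NHASH() shifts
 * them out by LOG2_SIZEVNODE (an approximation of log2 of the vnode
 * size) before masking with null_node_hash, the bucket mask handed
 * back by hashinit(9).  For example, with NNULLNODECACHE == 16 the
 * mask is 15, and two lower vnodes 128 bytes apart hash to adjacent
 * buckets.
 */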
#define NULL_NHASH(vp) \
	(&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])

static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
static u_long null_node_hash;
struct mtx null_hashmtx;

static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");

static struct vnode * null_hashget(struct vnode *);
static struct vnode * null_hashins(struct null_node *);

/*
 * Initialise cache headers
 */
int
nullfs_init(vfsp)
	struct vfsconf *vfsp;
{

	NULLFSDEBUG("nullfs_init\n");		/* printed during system boot */
	null_node_hashtbl = hashinit(NNULLNODECACHE, M_NULLFSHASH, &null_node_hash);
	mtx_init(&null_hashmtx, "nullfs", NULL, MTX_DEF);
	return (0);
}

int
nullfs_uninit(vfsp)
	struct vfsconf *vfsp;
{

	mtx_destroy(&null_hashmtx);
	free(null_node_hashtbl, M_NULLFSHASH);
	return (0);
}

/*
 * Return a VREF'ed alias for the lower vnode if one already exists,
 * else NULLVP.  The lower vnode should be locked on entry and will be
 * left locked on exit.
 */
static struct vnode *
null_hashget(lowervp)
	struct vnode *lowervp;
{
	struct thread *td = curthread;	/* XXX */
	struct null_node_hashhead *hd;
	struct null_node *a;
	struct vnode *vp;

	/*
	 * Find the hash chain head, then search the (two-way) linked
	 * list looking for a null_node structure which references
	 * the lower vnode.  If found, increment the null_node
	 * reference count (but NOT the lower vnode's VREF counter).
	 */
	hd = NULL_NHASH(lowervp);
loop:
	mtx_lock(&null_hashmtx);
	LIST_FOREACH(a, hd, null_hash) {
		if (a->null_lowervp == lowervp) {
			vp = NULLTOV(a);
			mtx_lock(&vp->v_interlock);
			mtx_unlock(&null_hashmtx);
			/*
			 * We need vget for the VXLOCK
			 * stuff, but we don't want to lock
			 * the lower node.
			 */
			if (vget(vp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
				goto loop;

			return (vp);
		}
	}
	mtx_unlock(&null_hashmtx);
	return (NULLVP);
}

/*
 * Act like null_hashget, but add the passed null_node to the hash if
 * no existing node is found.
 */
static struct vnode *
null_hashins(xp)
	struct null_node *xp;
{
	struct thread *td = curthread;	/* XXX */
	struct null_node_hashhead *hd;
	struct null_node *oxp;
	struct vnode *ovp;

	hd = NULL_NHASH(xp->null_lowervp);
loop:
	mtx_lock(&null_hashmtx);
	LIST_FOREACH(oxp, hd, null_hash) {
		if (oxp->null_lowervp == xp->null_lowervp) {
			ovp = NULLTOV(oxp);
			mtx_lock(&ovp->v_interlock);
			mtx_unlock(&null_hashmtx);
			if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
				goto loop;

			return (ovp);
		}
	}
	LIST_INSERT_HEAD(hd, xp, null_hash);
	mtx_unlock(&null_hashmtx);
	return (NULLVP);
}
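
/*
 * Both lookup routines above use the same retry dance: the alias
 * vnode's interlock is taken before the hash mutex is dropped, so the
 * vnode cannot be reclaimed in the window between the two, and the
 * LK_INTERLOCK flag hands the held interlock off to vget().  A failing
 * vget() means the vnode is being recycled; the hash chain may have
 * changed while the mutex was released, so the scan restarts from the
 * top.
 */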
/*
 * Make a new nullfs node or get an existing one.
 * Vp is the alias vnode, lowervp is the lower vnode.
 *
 * The lowervp is assumed to be locked and to have a "spare" reference.
 * This routine vrele()s lowervp if the nullfs node was taken from the
 * hash.  Otherwise it "transfers" the caller's "spare" reference to the
 * newly created nullfs vnode.
 */
int
null_nodeget(mp, lowervp, vpp)
	struct mount *mp;
	struct vnode *lowervp;
	struct vnode **vpp;
{
	struct thread *td = curthread;	/* XXX */
	struct null_node *xp;
	struct vnode *vp;
	int error;

	/* Look up the hash first. */
	*vpp = null_hashget(lowervp);
	if (*vpp != NULL) {
		vrele(lowervp);
		return (0);
	}

	/*
	 * We do not serialize vnode creation; instead we check for
	 * duplicates later, when adding the new vnode to the hash.
	 *
	 * Note that a duplicate can only appear in the hash if the
	 * lowervp is locked LK_SHARED.
	 */

	/*
	 * Do the MALLOC before the getnewvnode since doing so afterward
	 * might cause a bogus v_data pointer to get dereferenced
	 * elsewhere if MALLOC should block.
	 */
	MALLOC(xp, struct null_node *, sizeof(struct null_node),
	    M_NULLFSNODE, M_WAITOK);

	error = getnewvnode(VT_NULL, mp, null_vnodeop_p, &vp);
	if (error) {
		FREE(xp, M_NULLFSNODE);
		return (error);
	}

	xp->null_vnode = vp;
	xp->null_lowervp = lowervp;

	vp->v_type = lowervp->v_type;
	vp->v_data = xp;

	/* Though v_lock is inited by getnewvnode(), we want our own wmesg. */
	lockinit(&vp->v_lock, PVFS, "nunode", VLKTIMEOUT, LK_NOPAUSE);

	/*
	 * From NetBSD:
	 * Now lock the new node.  We rely on the fact that we were passed
	 * a locked vnode.  If the lower node is exporting a struct lock
	 * (v_vnlock != NULL) then we just set the upper v_vnlock to the
	 * lower one, and both are now locked.  If the lower node is
	 * exporting NULL, then we copy that up and manually lock the new
	 * vnode.
	 */

	vp->v_vnlock = lowervp->v_vnlock;
	error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, td);
	if (error)
		panic("null_nodeget: can't lock new vnode\n");

	/*
	 * Atomically insert our new node into the hash, or vget the
	 * existing one if someone else has beaten us to it.
	 */
	*vpp = null_hashins(xp);
	if (*vpp != NULL) {
		vrele(lowervp);
		VOP_UNLOCK(vp, LK_THISLAYER, td);
		vp->v_vnlock = NULL;
		xp->null_lowervp = NULL;
		vrele(vp);
		return (0);
	}

	/*
	 * XXX We take an extra vref just to work around UFS's XXX:
	 * UFS can vrele() the vnode in VOP_CLOSE() in some cases.  Luckily,
	 * this can only happen if v_usecount == 1.  To work around it, we
	 * just don't let v_usecount be 1; it will be 2 or more.
	 */
	VREF(lowervp);

	*vpp = vp;

	return (0);
}

/*
 * Remove node from hash.
 */
void
null_hashrem(xp)
	struct null_node *xp;
{

	mtx_lock(&null_hashmtx);
	LIST_REMOVE(xp, null_hash);
	mtx_unlock(&null_hashmtx);
}

#ifdef DIAGNOSTIC
#include "opt_ddb.h"

#ifdef DDB
#define null_checkvp_barrier	1
#else
#define null_checkvp_barrier	0
#endif
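
/*
 * Consistency check for a nullfs vnode: verify that the node still has
 * a lower vnode and that the lower vnode is referenced, then return it.
 * On a failed check the first words of the null_node are dumped for
 * post-mortem inspection; when DDB is compiled in, null_checkvp_barrier
 * keeps the thread spinning so a debugger can be attached and the live
 * state examined before the panic fires.
 */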
struct vnode *
null_checkvp(vp, fil, lno)
	struct vnode *vp;
	char *fil;
	int lno;
{
	struct null_node *a = VTONULL(vp);
#ifdef notyet
	/*
	 * Can't do this check because vop_reclaim runs
	 * with a funny vop vector.
	 */
	if (vp->v_op != null_vnodeop_p) {
		printf("null_checkvp: on non-null-node\n");
		while (null_checkvp_barrier) /*WAIT*/ ;
		panic("null_checkvp");
	}
#endif
	if (a->null_lowervp == NULLVP) {
		/* Should never happen */
		int i; u_long *p;
		printf("vp = %p, ZERO ptr\n", (void *)vp);
		for (p = (u_long *)a, i = 0; i < 8; i++)
			printf(" %lx", p[i]);
		printf("\n");
		/* wait for debugger */
		while (null_checkvp_barrier) /*WAIT*/ ;
		panic("null_checkvp");
	}
	if (a->null_lowervp->v_usecount < 1) {
		int i; u_long *p;
		printf("vp = %p, unref'ed lowervp\n", (void *)vp);
		for (p = (u_long *)a, i = 0; i < 8; i++)
			printf(" %lx", p[i]);
		printf("\n");
		/* wait for debugger */
		while (null_checkvp_barrier) /*WAIT*/ ;
		panic("null with unref'ed lowervp");
	}
#ifdef notyet
	printf("null %x/%d -> %x/%d [%s, %d]\n",
	    NULLTOV(a), NULLTOV(a)->v_usecount,
	    a->null_lowervp, a->null_lowervp->v_usecount,
	    fil, lno);
#endif
	return (a->null_lowervp);
}
#endif
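
/*
 * Usage sketch (the callers named here live outside this file and are
 * an assumption based on the conventions above): the nullfs mount and
 * lookup paths obtain a locked lower vnode holding a "spare" reference
 * and call
 *
 *	error = null_nodeget(mp, lowervp, &vp);
 *
 * which either returns an existing alias from the hash, consuming the
 * spare reference with vrele(), or wraps lowervp in a freshly created
 * nullfs vnode.  VOP_RECLAIM is expected to undo the pairing with
 * null_hashrem() before the alias is destroyed.
 */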