1 /* 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software donated to Berkeley by 6 * Jan-Simon Pendry. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)null_subr.c 8.7 (Berkeley) 5/14/95 33 * 34 * $FreeBSD$ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/malloc.h> 43 #include <sys/mount.h> 44 #include <sys/proc.h> 45 #include <sys/vnode.h> 46 47 #include <fs/nullfs/null.h> 48 49 #define LOG2_SIZEVNODE 8 /* log2(sizeof struct vnode) */ 50 #define NNULLNODECACHE 16 51 52 /* 53 * Null layer cache: 54 * Each cache entry holds a reference to the lower vnode 55 * along with a pointer to the alias vnode. When an 56 * entry is added the lower vnode is VREF'd. When the 57 * alias is removed the lower vnode is vrele'd. 58 */ 59 60 #define NULL_NHASH(vp) \ 61 (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash]) 62 63 static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; 64 static u_long null_node_hash; 65 struct mtx null_hashmtx; 66 67 static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table"); 68 MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part"); 69 70 static struct vnode * null_hashget(struct mount *, struct vnode *); 71 static struct vnode * null_hashins(struct mount *, struct null_node *); 72 73 /* 74 * Initialise cache headers 75 */ 76 int 77 nullfs_init(vfsp) 78 struct vfsconf *vfsp; 79 { 80 81 NULLFSDEBUG("nullfs_init\n"); /* printed during system boot */ 82 null_node_hashtbl = hashinit(NNULLNODECACHE, M_NULLFSHASH, &null_node_hash); 83 mtx_init(&null_hashmtx, "nullhs", NULL, MTX_DEF); 84 return (0); 85 } 86 87 int 88 nullfs_uninit(vfsp) 89 struct vfsconf *vfsp; 90 { 91 92 mtx_destroy(&null_hashmtx); 93 free(null_node_hashtbl, M_NULLFSHASH); 94 return (0); 95 } 96 97 /* 98 * Return a VREF'ed alias for lower vnode if already exists, else 0. 99 * Lower vnode should be locked on entry and will be left locked on exit. 100 */ 101 static struct vnode * 102 null_hashget(mp, lowervp) 103 struct mount *mp; 104 struct vnode *lowervp; 105 { 106 struct thread *td = curthread; /* XXX */ 107 struct null_node_hashhead *hd; 108 struct null_node *a; 109 struct vnode *vp; 110 111 /* 112 * Find hash base, and then search the (two-way) linked 113 * list looking for a null_node structure which is referencing 114 * the lower vnode. If found, the increment the null_node 115 * reference count (but NOT the lower vnode's VREF counter). 116 */ 117 hd = NULL_NHASH(lowervp); 118 loop: 119 mtx_lock(&null_hashmtx); 120 LIST_FOREACH(a, hd, null_hash) { 121 if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) { 122 vp = NULLTOV(a); 123 mtx_lock(&vp->v_interlock); 124 /* 125 * Don't block if nullfs vnode is being recycled. 126 * We already hold a lock on the lower vnode, thus 127 * waiting might deadlock against the thread 128 * recycling the nullfs vnode or another thread 129 * in vrele() waiting for the vnode lock. 130 */ 131 if ((vp->v_iflag & VI_XLOCK) != 0) { 132 VI_UNLOCK(vp); 133 continue; 134 } 135 mtx_unlock(&null_hashmtx); 136 /* 137 * We need vget for the VXLOCK 138 * stuff, but we don't want to lock 139 * the lower node. 140 */ 141 if (vget(vp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td)) 142 goto loop; 143 144 return (vp); 145 } 146 } 147 mtx_unlock(&null_hashmtx); 148 return (NULLVP); 149 } 150 151 /* 152 * Act like null_hashget, but add passed null_node to hash if no existing 153 * node found. 154 */ 155 static struct vnode * 156 null_hashins(mp, xp) 157 struct mount *mp; 158 struct null_node *xp; 159 { 160 struct thread *td = curthread; /* XXX */ 161 struct null_node_hashhead *hd; 162 struct null_node *oxp; 163 struct vnode *ovp; 164 165 hd = NULL_NHASH(xp->null_lowervp); 166 loop: 167 mtx_lock(&null_hashmtx); 168 LIST_FOREACH(oxp, hd, null_hash) { 169 if (oxp->null_lowervp == xp->null_lowervp && 170 NULLTOV(oxp)->v_mount == mp) { 171 ovp = NULLTOV(oxp); 172 mtx_lock(&ovp->v_interlock); 173 /* 174 * Don't block if nullfs vnode is being recycled. 175 * We already hold a lock on the lower vnode, thus 176 * waiting might deadlock against the thread 177 * recycling the nullfs vnode or another thread 178 * in vrele() waiting for the vnode lock. 179 */ 180 if ((ovp->v_iflag & VI_XLOCK) != 0) { 181 VI_UNLOCK(ovp); 182 continue; 183 } 184 mtx_unlock(&null_hashmtx); 185 if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td)) 186 goto loop; 187 188 return (ovp); 189 } 190 } 191 LIST_INSERT_HEAD(hd, xp, null_hash); 192 mtx_unlock(&null_hashmtx); 193 return (NULLVP); 194 } 195 196 /* 197 * Make a new or get existing nullfs node. 198 * Vp is the alias vnode, lowervp is the lower vnode. 199 * 200 * The lowervp assumed to be locked and having "spare" reference. This routine 201 * vrele lowervp if nullfs node was taken from hash. Otherwise it "transfers" 202 * the caller's "spare" reference to created nullfs vnode. 203 */ 204 int 205 null_nodeget(mp, lowervp, vpp) 206 struct mount *mp; 207 struct vnode *lowervp; 208 struct vnode **vpp; 209 { 210 struct thread *td = curthread; /* XXX */ 211 struct null_node *xp; 212 struct vnode *vp; 213 int error; 214 215 /* Lookup the hash firstly */ 216 *vpp = null_hashget(mp, lowervp); 217 if (*vpp != NULL) { 218 vrele(lowervp); 219 return (0); 220 } 221 222 /* 223 * We do not serialize vnode creation, instead we will check for 224 * duplicates later, when adding new vnode to hash. 225 * 226 * Note that duplicate can only appear in hash if the lowervp is 227 * locked LK_SHARED. 228 */ 229 230 /* 231 * Do the MALLOC before the getnewvnode since doing so afterward 232 * might cause a bogus v_data pointer to get dereferenced 233 * elsewhere if MALLOC should block. 234 */ 235 MALLOC(xp, struct null_node *, sizeof(struct null_node), 236 M_NULLFSNODE, M_WAITOK); 237 238 error = getnewvnode("null", mp, null_vnodeop_p, &vp); 239 if (error) { 240 FREE(xp, M_NULLFSNODE); 241 return (error); 242 } 243 244 xp->null_vnode = vp; 245 xp->null_lowervp = lowervp; 246 xp->null_pending_locks = 0; 247 xp->null_drain_wakeup = 0; 248 249 vp->v_type = lowervp->v_type; 250 vp->v_data = xp; 251 252 /* 253 * From NetBSD: 254 * Now lock the new node. We rely on the fact that we were passed 255 * a locked vnode. If the lower node is exporting a struct lock 256 * (v_vnlock != NULL) then we just set the upper v_vnlock to the 257 * lower one, and both are now locked. If the lower node is exporting 258 * NULL, then we copy that up and manually lock the new vnode. 259 */ 260 261 vp->v_vnlock = lowervp->v_vnlock; 262 error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, td); 263 if (error) 264 panic("null_nodeget: can't lock new vnode\n"); 265 266 /* 267 * Atomically insert our new node into the hash or vget existing 268 * if someone else has beaten us to it. 269 */ 270 *vpp = null_hashins(mp, xp); 271 if (*vpp != NULL) { 272 vrele(lowervp); 273 VOP_UNLOCK(vp, LK_THISLAYER, td); 274 vp->v_vnlock = NULL; 275 xp->null_lowervp = NULL; 276 vrele(vp); 277 return (0); 278 } 279 280 /* 281 * XXX We take extra vref just to workaround UFS's XXX: 282 * UFS can vrele() vnode in VOP_CLOSE() in some cases. Luckily, this 283 * can only happen if v_usecount == 1. To workaround, we just don't 284 * let v_usecount be 1, it will be 2 or more. 285 */ 286 VREF(lowervp); 287 288 *vpp = vp; 289 290 return (0); 291 } 292 293 /* 294 * Remove node from hash. 295 */ 296 void 297 null_hashrem(xp) 298 struct null_node *xp; 299 { 300 301 mtx_lock(&null_hashmtx); 302 LIST_REMOVE(xp, null_hash); 303 mtx_unlock(&null_hashmtx); 304 } 305 306 #ifdef DIAGNOSTIC 307 308 #ifdef KDB 309 #define null_checkvp_barrier 1 310 #else 311 #define null_checkvp_barrier 0 312 #endif 313 314 struct vnode * 315 null_checkvp(vp, fil, lno) 316 struct vnode *vp; 317 char *fil; 318 int lno; 319 { 320 struct null_node *a = VTONULL(vp); 321 #ifdef notyet 322 /* 323 * Can't do this check because vop_reclaim runs 324 * with a funny vop vector. 325 */ 326 if (vp->v_op != null_vnodeop_p) { 327 printf ("null_checkvp: on non-null-node\n"); 328 while (null_checkvp_barrier) /*WAIT*/ ; 329 panic("null_checkvp"); 330 }; 331 #endif 332 if (a->null_lowervp == NULLVP) { 333 /* Should never happen */ 334 int i; u_long *p; 335 printf("vp = %p, ZERO ptr\n", (void *)vp); 336 for (p = (u_long *) a, i = 0; i < 8; i++) 337 printf(" %lx", p[i]); 338 printf("\n"); 339 /* wait for debugger */ 340 while (null_checkvp_barrier) /*WAIT*/ ; 341 panic("null_checkvp"); 342 } 343 if (vrefcnt(a->null_lowervp) < 1) { 344 int i; u_long *p; 345 printf("vp = %p, unref'ed lowervp\n", (void *)vp); 346 for (p = (u_long *) a, i = 0; i < 8; i++) 347 printf(" %lx", p[i]); 348 printf("\n"); 349 /* wait for debugger */ 350 while (null_checkvp_barrier) /*WAIT*/ ; 351 panic ("null with unref'ed lowervp"); 352 }; 353 #ifdef notyet 354 printf("null %x/%d -> %x/%d [%s, %d]\n", 355 NULLTOV(a), vrefcnt(NULLTOV(a)), 356 a->null_lowervp, vrefcnt(a->null_lowervp), 357 fil, lno); 358 #endif 359 return a->null_lowervp; 360 } 361 #endif 362