/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software donated to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>

#include <fs/nullfs/null.h>

#define	LOG2_SIZEVNODE	8		/* log2(sizeof struct vnode) */
#define	NNULLNODECACHE	16

/*
 * Null layer cache:
 * Each cache entry holds a reference to the lower vnode
 * along with a pointer to the alias vnode.  When an
 * entry is added the lower vnode is VREF'd.  When the
 * alias is removed the lower vnode is vrele'd.
 */
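/*
 * Hash on the address of the lower vnode.  Distinct vnodes are at least
 * sizeof(struct vnode) (roughly 2^LOG2_SIZEVNODE bytes) apart, so the
 * low-order bits of the pointer carry no information; shifting them out
 * before masking with the hashinit() mask spreads entries more evenly
 * across the table.
 */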
#define	NULL_NHASH(vp) \
	(&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])

static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
static u_long null_node_hash;
struct mtx null_hashmtx;

static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");

static struct vnode * null_hashget(struct mount *, struct vnode *);
static struct vnode * null_hashins(struct mount *, struct null_node *);

/*
 * Initialise cache headers
 */
int
nullfs_init(vfsp)
	struct vfsconf *vfsp;
{

	NULLFSDEBUG("nullfs_init\n");		/* printed during system boot */
	null_node_hashtbl = hashinit(NNULLNODECACHE, M_NULLFSHASH,
	    &null_node_hash);
	mtx_init(&null_hashmtx, "nullfs", NULL, MTX_DEF);
	return (0);
}

int
nullfs_uninit(vfsp)
	struct vfsconf *vfsp;
{

	mtx_destroy(&null_hashmtx);
	free(null_node_hashtbl, M_NULLFSHASH);
	return (0);
}

/*
 * Return a VREF'ed alias for the lower vnode if one already exists,
 * else NULLVP.  The lower vnode should be locked on entry and will be
 * left locked on exit.
 */
static struct vnode *
null_hashget(mp, lowervp)
	struct mount *mp;
	struct vnode *lowervp;
{
	struct thread *td = curthread;	/* XXX */
	struct null_node_hashhead *hd;
	struct null_node *a;
	struct vnode *vp;

	/*
	 * Find hash base, and then search the (two-way) linked
	 * list looking for a null_node structure which is referencing
	 * the lower vnode.  If found, then increment the null_node
	 * reference count (but NOT the lower vnode's VREF counter).
	 */
	hd = NULL_NHASH(lowervp);
loop:
	mtx_lock(&null_hashmtx);
	LIST_FOREACH(a, hd, null_hash) {
		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
			vp = NULLTOV(a);
			VI_LOCK(vp);
			/*
			 * Don't block if the nullfs vnode is being recycled.
			 * We already hold a lock on the lower vnode, thus
			 * waiting might deadlock against the thread
			 * recycling the nullfs vnode or another thread
			 * in vrele() waiting for the vnode lock.
			 */
			if ((vp->v_iflag & VI_XLOCK) != 0) {
				VI_UNLOCK(vp);
				continue;
			}
			mtx_unlock(&null_hashmtx);
			/*
			 * We need vget for the VXLOCK
			 * stuff, but we don't want to lock
			 * the lower node.
			 */
			if (vget(vp, LK_EXCLUSIVE | LK_THISLAYER |
			    LK_INTERLOCK, td))
				goto loop;

			return (vp);
		}
	}
	mtx_unlock(&null_hashmtx);
	return (NULLVP);
}
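/*
 * Note on the retry pattern above and below: the hash mutex must be
 * dropped before vget() is called, since vget() may sleep.  If vget()
 * fails, the hash chain may have changed in the meantime, so the scan
 * is restarted from "loop" rather than continued.
 */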
/*
 * Act like null_hashget, but add the passed null_node to the hash if
 * no existing node is found.
 */
static struct vnode *
null_hashins(mp, xp)
	struct mount *mp;
	struct null_node *xp;
{
	struct thread *td = curthread;	/* XXX */
	struct null_node_hashhead *hd;
	struct null_node *oxp;
	struct vnode *ovp;

	hd = NULL_NHASH(xp->null_lowervp);
loop:
	mtx_lock(&null_hashmtx);
	LIST_FOREACH(oxp, hd, null_hash) {
		if (oxp->null_lowervp == xp->null_lowervp &&
		    NULLTOV(oxp)->v_mount == mp) {
			ovp = NULLTOV(oxp);
			VI_LOCK(ovp);
			/*
			 * Don't block if the nullfs vnode is being recycled.
			 * We already hold a lock on the lower vnode, thus
			 * waiting might deadlock against the thread
			 * recycling the nullfs vnode or another thread
			 * in vrele() waiting for the vnode lock.
			 */
			if ((ovp->v_iflag & VI_XLOCK) != 0) {
				VI_UNLOCK(ovp);
				continue;
			}
			mtx_unlock(&null_hashmtx);
			if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER |
			    LK_INTERLOCK, td))
				goto loop;

			return (ovp);
		}
	}
	LIST_INSERT_HEAD(hd, xp, null_hash);
	mtx_unlock(&null_hashmtx);
	return (NULLVP);
}

/*
 * Make a new nullfs node, or get an existing one.
 * Vp is the alias vnode, lowervp is the lower vnode.
 *
 * The lowervp is assumed to be locked and to carry a "spare" reference.
 * This routine vrele()s lowervp if the nullfs node was taken from the
 * hash.  Otherwise it "transfers" the caller's "spare" reference to the
 * created nullfs vnode.
 */
int
null_nodeget(mp, lowervp, vpp)
	struct mount *mp;
	struct vnode *lowervp;
	struct vnode **vpp;
{
	struct thread *td = curthread;	/* XXX */
	struct null_node *xp;
	struct vnode *vp;
	int error;

	/* Look up the hash first. */
	*vpp = null_hashget(mp, lowervp);
	if (*vpp != NULL) {
		vrele(lowervp);
		return (0);
	}

	/*
	 * We do not serialize vnode creation, instead we will check for
	 * duplicates later, when adding the new vnode to the hash.
	 *
	 * Note that a duplicate can only appear in the hash if the lowervp
	 * is locked LK_SHARED.
	 */

	/*
	 * Do the MALLOC before the getnewvnode since doing so afterward
	 * might cause a bogus v_data pointer to get dereferenced
	 * elsewhere if MALLOC should block.
	 */
	MALLOC(xp, struct null_node *, sizeof(struct null_node),
	    M_NULLFSNODE, M_WAITOK);

	error = getnewvnode("null", mp, null_vnodeop_p, &vp);
	if (error) {
		FREE(xp, M_NULLFSNODE);
		return (error);
	}

	xp->null_vnode = vp;
	xp->null_lowervp = lowervp;
	xp->null_pending_locks = 0;
	xp->null_drain_wakeup = 0;

	vp->v_type = lowervp->v_type;
	vp->v_data = xp;

	/*
	 * From NetBSD:
	 * Now lock the new node.  We rely on the fact that we were passed
	 * a locked vnode.  If the lower node is exporting a struct lock
	 * (v_vnlock != NULL) then we just set the upper v_vnlock to the
	 * lower one, and both are now locked.  If the lower node is exporting
	 * NULL, then we copy that up and manually lock the new vnode.
	 */

	vp->v_vnlock = lowervp->v_vnlock;
	error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, td);
	if (error)
		panic("null_nodeget: can't lock new vnode");

	/*
	 * Atomically insert our new node into the hash, or vget the
	 * existing one if someone else has beaten us to it.
	 */
	*vpp = null_hashins(mp, xp);
	if (*vpp != NULL) {
		vrele(lowervp);
		VOP_UNLOCK(vp, LK_THISLAYER, td);
		vp->v_vnlock = NULL;
		xp->null_lowervp = NULL;
		vrele(vp);
		return (0);
	}

	/*
	 * XXX We take an extra vref just to work around UFS's XXX:
	 * UFS can vrele() the vnode in VOP_CLOSE() in some cases.  Luckily,
	 * this can only happen if v_usecount == 1.  To work around it, we
	 * just don't let v_usecount be 1; it will be 2 or more.
	 */
	VREF(lowervp);

	*vpp = vp;

	return (0);
}
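/*
 * A node inserted by null_hashins() above stays hashed until
 * null_hashrem() below runs, which is expected to happen from the
 * nullfs vnode reclaim path once the alias is torn down.
 */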
/*
 * Remove node from hash.
 */
void
null_hashrem(xp)
	struct null_node *xp;
{

	mtx_lock(&null_hashmtx);
	LIST_REMOVE(xp, null_hash);
	mtx_unlock(&null_hashmtx);
}

#ifdef DIAGNOSTIC
#include "opt_ddb.h"

#ifdef DDB
#define	null_checkvp_barrier	1
#else
#define	null_checkvp_barrier	0
#endif

struct vnode *
null_checkvp(vp, fil, lno)
	struct vnode *vp;
	char *fil;
	int lno;
{
	struct null_node *a = VTONULL(vp);
#ifdef notyet
	/*
	 * Can't do this check because vop_reclaim runs
	 * with a funny vop vector.
	 */
	if (vp->v_op != null_vnodeop_p) {
		printf("null_checkvp: on non-null-node\n");
		while (null_checkvp_barrier) /*WAIT*/ ;
		panic("null_checkvp");
	}
#endif
	if (a->null_lowervp == NULLVP) {
		/* Should never happen */
		int i; u_long *p;
		printf("vp = %p, ZERO ptr\n", (void *)vp);
		for (p = (u_long *)a, i = 0; i < 8; i++)
			printf(" %lx", p[i]);
		printf("\n");
		/* wait for debugger */
		while (null_checkvp_barrier) /*WAIT*/ ;
		panic("null_checkvp");
	}
	if (vrefcnt(a->null_lowervp) < 1) {
		int i; u_long *p;
		printf("vp = %p, unref'ed lowervp\n", (void *)vp);
		for (p = (u_long *)a, i = 0; i < 8; i++)
			printf(" %lx", p[i]);
		printf("\n");
		/* wait for debugger */
		while (null_checkvp_barrier) /*WAIT*/ ;
		panic("null with unref'ed lowervp");
	}
#ifdef notyet
	printf("null %p/%d -> %p/%d [%s, %d]\n",
	    (void *)NULLTOV(a), vrefcnt(NULLTOV(a)),
	    (void *)a->null_lowervp, vrefcnt(a->null_lowervp),
	    fil, lno);
#endif
	return (a->null_lowervp);
}
#endif