xref: /freebsd/sys/fs/nullfs/null_subr.c (revision 4c65d593e20994b1ccad2c0bb6f6e1d6b52e2cd3)
1d167cf6fSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1992, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * This code is derived from software donated to Berkeley by
6df8bae1dSRodney W. Grimes  * Jan-Simon Pendry.
7df8bae1dSRodney W. Grimes  *
8df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
9df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
10df8bae1dSRodney W. Grimes  * are met:
11df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
12df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
13df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
15df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
16df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
17df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
18df8bae1dSRodney W. Grimes  *    without specific prior written permission.
19df8bae1dSRodney W. Grimes  *
20df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
31df8bae1dSRodney W. Grimes  *
32996c772fSJohn Dyson  *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
33df8bae1dSRodney W. Grimes  *
34c3aac50fSPeter Wemm  * $FreeBSD$
35df8bae1dSRodney W. Grimes  */
36df8bae1dSRodney W. Grimes 
37df8bae1dSRodney W. Grimes #include <sys/param.h>
38df8bae1dSRodney W. Grimes #include <sys/systm.h>
398da80660SBoris Popov #include <sys/kernel.h>
40fb919e4dSMark Murray #include <sys/lock.h>
4115420031SSemen Ustimenko #include <sys/mutex.h>
42df8bae1dSRodney W. Grimes #include <sys/malloc.h>
43fb919e4dSMark Murray #include <sys/mount.h>
44fb919e4dSMark Murray #include <sys/proc.h>
45fb919e4dSMark Murray #include <sys/vnode.h>
46fb919e4dSMark Murray 
4799d300a1SRuslan Ermilov #include <fs/nullfs/null.h>
48df8bae1dSRodney W. Grimes 
4954939875STim J. Robbins #define LOG2_SIZEVNODE 8		/* log2(sizeof struct vnode) */
50df8bae1dSRodney W. Grimes #define	NNULLNODECACHE 16
51df8bae1dSRodney W. Grimes 
52df8bae1dSRodney W. Grimes /*
53df8bae1dSRodney W. Grimes  * Null layer cache:
54df8bae1dSRodney W. Grimes  * Each cache entry holds a reference to the lower vnode
55df8bae1dSRodney W. Grimes  * along with a pointer to the alias vnode.  When an
56df8bae1dSRodney W. Grimes  * entry is added the lower vnode is VREF'd.  When the
57df8bae1dSRodney W. Grimes  * alias is removed the lower vnode is vrele'd.
58df8bae1dSRodney W. Grimes  */
59df8bae1dSRodney W. Grimes 
60996c772fSJohn Dyson #define	NULL_NHASH(vp) \
61a23d65bfSBruce Evans 	(&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])
628da80660SBoris Popov 
63e3975643SJake Burkholder static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
64303b270bSEivind Eklund static u_long null_node_hash;
6515420031SSemen Ustimenko struct mtx null_hashmtx;
668da80660SBoris Popov 
675bb84bc8SRobert Watson static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
685bb84bc8SRobert Watson MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
69df8bae1dSRodney W. Grimes 
7054939875STim J. Robbins static struct vnode * null_hashget(struct mount *, struct vnode *);
7154939875STim J. Robbins static struct vnode * null_hashins(struct mount *, struct null_node *);
729b5e8b3aSBruce Evans 
73df8bae1dSRodney W. Grimes /*
74df8bae1dSRodney W. Grimes  * Initialise cache headers
75df8bae1dSRodney W. Grimes  */
7626f9a767SRodney W. Grimes int
77996c772fSJohn Dyson nullfs_init(vfsp)
78996c772fSJohn Dyson 	struct vfsconf *vfsp;
79df8bae1dSRodney W. Grimes {
80996c772fSJohn Dyson 
818da80660SBoris Popov 	NULLFSDEBUG("nullfs_init\n");		/* printed during system boot */
828da80660SBoris Popov 	null_node_hashtbl = hashinit(NNULLNODECACHE, M_NULLFSHASH, &null_node_hash);
8315420031SSemen Ustimenko 	mtx_init(&null_hashmtx, "nullhs", NULL, MTX_DEF);
848da80660SBoris Popov 	return (0);
858da80660SBoris Popov }
868da80660SBoris Popov 
878da80660SBoris Popov int
888da80660SBoris Popov nullfs_uninit(vfsp)
898da80660SBoris Popov 	struct vfsconf *vfsp;
908da80660SBoris Popov {
918da80660SBoris Popov 
9215420031SSemen Ustimenko 	mtx_destroy(&null_hashmtx);
938da80660SBoris Popov 	free(null_node_hashtbl, M_NULLFSHASH);
9426f9a767SRodney W. Grimes 	return (0);
95df8bae1dSRodney W. Grimes }
96df8bae1dSRodney W. Grimes 
97df8bae1dSRodney W. Grimes /*
98df8bae1dSRodney W. Grimes  * Return a VREF'ed alias for lower vnode if already exists, else 0.
994451405fSBoris Popov  * Lower vnode should be locked on entry and will be left locked on exit.
100df8bae1dSRodney W. Grimes  */
101df8bae1dSRodney W. Grimes static struct vnode *
10254939875STim J. Robbins null_hashget(mp, lowervp)
10354939875STim J. Robbins 	struct mount *mp;
104df8bae1dSRodney W. Grimes 	struct vnode *lowervp;
105df8bae1dSRodney W. Grimes {
106996c772fSJohn Dyson 	struct null_node_hashhead *hd;
107df8bae1dSRodney W. Grimes 	struct null_node *a;
108df8bae1dSRodney W. Grimes 	struct vnode *vp;
1099c12e631SJeff Roberson 
1109c12e631SJeff Roberson 	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
111df8bae1dSRodney W. Grimes 
112df8bae1dSRodney W. Grimes 	/*
113df8bae1dSRodney W. Grimes 	 * Find hash base, and then search the (two-way) linked
114df8bae1dSRodney W. Grimes 	 * list looking for a null_node structure which is referencing
115df8bae1dSRodney W. Grimes 	 * the lower vnode.  If found, the increment the null_node
116df8bae1dSRodney W. Grimes 	 * reference count (but NOT the lower vnode's VREF counter).
117df8bae1dSRodney W. Grimes 	 */
118996c772fSJohn Dyson 	hd = NULL_NHASH(lowervp);
11915420031SSemen Ustimenko 	mtx_lock(&null_hashmtx);
120fc2ffbe6SPoul-Henning Kamp 	LIST_FOREACH(a, hd, null_hash) {
12154939875STim J. Robbins 		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
1229c12e631SJeff Roberson 			/*
1239c12e631SJeff Roberson 			 * Since we have the lower node locked the nullfs
1249c12e631SJeff Roberson 			 * node can not be in the process of recycling.  If
1259c12e631SJeff Roberson 			 * it had been recycled before we grabed the lower
1269c12e631SJeff Roberson 			 * lock it would not have been found on the hash.
1279c12e631SJeff Roberson 			 */
1284c65d593SJeff Roberson 			vp = NULLTOV(a);
1294c65d593SJeff Roberson 			vref(vp);
1304c65d593SJeff Roberson 			mtx_unlock(&null_hashmtx);
131df8bae1dSRodney W. Grimes 			return (vp);
132df8bae1dSRodney W. Grimes 		}
133df8bae1dSRodney W. Grimes 	}
13415420031SSemen Ustimenko 	mtx_unlock(&null_hashmtx);
1351cfdefbbSSemen Ustimenko 	return (NULLVP);
136df8bae1dSRodney W. Grimes }
137df8bae1dSRodney W. Grimes 
1381cfdefbbSSemen Ustimenko /*
1391cfdefbbSSemen Ustimenko  * Act like null_hashget, but add passed null_node to hash if no existing
1401cfdefbbSSemen Ustimenko  * node found.
1411cfdefbbSSemen Ustimenko  */
1421cfdefbbSSemen Ustimenko static struct vnode *
14354939875STim J. Robbins null_hashins(mp, xp)
14454939875STim J. Robbins 	struct mount *mp;
1451cfdefbbSSemen Ustimenko 	struct null_node *xp;
1461cfdefbbSSemen Ustimenko {
1471cfdefbbSSemen Ustimenko 	struct null_node_hashhead *hd;
1481cfdefbbSSemen Ustimenko 	struct null_node *oxp;
1491cfdefbbSSemen Ustimenko 	struct vnode *ovp;
1501cfdefbbSSemen Ustimenko 
1511cfdefbbSSemen Ustimenko 	hd = NULL_NHASH(xp->null_lowervp);
1521cfdefbbSSemen Ustimenko 	mtx_lock(&null_hashmtx);
1531cfdefbbSSemen Ustimenko 	LIST_FOREACH(oxp, hd, null_hash) {
15454939875STim J. Robbins 		if (oxp->null_lowervp == xp->null_lowervp &&
15554939875STim J. Robbins 		    NULLTOV(oxp)->v_mount == mp) {
1569c12e631SJeff Roberson 			/*
1579c12e631SJeff Roberson 			 * See null_hashget for a description of this
1589c12e631SJeff Roberson 			 * operation.
1599c12e631SJeff Roberson 			 */
1601cfdefbbSSemen Ustimenko 			ovp = NULLTOV(oxp);
1614c65d593SJeff Roberson 			vref(ovp);
1621cfdefbbSSemen Ustimenko 			mtx_unlock(&null_hashmtx);
1631cfdefbbSSemen Ustimenko 			return (ovp);
1641cfdefbbSSemen Ustimenko 		}
1651cfdefbbSSemen Ustimenko 	}
1661cfdefbbSSemen Ustimenko 	LIST_INSERT_HEAD(hd, xp, null_hash);
1671cfdefbbSSemen Ustimenko 	mtx_unlock(&null_hashmtx);
1681cfdefbbSSemen Ustimenko 	return (NULLVP);
1691cfdefbbSSemen Ustimenko }
170df8bae1dSRodney W. Grimes 
17161b9d89fSTor Egge static void
17261b9d89fSTor Egge null_insmntque_dtr(struct vnode *vp, void *xp)
17361b9d89fSTor Egge {
17461b9d89fSTor Egge 	vp->v_data = NULL;
17561b9d89fSTor Egge 	vp->v_vnlock = &vp->v_lock;
17661b9d89fSTor Egge 	FREE(xp, M_NULLFSNODE);
17761b9d89fSTor Egge 	vp->v_op = &dead_vnodeops;
178cb05b60aSAttilio Rao 	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
17961b9d89fSTor Egge 	vgone(vp);
18061b9d89fSTor Egge 	vput(vp);
18161b9d89fSTor Egge }
18261b9d89fSTor Egge 
183df8bae1dSRodney W. Grimes /*
1841cfdefbbSSemen Ustimenko  * Make a new or get existing nullfs node.
1851cfdefbbSSemen Ustimenko  * Vp is the alias vnode, lowervp is the lower vnode.
1861cfdefbbSSemen Ustimenko  *
1871cfdefbbSSemen Ustimenko  * The lowervp assumed to be locked and having "spare" reference. This routine
1881cfdefbbSSemen Ustimenko  * vrele lowervp if nullfs node was taken from hash. Otherwise it "transfers"
1891cfdefbbSSemen Ustimenko  * the caller's "spare" reference to created nullfs vnode.
190df8bae1dSRodney W. Grimes  */
1911cfdefbbSSemen Ustimenko int
1921cfdefbbSSemen Ustimenko null_nodeget(mp, lowervp, vpp)
193df8bae1dSRodney W. Grimes 	struct mount *mp;
194df8bae1dSRodney W. Grimes 	struct vnode *lowervp;
195df8bae1dSRodney W. Grimes 	struct vnode **vpp;
196df8bae1dSRodney W. Grimes {
197df8bae1dSRodney W. Grimes 	struct null_node *xp;
1981cfdefbbSSemen Ustimenko 	struct vnode *vp;
199df8bae1dSRodney W. Grimes 	int error;
200df8bae1dSRodney W. Grimes 
2011cfdefbbSSemen Ustimenko 	/* Lookup the hash firstly */
20254939875STim J. Robbins 	*vpp = null_hashget(mp, lowervp);
2031cfdefbbSSemen Ustimenko 	if (*vpp != NULL) {
2041cfdefbbSSemen Ustimenko 		vrele(lowervp);
2051cfdefbbSSemen Ustimenko 		return (0);
2061cfdefbbSSemen Ustimenko 	}
2071cfdefbbSSemen Ustimenko 
2081cfdefbbSSemen Ustimenko 	/*
2091cfdefbbSSemen Ustimenko 	 * We do not serialize vnode creation, instead we will check for
2101cfdefbbSSemen Ustimenko 	 * duplicates later, when adding new vnode to hash.
2111cfdefbbSSemen Ustimenko 	 *
2121cfdefbbSSemen Ustimenko 	 * Note that duplicate can only appear in hash if the lowervp is
2131cfdefbbSSemen Ustimenko 	 * locked LK_SHARED.
2141cfdefbbSSemen Ustimenko 	 */
2151cfdefbbSSemen Ustimenko 
2162f9bae59SDavid Greenman 	/*
2172f9bae59SDavid Greenman 	 * Do the MALLOC before the getnewvnode since doing so afterward
2182f9bae59SDavid Greenman 	 * might cause a bogus v_data pointer to get dereferenced
2192f9bae59SDavid Greenman 	 * elsewhere if MALLOC should block.
2202f9bae59SDavid Greenman 	 */
2218da80660SBoris Popov 	MALLOC(xp, struct null_node *, sizeof(struct null_node),
222a163d034SWarner Losh 	    M_NULLFSNODE, M_WAITOK);
2232f9bae59SDavid Greenman 
224aec0fb7bSPoul-Henning Kamp 	error = getnewvnode("null", mp, &null_vnodeops, &vp);
2252f9bae59SDavid Greenman 	if (error) {
2268da80660SBoris Popov 		FREE(xp, M_NULLFSNODE);
227df8bae1dSRodney W. Grimes 		return (error);
2282f9bae59SDavid Greenman 	}
229df8bae1dSRodney W. Grimes 
230df8bae1dSRodney W. Grimes 	xp->null_vnode = vp;
231df8bae1dSRodney W. Grimes 	xp->null_lowervp = lowervp;
23208720e34SSemen Ustimenko 	vp->v_type = lowervp->v_type;
23308720e34SSemen Ustimenko 	vp->v_data = xp;
2344451405fSBoris Popov 	vp->v_vnlock = lowervp->v_vnlock;
235bc855512SJeff Roberson 	if (vp->v_vnlock == NULL)
236bc855512SJeff Roberson 		panic("null_nodeget: Passed a NULL vnlock.\n");
23761b9d89fSTor Egge 	error = insmntque1(vp, mp, null_insmntque_dtr, xp);
23861b9d89fSTor Egge 	if (error != 0)
23961b9d89fSTor Egge 		return (error);
240df8bae1dSRodney W. Grimes 	/*
2411cfdefbbSSemen Ustimenko 	 * Atomically insert our new node into the hash or vget existing
2421cfdefbbSSemen Ustimenko 	 * if someone else has beaten us to it.
243df8bae1dSRodney W. Grimes 	 */
24454939875STim J. Robbins 	*vpp = null_hashins(mp, xp);
2451cfdefbbSSemen Ustimenko 	if (*vpp != NULL) {
2464451405fSBoris Popov 		vrele(lowervp);
247bc855512SJeff Roberson 		vp->v_vnlock = &vp->v_lock;
2481cfdefbbSSemen Ustimenko 		xp->null_lowervp = NULL;
2491cfdefbbSSemen Ustimenko 		vrele(vp);
250df8bae1dSRodney W. Grimes 		return (0);
251df8bae1dSRodney W. Grimes 	}
2521cfdefbbSSemen Ustimenko 	*vpp = vp;
2531cfdefbbSSemen Ustimenko 
2541cfdefbbSSemen Ustimenko 	return (0);
2551cfdefbbSSemen Ustimenko }
2561cfdefbbSSemen Ustimenko 
2571cfdefbbSSemen Ustimenko /*
2581cfdefbbSSemen Ustimenko  * Remove node from hash.
2591cfdefbbSSemen Ustimenko  */
26008720e34SSemen Ustimenko void
26108720e34SSemen Ustimenko null_hashrem(xp)
26208720e34SSemen Ustimenko 	struct null_node *xp;
26308720e34SSemen Ustimenko {
26408720e34SSemen Ustimenko 
26515420031SSemen Ustimenko 	mtx_lock(&null_hashmtx);
26608720e34SSemen Ustimenko 	LIST_REMOVE(xp, null_hash);
26715420031SSemen Ustimenko 	mtx_unlock(&null_hashmtx);
26808720e34SSemen Ustimenko }
26908720e34SSemen Ustimenko 
270a0f40f54SBruce Evans #ifdef DIAGNOSTIC
2711bf978ceSKATO Takenori 
2724ea4f1f9SMarcel Moolenaar #ifdef KDB
273e958d078SKATO Takenori #define	null_checkvp_barrier	1
274e958d078SKATO Takenori #else
275e958d078SKATO Takenori #define	null_checkvp_barrier	0
276e958d078SKATO Takenori #endif
277e958d078SKATO Takenori 
278df8bae1dSRodney W. Grimes struct vnode *
279df8bae1dSRodney W. Grimes null_checkvp(vp, fil, lno)
280df8bae1dSRodney W. Grimes 	struct vnode *vp;
281df8bae1dSRodney W. Grimes 	char *fil;
282df8bae1dSRodney W. Grimes 	int lno;
283df8bae1dSRodney W. Grimes {
28410166260SDaichi GOTO 	int interlock = 0;
285df8bae1dSRodney W. Grimes 	struct null_node *a = VTONULL(vp);
286df8bae1dSRodney W. Grimes #ifdef notyet
287df8bae1dSRodney W. Grimes 	/*
288df8bae1dSRodney W. Grimes 	 * Can't do this check because vop_reclaim runs
289df8bae1dSRodney W. Grimes 	 * with a funny vop vector.
290df8bae1dSRodney W. Grimes 	 */
291df8bae1dSRodney W. Grimes 	if (vp->v_op != null_vnodeop_p) {
292df8bae1dSRodney W. Grimes 		printf ("null_checkvp: on non-null-node\n");
293df8bae1dSRodney W. Grimes 		while (null_checkvp_barrier) /*WAIT*/ ;
294df8bae1dSRodney W. Grimes 		panic("null_checkvp");
295df8bae1dSRodney W. Grimes 	};
296df8bae1dSRodney W. Grimes #endif
297c5e17d9eSKATO Takenori 	if (a->null_lowervp == NULLVP) {
298df8bae1dSRodney W. Grimes 		/* Should never happen */
299df8bae1dSRodney W. Grimes 		int i; u_long *p;
30089785a16SBruce Evans 		printf("vp = %p, ZERO ptr\n", (void *)vp);
301df8bae1dSRodney W. Grimes 		for (p = (u_long *) a, i = 0; i < 8; i++)
30289785a16SBruce Evans 			printf(" %lx", p[i]);
303df8bae1dSRodney W. Grimes 		printf("\n");
304df8bae1dSRodney W. Grimes 		/* wait for debugger */
305df8bae1dSRodney W. Grimes 		while (null_checkvp_barrier) /*WAIT*/ ;
306df8bae1dSRodney W. Grimes 		panic("null_checkvp");
307df8bae1dSRodney W. Grimes 	}
30810166260SDaichi GOTO 	if (mtx_owned(VI_MTX(vp)) != 0) {
30910166260SDaichi GOTO 		VI_UNLOCK(vp);
31010166260SDaichi GOTO 		interlock = 1;
31110166260SDaichi GOTO 	}
3124d93c0beSJeff Roberson 	if (vrefcnt(a->null_lowervp) < 1) {
313df8bae1dSRodney W. Grimes 		int i; u_long *p;
31489785a16SBruce Evans 		printf("vp = %p, unref'ed lowervp\n", (void *)vp);
315df8bae1dSRodney W. Grimes 		for (p = (u_long *) a, i = 0; i < 8; i++)
31689785a16SBruce Evans 			printf(" %lx", p[i]);
317df8bae1dSRodney W. Grimes 		printf("\n");
318df8bae1dSRodney W. Grimes 		/* wait for debugger */
319df8bae1dSRodney W. Grimes 		while (null_checkvp_barrier) /*WAIT*/ ;
320df8bae1dSRodney W. Grimes 		panic ("null with unref'ed lowervp");
321df8bae1dSRodney W. Grimes 	};
32210166260SDaichi GOTO 	if (interlock != 0)
32310166260SDaichi GOTO 		VI_LOCK(vp);
324df8bae1dSRodney W. Grimes #ifdef notyet
325df8bae1dSRodney W. Grimes 	printf("null %x/%d -> %x/%d [%s, %d]\n",
3264d93c0beSJeff Roberson 	        NULLTOV(a), vrefcnt(NULLTOV(a)),
3274d93c0beSJeff Roberson 		a->null_lowervp, vrefcnt(a->null_lowervp),
328df8bae1dSRodney W. Grimes 		fil, lno);
329df8bae1dSRodney W. Grimes #endif
330df8bae1dSRodney W. Grimes 	return a->null_lowervp;
331df8bae1dSRodney W. Grimes }
332df8bae1dSRodney W. Grimes #endif
333