/* xref: /freebsd/sys/fs/nullfs/null_subr.c (revision 29363fb446372cb3f10bc98664e9767c53fbb457) */
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software donated to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>

#include <fs/nullfs/null.h>

/*
 * Null layer cache:
 * Each cache entry holds a reference to the lower vnode
 * along with a pointer to the alias vnode.  When an
 * entry is added the lower vnode is VREF'd.  When the
 * alias is removed the lower vnode is vrele'd.
 */

55603f963eSKonstantin Belousov #define	NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
568da80660SBoris Popov 
57e3975643SJake Burkholder static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
58cd29b292SMateusz Guzik static struct rwlock null_hash_lock;
59603f963eSKonstantin Belousov static u_long null_hash_mask;
608da80660SBoris Popov 
615bb84bc8SRobert Watson static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
625bb84bc8SRobert Watson MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
63df8bae1dSRodney W. Grimes 
64aeabf8d4SMateusz Guzik static void null_hashins(struct mount *, struct null_node *);
659b5e8b3aSBruce Evans 
66df8bae1dSRodney W. Grimes /*
67df8bae1dSRodney W. Grimes  * Initialise cache headers
68df8bae1dSRodney W. Grimes  */
6926f9a767SRodney W. Grimes int
nullfs_init(struct vfsconf * vfsp)70d35991d3SMateusz Guzik nullfs_init(struct vfsconf *vfsp)
71df8bae1dSRodney W. Grimes {
72996c772fSJohn Dyson 
73603f963eSKonstantin Belousov 	null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
74603f963eSKonstantin Belousov 	    &null_hash_mask);
75cd29b292SMateusz Guzik 	rw_init(&null_hash_lock, "nullhs");
768da80660SBoris Popov 	return (0);
778da80660SBoris Popov }
788da80660SBoris Popov 
798da80660SBoris Popov int
nullfs_uninit(struct vfsconf * vfsp)80d35991d3SMateusz Guzik nullfs_uninit(struct vfsconf *vfsp)
818da80660SBoris Popov {
828da80660SBoris Popov 
83cd29b292SMateusz Guzik 	rw_destroy(&null_hash_lock);
84603f963eSKonstantin Belousov 	hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
8526f9a767SRodney W. Grimes 	return (0);
86df8bae1dSRodney W. Grimes }
87df8bae1dSRodney W. Grimes 
88df8bae1dSRodney W. Grimes /*
89df8bae1dSRodney W. Grimes  * Return a VREF'ed alias for lower vnode if already exists, else 0.
904451405fSBoris Popov  * Lower vnode should be locked on entry and will be left locked on exit.
91df8bae1dSRodney W. Grimes  */
92aeabf8d4SMateusz Guzik static struct vnode *
null_hashget_locked(struct mount * mp,struct vnode * lowervp)93aeabf8d4SMateusz Guzik null_hashget_locked(struct mount *mp, struct vnode *lowervp)
94df8bae1dSRodney W. Grimes {
95996c772fSJohn Dyson 	struct null_node_hashhead *hd;
96df8bae1dSRodney W. Grimes 	struct null_node *a;
97df8bae1dSRodney W. Grimes 	struct vnode *vp;
989c12e631SJeff Roberson 
999c12e631SJeff Roberson 	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
100aeabf8d4SMateusz Guzik 	rw_assert(&null_hash_lock, RA_LOCKED);
101df8bae1dSRodney W. Grimes 
102df8bae1dSRodney W. Grimes 	/*
103df8bae1dSRodney W. Grimes 	 * Find hash base, and then search the (two-way) linked
104df8bae1dSRodney W. Grimes 	 * list looking for a null_node structure which is referencing
105df8bae1dSRodney W. Grimes 	 * the lower vnode.  If found, the increment the null_node
106df8bae1dSRodney W. Grimes 	 * reference count (but NOT the lower vnode's VREF counter).
107df8bae1dSRodney W. Grimes 	 */
108996c772fSJohn Dyson 	hd = NULL_NHASH(lowervp);
109fc2ffbe6SPoul-Henning Kamp 	LIST_FOREACH(a, hd, null_hash) {
11054939875STim J. Robbins 		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
1119c12e631SJeff Roberson 			/*
1129c12e631SJeff Roberson 			 * Since we have the lower node locked the nullfs
1139c12e631SJeff Roberson 			 * node can not be in the process of recycling.  If
1149c12e631SJeff Roberson 			 * it had been recycled before we grabed the lower
1159c12e631SJeff Roberson 			 * lock it would not have been found on the hash.
1169c12e631SJeff Roberson 			 */
1174c65d593SJeff Roberson 			vp = NULLTOV(a);
1184c65d593SJeff Roberson 			vref(vp);
119df8bae1dSRodney W. Grimes 			return (vp);
120df8bae1dSRodney W. Grimes 		}
121df8bae1dSRodney W. Grimes 	}
1221cfdefbbSSemen Ustimenko 	return (NULLVP);
123df8bae1dSRodney W. Grimes }
124df8bae1dSRodney W. Grimes 
125aeabf8d4SMateusz Guzik struct vnode *
null_hashget(struct mount * mp,struct vnode * lowervp)126aeabf8d4SMateusz Guzik null_hashget(struct mount *mp, struct vnode *lowervp)
127aeabf8d4SMateusz Guzik {
128aeabf8d4SMateusz Guzik 	struct null_node_hashhead *hd;
129aeabf8d4SMateusz Guzik 	struct vnode *vp;
130aeabf8d4SMateusz Guzik 
131aeabf8d4SMateusz Guzik 	hd = NULL_NHASH(lowervp);
132aeabf8d4SMateusz Guzik 	if (LIST_EMPTY(hd))
133aeabf8d4SMateusz Guzik 		return (NULLVP);
134aeabf8d4SMateusz Guzik 
135aeabf8d4SMateusz Guzik 	rw_rlock(&null_hash_lock);
136aeabf8d4SMateusz Guzik 	vp = null_hashget_locked(mp, lowervp);
137aeabf8d4SMateusz Guzik 	rw_runlock(&null_hash_lock);
138aeabf8d4SMateusz Guzik 
139aeabf8d4SMateusz Guzik 	return (vp);
140aeabf8d4SMateusz Guzik }
141aeabf8d4SMateusz Guzik 
142aeabf8d4SMateusz Guzik static void
null_hashins(struct mount * mp,struct null_node * xp)143d35991d3SMateusz Guzik null_hashins(struct mount *mp, struct null_node *xp)
1441cfdefbbSSemen Ustimenko {
1451cfdefbbSSemen Ustimenko 	struct null_node_hashhead *hd;
146aeabf8d4SMateusz Guzik #ifdef INVARIANTS
1471cfdefbbSSemen Ustimenko 	struct null_node *oxp;
148aeabf8d4SMateusz Guzik #endif
149aeabf8d4SMateusz Guzik 
150aeabf8d4SMateusz Guzik 	rw_assert(&null_hash_lock, RA_WLOCKED);
1511cfdefbbSSemen Ustimenko 
1521cfdefbbSSemen Ustimenko 	hd = NULL_NHASH(xp->null_lowervp);
153aeabf8d4SMateusz Guzik #ifdef INVARIANTS
1541cfdefbbSSemen Ustimenko 	LIST_FOREACH(oxp, hd, null_hash) {
15554939875STim J. Robbins 		if (oxp->null_lowervp == xp->null_lowervp &&
15654939875STim J. Robbins 		    NULLTOV(oxp)->v_mount == mp) {
157aeabf8d4SMateusz Guzik 			VNASSERT(0, NULLTOV(oxp),
158aeabf8d4SMateusz Guzik 			    ("vnode already in hash"));
1591cfdefbbSSemen Ustimenko 		}
1601cfdefbbSSemen Ustimenko 	}
161aeabf8d4SMateusz Guzik #endif
1621cfdefbbSSemen Ustimenko 	LIST_INSERT_HEAD(hd, xp, null_hash);
1631cfdefbbSSemen Ustimenko }
164df8bae1dSRodney W. Grimes 
16561b9d89fSTor Egge static void
null_destroy_proto(struct vnode * vp,void * xp)16667e3d54fSKonstantin Belousov null_destroy_proto(struct vnode *vp, void *xp)
16767e3d54fSKonstantin Belousov {
16867e3d54fSKonstantin Belousov 
16966f02f4bSKonstantin Belousov 	lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
17067e3d54fSKonstantin Belousov 	VI_LOCK(vp);
17167e3d54fSKonstantin Belousov 	vp->v_data = NULL;
17267e3d54fSKonstantin Belousov 	vp->v_vnlock = &vp->v_lock;
17367e3d54fSKonstantin Belousov 	vp->v_op = &dead_vnodeops;
17467e3d54fSKonstantin Belousov 	VI_UNLOCK(vp);
17567e3d54fSKonstantin Belousov 	vgone(vp);
17667e3d54fSKonstantin Belousov 	vput(vp);
17767e3d54fSKonstantin Belousov 	free(xp, M_NULLFSNODE);
17867e3d54fSKonstantin Belousov }
17967e3d54fSKonstantin Belousov 
180df8bae1dSRodney W. Grimes /*
1811cfdefbbSSemen Ustimenko  * Make a new or get existing nullfs node.
1821cfdefbbSSemen Ustimenko  * Vp is the alias vnode, lowervp is the lower vnode.
1831cfdefbbSSemen Ustimenko  *
1841cfdefbbSSemen Ustimenko  * The lowervp assumed to be locked and having "spare" reference. This routine
1851cfdefbbSSemen Ustimenko  * vrele lowervp if nullfs node was taken from hash. Otherwise it "transfers"
1861cfdefbbSSemen Ustimenko  * the caller's "spare" reference to created nullfs vnode.
187df8bae1dSRodney W. Grimes  */
1881cfdefbbSSemen Ustimenko int
null_nodeget(struct mount * mp,struct vnode * lowervp,struct vnode ** vpp)189d35991d3SMateusz Guzik null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
190df8bae1dSRodney W. Grimes {
191df8bae1dSRodney W. Grimes 	struct null_node *xp;
1921cfdefbbSSemen Ustimenko 	struct vnode *vp;
193df8bae1dSRodney W. Grimes 	int error;
194df8bae1dSRodney W. Grimes 
195d9e9650aSKonstantin Belousov 	ASSERT_VOP_LOCKED(lowervp, "lowervp");
196f1fa1ba3SMateusz Guzik 	VNPASS(lowervp->v_usecount > 0, lowervp);
19748a1e3f6SKonstantin Belousov 
198d9e9650aSKonstantin Belousov 	/* Lookup the hash firstly. */
19954939875STim J. Robbins 	*vpp = null_hashget(mp, lowervp);
2001cfdefbbSSemen Ustimenko 	if (*vpp != NULL) {
2011cfdefbbSSemen Ustimenko 		vrele(lowervp);
2021cfdefbbSSemen Ustimenko 		return (0);
2031cfdefbbSSemen Ustimenko 	}
2041cfdefbbSSemen Ustimenko 
2051cfdefbbSSemen Ustimenko 	/*
2061cfdefbbSSemen Ustimenko 	 * We do not serialize vnode creation, instead we will check for
2071cfdefbbSSemen Ustimenko 	 * duplicates later, when adding new vnode to hash.
2081cfdefbbSSemen Ustimenko 	 * Note that duplicate can only appear in hash if the lowervp is
2091cfdefbbSSemen Ustimenko 	 * locked LK_SHARED.
2102f9bae59SDavid Greenman 	 */
211d9e9650aSKonstantin Belousov 	xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
2122f9bae59SDavid Greenman 
213e583d999SEdward Tomasz Napierala 	error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
2142f9bae59SDavid Greenman 	if (error) {
215dd0f9532SKonstantin Belousov 		vput(lowervp);
2161ede983cSDag-Erling Smørgrav 		free(xp, M_NULLFSNODE);
217df8bae1dSRodney W. Grimes 		return (error);
2182f9bae59SDavid Greenman 	}
219df8bae1dSRodney W. Grimes 
220aeabf8d4SMateusz Guzik 	VNPASS(vp->v_object == NULL, vp);
221aeabf8d4SMateusz Guzik 	VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) == 0, vp);
222aeabf8d4SMateusz Guzik 
223aeabf8d4SMateusz Guzik 	rw_wlock(&null_hash_lock);
224df8bae1dSRodney W. Grimes 	xp->null_vnode = vp;
225df8bae1dSRodney W. Grimes 	xp->null_lowervp = lowervp;
2260fc6daa7SKonstantin Belousov 	xp->null_flags = 0;
22708720e34SSemen Ustimenko 	vp->v_type = lowervp->v_type;
22808720e34SSemen Ustimenko 	vp->v_data = xp;
2294451405fSBoris Popov 	vp->v_vnlock = lowervp->v_vnlock;
230aeabf8d4SMateusz Guzik 	*vpp = null_hashget_locked(mp, lowervp);
231aeabf8d4SMateusz Guzik 	if (*vpp != NULL) {
232aeabf8d4SMateusz Guzik 		rw_wunlock(&null_hash_lock);
233aeabf8d4SMateusz Guzik 		vrele(lowervp);
2344e91a0b9SMateusz Guzik 		null_destroy_proto(vp, xp);
235aeabf8d4SMateusz Guzik 		return (0);
2364e91a0b9SMateusz Guzik 	}
237dc1d2cc6SKonstantin Belousov 
238df8bae1dSRodney W. Grimes 	/*
239685cb01aSKonstantin Belousov 	 * We might miss the case where lower vnode sets VIRF_PGREAD
240685cb01aSKonstantin Belousov 	 * some time after construction, which is typical case.
241685cb01aSKonstantin Belousov 	 * null_open rechecks.
242685cb01aSKonstantin Belousov 	 */
2433e506a67SMateusz Guzik 	if ((vn_irflag_read(lowervp) & VIRF_PGREAD) != 0) {
244685cb01aSKonstantin Belousov 		MPASS(lowervp->v_object != NULL);
245685cb01aSKonstantin Belousov 		vp->v_object = lowervp->v_object;
246aeabf8d4SMateusz Guzik 		vn_irflag_set(vp, VIRF_PGREAD);
247685cb01aSKonstantin Belousov 	}
248aeabf8d4SMateusz Guzik 	if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
249aeabf8d4SMateusz Guzik 		vp->v_vflag |= VV_ROOT;
250aeabf8d4SMateusz Guzik 
251aeabf8d4SMateusz Guzik 	error = insmntque1(vp, mp);
252aeabf8d4SMateusz Guzik 	if (error != 0) {
253aeabf8d4SMateusz Guzik 		rw_wunlock(&null_hash_lock);
254aeabf8d4SMateusz Guzik 		vput(lowervp);
255aeabf8d4SMateusz Guzik 		vp->v_object = NULL;
256aeabf8d4SMateusz Guzik 		null_destroy_proto(vp, xp);
257aeabf8d4SMateusz Guzik 		return (error);
258685cb01aSKonstantin Belousov 	}
259685cb01aSKonstantin Belousov 
260aeabf8d4SMateusz Guzik 	null_hashins(mp, xp);
261*829f0bcbSMateusz Guzik 	vn_set_state(vp, VSTATE_CONSTRUCTED);
262aeabf8d4SMateusz Guzik 	rw_wunlock(&null_hash_lock);
2631cfdefbbSSemen Ustimenko 	*vpp = vp;
2641cfdefbbSSemen Ustimenko 
2651cfdefbbSSemen Ustimenko 	return (0);
2661cfdefbbSSemen Ustimenko }
2671cfdefbbSSemen Ustimenko 
2681cfdefbbSSemen Ustimenko /*
2691cfdefbbSSemen Ustimenko  * Remove node from hash.
2701cfdefbbSSemen Ustimenko  */
27108720e34SSemen Ustimenko void
null_hashrem(struct null_node * xp)272d35991d3SMateusz Guzik null_hashrem(struct null_node *xp)
27308720e34SSemen Ustimenko {
27408720e34SSemen Ustimenko 
275cd29b292SMateusz Guzik 	rw_wlock(&null_hash_lock);
27608720e34SSemen Ustimenko 	LIST_REMOVE(xp, null_hash);
277cd29b292SMateusz Guzik 	rw_wunlock(&null_hash_lock);
27808720e34SSemen Ustimenko }
27908720e34SSemen Ustimenko 
#ifdef DIAGNOSTIC

/*
 * Sanity-check a nullfs vnode and return its lower vnode.
 *
 * Panics if the node has no lower vnode or if the lower vnode has no
 * use references.  fil and lno are only consumed by the disabled
 * ("notyet") trace output.
 */
struct vnode *
null_checkvp(struct vnode *vp, char *fil, int lno)
{
	struct null_node *a = VTONULL(vp);

#ifdef notyet
	/*
	 * Can't do this check because vop_reclaim runs
	 * with a funny vop vector.
	 */
	if (vp->v_op != null_vnodeop_p) {
		printf ("null_checkvp: on non-null-node\n");
		panic("null_checkvp");
	}
#endif
	if (a->null_lowervp == NULLVP) {
		/* Should never happen */
		panic("null_checkvp %p", vp);
	}
	/* Read the lower vnode's use count under its interlock. */
	VI_LOCK_FLAGS(a->null_lowervp, MTX_DUPOK);
	if (a->null_lowervp->v_usecount < 1)
		panic("null with unref'ed lowervp, vp %p lvp %p",
		    vp, a->null_lowervp);
	VI_UNLOCK(a->null_lowervp);
#ifdef notyet
	printf("null %x/%d -> %x/%d [%s, %d]\n",
	        NULLTOV(a), vrefcnt(NULLTOV(a)),
		a->null_lowervp, vrefcnt(a->null_lowervp),
		fil, lno);
#endif
	return (a->null_lowervp);
}
#endif