1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1992, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software donated to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/rwlock.h>
40 #include <sys/malloc.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/vnode.h>
44 #include <sys/smr.h>
45
46 #include <fs/nullfs/null.h>
47
48 #include <vm/uma.h>
49
50 VFS_SMR_DECLARE;
51
52 /*
53 * Null layer cache:
54 * Each cache entry holds a reference to the lower vnode
55 * along with a pointer to the alias vnode. When an
56 * entry is added the lower vnode is VREF'd. When the
57 * alias is removed the lower vnode is vrele'd.
58 */
59
60 #define NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
61
62 static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
63 static struct rwlock null_hash_lock;
64 static u_long null_hash_mask;
65
66 static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
67 uma_zone_t __read_mostly null_node_zone;
68
69 static void null_hashins(struct mount *, struct null_node *);
70
71 /*
72 * Initialise cache headers
73 */
74 int
nullfs_init(struct vfsconf * vfsp)75 nullfs_init(struct vfsconf *vfsp)
76 {
77
78 null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
79 &null_hash_mask);
80 rw_init(&null_hash_lock, "nullhs");
81 null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node),
82 NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
83 VFS_SMR_ZONE_SET(null_node_zone);
84
85 return (0);
86 }
87
88 int
nullfs_uninit(struct vfsconf * vfsp)89 nullfs_uninit(struct vfsconf *vfsp)
90 {
91
92 uma_zdestroy(null_node_zone);
93 rw_destroy(&null_hash_lock);
94 hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
95 return (0);
96 }
97
98 /*
99 * Return a VREF'ed alias for lower vnode if already exists, else 0.
100 * Lower vnode should be locked on entry and will be left locked on exit.
101 */
102 static struct vnode *
null_hashget_locked(struct mount * mp,struct vnode * lowervp)103 null_hashget_locked(struct mount *mp, struct vnode *lowervp)
104 {
105 struct null_node_hashhead *hd;
106 struct null_node *a;
107 struct vnode *vp;
108
109 ASSERT_VOP_LOCKED(lowervp, __func__);
110 rw_assert(&null_hash_lock, RA_LOCKED);
111
112 /*
113 * Find hash base, and then search the (two-way) linked
114 * list looking for a null_node structure which is referencing
115 * the lower vnode. If found, the increment the null_node
116 * reference count (but NOT the lower vnode's VREF counter).
117 */
118 hd = NULL_NHASH(lowervp);
119 CK_LIST_FOREACH(a, hd, null_hash) {
120 if (a->null_lowervp != lowervp)
121 continue;
122 /*
123 * Since we have the lower node locked the nullfs
124 * node can not be in the process of recycling. If
125 * it had been recycled before we grabed the lower
126 * lock it would not have been found on the hash.
127 */
128 vp = NULLTOV(a);
129 VNPASS(!VN_IS_DOOMED(vp), vp);
130 if (vp->v_mount != mp)
131 continue;
132 vref(vp);
133 return (vp);
134 }
135 return (NULL);
136 }
137
138 struct vnode *
null_hashget(struct mount * mp,struct vnode * lowervp)139 null_hashget(struct mount *mp, struct vnode *lowervp)
140 {
141 struct null_node_hashhead *hd;
142 struct null_node *a;
143 struct vnode *vp;
144 enum vgetstate vs;
145
146 ASSERT_VOP_LOCKED(lowervp, "null_hashget");
147 rw_assert(&null_hash_lock, RA_UNLOCKED);
148
149 vfs_smr_enter();
150 hd = NULL_NHASH(lowervp);
151 CK_LIST_FOREACH(a, hd, null_hash) {
152 if (a->null_lowervp != lowervp)
153 continue;
154 /*
155 * See null_hashget_locked as to why the nullfs vnode can't be
156 * doomed here.
157 */
158 vp = NULLTOV(a);
159 VNPASS(!VN_IS_DOOMED(vp), vp);
160 if (vp->v_mount != mp)
161 continue;
162 vs = vget_prep_smr(vp);
163 vfs_smr_exit();
164 VNPASS(vs != VGET_NONE, vp);
165 vget_finish_ref(vp, vs);
166 return (vp);
167 }
168 vfs_smr_exit();
169 return (NULL);
170 }
171
172 static void
null_hashins(struct mount * mp,struct null_node * xp)173 null_hashins(struct mount *mp, struct null_node *xp)
174 {
175 struct null_node_hashhead *hd;
176 #ifdef INVARIANTS
177 struct null_node *oxp;
178 #endif
179
180 rw_assert(&null_hash_lock, RA_WLOCKED);
181
182 hd = NULL_NHASH(xp->null_lowervp);
183 #ifdef INVARIANTS
184 CK_LIST_FOREACH(oxp, hd, null_hash) {
185 if (oxp->null_lowervp == xp->null_lowervp &&
186 NULLTOV(oxp)->v_mount == mp) {
187 VNASSERT(0, NULLTOV(oxp),
188 ("vnode already in hash"));
189 }
190 }
191 #endif
192 CK_LIST_INSERT_HEAD(hd, xp, null_hash);
193 }
194
195 static void
null_destroy_proto(struct vnode * vp,void * xp)196 null_destroy_proto(struct vnode *vp, void *xp)
197 {
198
199 lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
200 VI_LOCK(vp);
201 vp->v_data = NULL;
202 vp->v_vnlock = &vp->v_lock;
203 vp->v_op = &dead_vnodeops;
204 VI_UNLOCK(vp);
205 vgone(vp);
206 vput(vp);
207 uma_zfree_smr(null_node_zone, xp);
208 }
209
210 /*
211 * Make a new or get existing nullfs node.
212 * Vp is the alias vnode, lowervp is the lower vnode.
213 *
214 * The lowervp assumed to be locked and having "spare" reference. This routine
215 * vrele lowervp if nullfs node was taken from hash. Otherwise it "transfers"
216 * the caller's "spare" reference to created nullfs vnode.
217 */
218 int
null_nodeget(struct mount * mp,struct vnode * lowervp,struct vnode ** vpp)219 null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
220 {
221 struct null_node *xp;
222 struct vnode *vp;
223 int error;
224
225 ASSERT_VOP_LOCKED(lowervp, "lowervp");
226 VNPASS(lowervp->v_usecount > 0, lowervp);
227
228 /* Lookup the hash firstly. */
229 *vpp = null_hashget(mp, lowervp);
230 if (*vpp != NULL) {
231 vrele(lowervp);
232 return (0);
233 }
234
235 /*
236 * We do not serialize vnode creation, instead we will check for
237 * duplicates later, when adding new vnode to hash.
238 * Note that duplicate can only appear in hash if the lowervp is
239 * locked LK_SHARED.
240 */
241 xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
242
243 error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
244 if (error) {
245 vput(lowervp);
246 uma_zfree_smr(null_node_zone, xp);
247 return (error);
248 }
249
250 VNPASS(vp->v_object == NULL, vp);
251 VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) == 0, vp);
252
253 rw_wlock(&null_hash_lock);
254 xp->null_vnode = vp;
255 xp->null_lowervp = lowervp;
256 xp->null_flags = 0;
257 vp->v_type = lowervp->v_type;
258 vp->v_data = xp;
259 vp->v_vnlock = lowervp->v_vnlock;
260 *vpp = null_hashget_locked(mp, lowervp);
261 if (*vpp != NULL) {
262 rw_wunlock(&null_hash_lock);
263 vrele(lowervp);
264 null_destroy_proto(vp, xp);
265 return (0);
266 }
267
268 /*
269 * We might miss the case where lower vnode sets VIRF_PGREAD
270 * some time after construction, which is typical case.
271 * null_open rechecks.
272 */
273 if ((vn_irflag_read(lowervp) & VIRF_PGREAD) != 0) {
274 MPASS(lowervp->v_object != NULL);
275 vp->v_object = lowervp->v_object;
276 vn_irflag_set(vp, VIRF_PGREAD);
277 }
278 if ((vn_irflag_read(lowervp) & VIRF_INOTIFY) != 0)
279 vn_irflag_set(vp, VIRF_INOTIFY);
280 if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0)
281 vn_irflag_set(vp, VIRF_INOTIFY_PARENT);
282 if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
283 vp->v_vflag |= VV_ROOT;
284
285 error = insmntque1(vp, mp);
286 if (error != 0) {
287 rw_wunlock(&null_hash_lock);
288 vput(lowervp);
289 vp->v_object = NULL;
290 null_destroy_proto(vp, xp);
291 return (error);
292 }
293
294 vn_set_state(vp, VSTATE_CONSTRUCTED);
295 null_hashins(mp, xp);
296 rw_wunlock(&null_hash_lock);
297 *vpp = vp;
298
299 return (0);
300 }
301
302 /*
303 * Remove node from hash.
304 */
305 void
null_hashrem(struct null_node * xp)306 null_hashrem(struct null_node *xp)
307 {
308
309 rw_wlock(&null_hash_lock);
310 CK_LIST_REMOVE(xp, null_hash);
311 rw_wunlock(&null_hash_lock);
312 }
313
314 #ifdef DIAGNOSTIC
315
316 struct vnode *
null_checkvp(struct vnode * vp,char * fil,int lno)317 null_checkvp(struct vnode *vp, char *fil, int lno)
318 {
319 struct null_node *a = VTONULL(vp);
320
321 #ifdef notyet
322 /*
323 * Can't do this check because vop_reclaim runs
324 * with a funny vop vector.
325 */
326 if (vp->v_op != null_vnodeop_p) {
327 printf ("null_checkvp: on non-null-node\n");
328 panic("null_checkvp");
329 }
330 #endif
331 if (a->null_lowervp == NULL) {
332 /* Should never happen */
333 panic("null_checkvp %p", vp);
334 }
335 VI_LOCK_FLAGS(a->null_lowervp, MTX_DUPOK);
336 if (a->null_lowervp->v_usecount < 1)
337 panic ("null with unref'ed lowervp, vp %p lvp %p",
338 vp, a->null_lowervp);
339 VI_UNLOCK(a->null_lowervp);
340 #ifdef notyet
341 printf("null %x/%d -> %x/%d [%s, %d]\n",
342 NULLTOV(a), vrefcnt(NULLTOV(a)),
343 a->null_lowervp, vrefcnt(a->null_lowervp),
344 fil, lno);
345 #endif
346 return (a->null_lowervp);
347 }
348 #endif
349