xref: /freebsd/sys/fs/nullfs/null_vfsops.c (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1992, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * Null Layer
37  * (See null_vnops.c for a description of what this does.)
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/fcntl.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mount.h>
47 #include <sys/namei.h>
48 #include <sys/proc.h>
49 #include <sys/sysctl.h>
50 #include <sys/vnode.h>
51 #include <sys/jail.h>
52 
53 #include <fs/nullfs/null.h>
54 
55 static MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure");
56 
57 static vfs_fhtovp_t	nullfs_fhtovp;
58 static vfs_mount_t	nullfs_mount;
59 static vfs_quotactl_t	nullfs_quotactl;
60 static vfs_root_t	nullfs_root;
61 static vfs_sync_t	nullfs_sync;
62 static vfs_statfs_t	nullfs_statfs;
63 static vfs_unmount_t	nullfs_unmount;
64 static vfs_vget_t	nullfs_vget;
65 static vfs_extattrctl_t	nullfs_extattrctl;
66 
67 SYSCTL_NODE(_vfs, OID_AUTO, nullfs, CTLFLAG_RW, 0, "nullfs");
68 
69 static bool null_cache_vnodes = true;
70 SYSCTL_BOOL(_vfs_nullfs, OID_AUTO, cache_vnodes, CTLFLAG_RWTUN,
71     &null_cache_vnodes, 0,
72     "cache free nullfs vnodes");
73 
74 /*
75  * Mount null layer
76  */
77 static int
78 nullfs_mount(struct mount *mp)
79 {
80 	struct vnode *lowerrootvp;
81 	struct vnode *nullm_rootvp;
82 	struct null_mount *xmp;
83 	struct null_node *nn;
84 	struct nameidata nd, *ndp;
85 	char *target;
86 	int error, len;
87 	bool isvnunlocked;
88 
89 	NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
90 
91 	if (mp->mnt_flag & MNT_ROOTFS)
92 		return (EOPNOTSUPP);
93 
94 	/*
95 	 * Update is a no-op
96 	 */
97 	if (mp->mnt_flag & MNT_UPDATE) {
98 		/*
99 		 * Only support update mounts for NFS export.
100 		 */
101 		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
102 			return (0);
103 		else
104 			return (EOPNOTSUPP);
105 	}
106 
107 	/*
108 	 * Get argument
109 	 */
110 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target, &len);
111 	if (error != 0)
112 		error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
113 	if (error || target[len - 1] != '\0')
114 		return (EINVAL);
115 
116 	/*
117 	 * Unlock lower node to avoid possible deadlock.
118 	 */
119 	if (mp->mnt_vnodecovered->v_op == &null_vnodeops &&
120 	    VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
121 		VOP_UNLOCK(mp->mnt_vnodecovered);
122 		isvnunlocked = true;
123 	} else {
124 		isvnunlocked = false;
125 	}
126 
127 	/*
128 	 * Find lower node
129 	 */
130 	ndp = &nd;
131 	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target);
132 	error = namei(ndp);
133 
134 	/*
135 	 * Re-lock vnode.
136 	 * XXXKIB This is deadlock-prone as well.
137 	 */
138 	if (isvnunlocked)
139 		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
140 
141 	if (error)
142 		return (error);
143 	NDFREE_PNBUF(ndp);
144 
145 	/*
146 	 * Sanity check on lower vnode
147 	 */
148 	lowerrootvp = ndp->ni_vp;
149 
150 	/*
151 	 * Check multi null mount to avoid `lock against myself' panic.
152 	 */
153 	if (mp->mnt_vnodecovered->v_op == &null_vnodeops) {
154 		nn = VTONULL(mp->mnt_vnodecovered);
155 		if (nn == NULL || lowerrootvp == nn->null_lowervp) {
156 			NULLFSDEBUG("nullfs_mount: multi null mount?\n");
157 			vput(lowerrootvp);
158 			return (EDEADLK);
159 		}
160 	}
161 
162 	/*
163 	 * Lower vnode must be the same type as the covered vnode - we
164 	 * don't allow mounting directories to files or vice versa.
165 	 */
166 	if ((lowerrootvp->v_type != VDIR && lowerrootvp->v_type != VREG) ||
167 	    lowerrootvp->v_type != mp->mnt_vnodecovered->v_type) {
168 		NULLFSDEBUG("nullfs_mount: target must be same type as fspath");
169 		vput(lowerrootvp);
170 		return (EINVAL);
171 	}
172 
173 	xmp = malloc(sizeof(struct null_mount), M_NULLFSMNT,
174 	    M_WAITOK | M_ZERO);
175 
176 	/*
177 	 * Save pointer to underlying FS and the reference to the
178 	 * lower root vnode.
179 	 */
180 	xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp,
181 	    &xmp->upper_node);
182 	if (xmp->nullm_vfs == NULL) {
183 		vput(lowerrootvp);
184 		free(xmp, M_NULLFSMNT);
185 		return (ENOENT);
186 	}
187 	vref(lowerrootvp);
188 	xmp->nullm_lowerrootvp = lowerrootvp;
189 	mp->mnt_data = xmp;
190 
191 	/*
192 	 * Make sure the node alias worked.
193 	 */
194 	error = null_nodeget(mp, lowerrootvp, &nullm_rootvp);
195 	if (error != 0) {
196 		vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node);
197 		vrele(lowerrootvp);
198 		free(xmp, M_NULLFSMNT);
199 		return (error);
200 	}
201 
202 	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
203 		MNT_ILOCK(mp);
204 		mp->mnt_flag |= MNT_LOCAL;
205 		MNT_IUNLOCK(mp);
206 	}
207 
208 	if (vfs_getopt(mp->mnt_optnew, "cache", NULL, NULL) == 0) {
209 		xmp->nullm_flags |= NULLM_CACHE;
210 	} else if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0) {
211 		;
212 	} else if (null_cache_vnodes &&
213 	    (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) == 0) {
214 		xmp->nullm_flags |= NULLM_CACHE;
215 	}
216 
217 	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
218 		vfs_register_for_notification(xmp->nullm_vfs, mp,
219 		    &xmp->notify_node);
220 	}
221 
222 	if (lowerrootvp == mp->mnt_vnodecovered) {
223 		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
224 		lowerrootvp->v_vflag |= VV_CROSSLOCK;
225 		VOP_UNLOCK(lowerrootvp);
226 	}
227 
228 	MNT_ILOCK(mp);
229 	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
230 		mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
231 		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
232 		    MNTK_EXTENDED_SHARED);
233 	}
234 	mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE;
235 	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
236 	    (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
237 	MNT_IUNLOCK(mp);
238 	vfs_getnewfsid(mp);
239 	vfs_mountedfrom(mp, target);
240 	vput(nullm_rootvp);
241 
242 	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
243 		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
244 	return (0);
245 }
246 
247 /*
248  * Free reference to null layer
249  */
250 static int
251 nullfs_unmount(struct mount *mp, int mntflags)
252 {
253 	struct null_mount *mntdata;
254 	int error, flags;
255 
256 	NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
257 
258 	if (mntflags & MNT_FORCE)
259 		flags = FORCECLOSE;
260 	else
261 		flags = 0;
262 
263 	for (;;) {
264 		/* There is 1 extra root vnode reference (nullm_rootvp). */
265 		error = vflush(mp, 0, flags, curthread);
266 		if (error)
267 			return (error);
268 		MNT_ILOCK(mp);
269 		if (mp->mnt_nvnodelistsize == 0) {
270 			MNT_IUNLOCK(mp);
271 			break;
272 		}
273 		MNT_IUNLOCK(mp);
274 		if ((mntflags & MNT_FORCE) == 0)
275 			return (EBUSY);
276 	}
277 
278 	/*
279 	 * Finally, throw away the null_mount structure
280 	 */
281 	mntdata = mp->mnt_data;
282 	if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
283 		vfs_unregister_for_notification(mntdata->nullm_vfs,
284 		    &mntdata->notify_node);
285 	}
286 	if (mntdata->nullm_lowerrootvp == mp->mnt_vnodecovered) {
287 		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
288 		mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK;
289 		VOP_UNLOCK(mp->mnt_vnodecovered);
290 	}
291 	vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node);
292 	vrele(mntdata->nullm_lowerrootvp);
293 	mp->mnt_data = NULL;
294 	free(mntdata, M_NULLFSMNT);
295 	return (0);
296 }
297 
298 static int
299 nullfs_root(struct mount *mp, int flags, struct vnode **vpp)
300 {
301 	struct vnode *vp;
302 	struct null_mount *mntdata;
303 	int error;
304 
305 	mntdata = MOUNTTONULLMOUNT(mp);
306 	NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", mp,
307 	    mntdata->nullm_lowerrootvp);
308 
309 	error = vget(mntdata->nullm_lowerrootvp, flags);
310 	if (error == 0) {
311 		error = null_nodeget(mp, mntdata->nullm_lowerrootvp, &vp);
312 		if (error == 0) {
313 			*vpp = vp;
314 		}
315 	}
316 	return (error);
317 }
318 
319 static int
320 nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg, bool *mp_busy)
321 {
322 	struct mount *lowermp;
323 	struct null_mount *mntdata;
324 	int error;
325 	bool unbusy;
326 
327 	mntdata = MOUNTTONULLMOUNT(mp);
328 	lowermp = atomic_load_ptr(&mntdata->nullm_vfs);
329 	KASSERT(*mp_busy == true, ("upper mount not busy"));
330 	/*
331 	 * See comment in sys_quotactl() for an explanation of why the
332 	 * lower mount needs to be busied by the caller of VFS_QUOTACTL()
333 	 * but may be unbusied by the implementation.  We must unbusy
334 	 * the upper mount for the same reason; otherwise a namei lookup
335 	 * issued by the VFS_QUOTACTL() implementation could traverse the
336 	 * upper mount and deadlock.
337 	 */
338 	vfs_unbusy(mp);
339 	*mp_busy = false;
340 	unbusy = true;
341 	error = vfs_busy(lowermp, 0);
342 	if (error == 0)
343 		error = VFS_QUOTACTL(lowermp, cmd, uid, arg, &unbusy);
344 	if (unbusy)
345 		vfs_unbusy(lowermp);
346 
347 	return (error);
348 }
349 
350 static int
351 nullfs_statfs(struct mount *mp, struct statfs *sbp)
352 {
353 	int error;
354 	struct statfs *mstat;
355 
356 	NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
357 	    (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
358 	    (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
359 
360 	mstat = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK | M_ZERO);
361 
362 	error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, mstat);
363 	if (error) {
364 		free(mstat, M_STATFS);
365 		return (error);
366 	}
367 
368 	sbp->f_type = mstat->f_type;
369 	sbp->f_bsize = mstat->f_bsize;
370 	sbp->f_iosize = mstat->f_iosize;
371 	sbp->f_blocks = mstat->f_blocks;
372 	sbp->f_bfree = mstat->f_bfree;
373 	sbp->f_bavail = mstat->f_bavail;
374 	sbp->f_files = mstat->f_files;
375 	sbp->f_ffree = mstat->f_ffree;
376 
377 	free(mstat, M_STATFS);
378 	return (0);
379 }
380 
/*
 * Sync the null mount.
 *
 * Nothing is done at this layer and success is always reported; both
 * arguments are unused.
 *
 * XXX - Assumes no data cached at null layer.
 */
static int
nullfs_sync(struct mount *mp, int waitfor)
{

	return (0);
}
389 
390 static int
391 nullfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
392 {
393 	int error;
394 
395 	KASSERT((flags & LK_TYPE_MASK) != 0,
396 	    ("nullfs_vget: no lock requested"));
397 
398 	error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
399 	if (error != 0)
400 		return (error);
401 	return (null_nodeget(mp, *vpp, vpp));
402 }
403 
404 static int
405 nullfs_fhtovp(struct mount *mp, struct fid *fidp, int flags, struct vnode **vpp)
406 {
407 	int error;
408 
409 	error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags,
410 	    vpp);
411 	if (error != 0)
412 		return (error);
413 	return (null_nodeget(mp, *vpp, vpp));
414 }
415 
416 static int
417 nullfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
418     int namespace, const char *attrname)
419 {
420 
421 	return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd,
422 	    filename_vp, namespace, attrname));
423 }
424 
425 static void
426 nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
427 {
428 	struct vnode *vp;
429 
430 	vp = null_hashget(mp, lowervp);
431 	if (vp == NULL)
432 		return;
433 	VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
434 	vgone(vp);
435 	vput(vp);
436 }
437 
438 static void
439 nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
440 {
441 	struct vnode *vp;
442 	struct null_node *xp;
443 
444 	vp = null_hashget(mp, lowervp);
445 	if (vp == NULL)
446 		return;
447 	xp = VTONULL(vp);
448 	xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
449 	vhold(vp);
450 	vunref(vp);
451 
452 	if (vp->v_usecount == 0) {
453 		/*
454 		 * If vunref() dropped the last use reference on the
455 		 * nullfs vnode, it must be reclaimed, and its lock
456 		 * was split from the lower vnode lock.  Need to do
457 		 * extra unlock before allowing the final vdrop() to
458 		 * free the vnode.
459 		 */
460 		KASSERT(VN_IS_DOOMED(vp),
461 		    ("not reclaimed nullfs vnode %p", vp));
462 		VOP_UNLOCK(vp);
463 	} else {
464 		/*
465 		 * Otherwise, the nullfs vnode still shares the lock
466 		 * with the lower vnode, and must not be unlocked.
467 		 * Also clear the NULLV_NOUNLOCK, the flag is not
468 		 * relevant for future reclamations.
469 		 */
470 		ASSERT_VOP_ELOCKED(vp, "unlink_lowervp");
471 		KASSERT(!VN_IS_DOOMED(vp),
472 		    ("reclaimed nullfs vnode %p", vp));
473 		xp->null_flags &= ~NULLV_NOUNLOCK;
474 	}
475 	vdrop(vp);
476 }
477 
478 static struct vfsops null_vfsops = {
479 	.vfs_extattrctl =	nullfs_extattrctl,
480 	.vfs_fhtovp =		nullfs_fhtovp,
481 	.vfs_init =		nullfs_init,
482 	.vfs_mount =		nullfs_mount,
483 	.vfs_quotactl =		nullfs_quotactl,
484 	.vfs_root =		nullfs_root,
485 	.vfs_statfs =		nullfs_statfs,
486 	.vfs_sync =		nullfs_sync,
487 	.vfs_uninit =		nullfs_uninit,
488 	.vfs_unmount =		nullfs_unmount,
489 	.vfs_vget =		nullfs_vget,
490 	.vfs_reclaim_lowervp =	nullfs_reclaim_lowervp,
491 	.vfs_unlink_lowervp =	nullfs_unlink_lowervp,
492 };
493 
494 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL | VFCF_FILEMOUNT);
495