xref: /freebsd/sys/fs/nullfs/null_vfsops.c (revision 7bdf2b5d5fbabfc8749c4ff6e618c3e843b14de0)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1992, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * Null Layer
37  * (See null_vnops.c for a description of what this does.)
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/fcntl.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mount.h>
47 #include <sys/namei.h>
48 #include <sys/proc.h>
49 #include <sys/sysctl.h>
50 #include <sys/vnode.h>
51 #include <sys/jail.h>
52 
53 #include <fs/nullfs/null.h>
54 
55 static MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure");
56 
57 static vfs_fhtovp_t	nullfs_fhtovp;
58 static vfs_mount_t	nullfs_mount;
59 static vfs_quotactl_t	nullfs_quotactl;
60 static vfs_root_t	nullfs_root;
61 static vfs_sync_t	nullfs_sync;
62 static vfs_statfs_t	nullfs_statfs;
63 static vfs_unmount_t	nullfs_unmount;
64 static vfs_vget_t	nullfs_vget;
65 static vfs_extattrctl_t	nullfs_extattrctl;
66 
67 SYSCTL_NODE(_vfs, OID_AUTO, nullfs, CTLFLAG_RW, 0, "nullfs");
68 
69 static bool null_cache_vnodes = true;
70 SYSCTL_BOOL(_vfs_nullfs, OID_AUTO, cache_vnodes, CTLFLAG_RWTUN,
71     &null_cache_vnodes, 0,
72     "cache free nullfs vnodes");
73 
74 /*
75  * Mount null layer
76  */
77 static int
nullfs_mount(struct mount * mp)78 nullfs_mount(struct mount *mp)
79 {
80 	struct vnode *lowerrootvp;
81 	struct vnode *nullm_rootvp;
82 	struct null_mount *xmp;
83 	struct null_node *nn;
84 	struct nameidata nd, *ndp;
85 	char *target;
86 	int error, len;
87 	bool isvnunlocked;
88 	static const char cache_opt_name[] = "cache";
89 	static const char nocache_opt_name[] = "nocache";
90 	static const char unixbypass_opt_name[] = "unixbypass";
91 	static const char nounixbypass_opt_name[] = "nounixbypass";
92 
93 	NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
94 
95 	if (mp->mnt_flag & MNT_ROOTFS)
96 		return (EOPNOTSUPP);
97 
98 	/*
99 	 * Update is a no-op
100 	 */
101 	if (mp->mnt_flag & MNT_UPDATE) {
102 		/*
103 		 * Only support update mounts for NFS export.
104 		 */
105 		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
106 			return (0);
107 		else
108 			return (EOPNOTSUPP);
109 	}
110 
111 	/*
112 	 * Get argument
113 	 */
114 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target, &len);
115 	if (error != 0)
116 		error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
117 	if (error || target[len - 1] != '\0')
118 		return (EINVAL);
119 
120 	/*
121 	 * Unlock lower node to avoid possible deadlock.
122 	 */
123 	if (null_is_nullfs_vnode(mp->mnt_vnodecovered) &&
124 	    VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
125 		VOP_UNLOCK(mp->mnt_vnodecovered);
126 		isvnunlocked = true;
127 	} else {
128 		isvnunlocked = false;
129 	}
130 
131 	/*
132 	 * Find lower node
133 	 */
134 	ndp = &nd;
135 	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target);
136 	error = namei(ndp);
137 
138 	/*
139 	 * Re-lock vnode.
140 	 * XXXKIB This is deadlock-prone as well.
141 	 */
142 	if (isvnunlocked)
143 		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
144 
145 	if (error)
146 		return (error);
147 	NDFREE_PNBUF(ndp);
148 
149 	/*
150 	 * Sanity check on lower vnode
151 	 */
152 	lowerrootvp = ndp->ni_vp;
153 
154 	/*
155 	 * Do not allow to mount a vnode over itself.
156 	 */
157 	if (mp->mnt_vnodecovered == lowerrootvp) {
158 		vput(lowerrootvp);
159 		return (EDEADLK);
160 	}
161 
162 	/*
163 	 * Check multi null mount to avoid `lock against myself' panic.
164 	 */
165 	if (null_is_nullfs_vnode(mp->mnt_vnodecovered)) {
166 		nn = VTONULL(mp->mnt_vnodecovered);
167 		if (nn == NULL || lowerrootvp == nn->null_lowervp) {
168 			NULLFSDEBUG("nullfs_mount: multi null mount?\n");
169 			vput(lowerrootvp);
170 			return (EDEADLK);
171 		}
172 	}
173 
174 	/*
175 	 * Lower vnode must be the same type as the covered vnode - we
176 	 * don't allow mounting directories to files or vice versa.
177 	 */
178 	if ((lowerrootvp->v_type != VDIR && lowerrootvp->v_type != VREG) ||
179 	    lowerrootvp->v_type != mp->mnt_vnodecovered->v_type) {
180 		NULLFSDEBUG("nullfs_mount: target must be same type as fspath");
181 		vput(lowerrootvp);
182 		return (EINVAL);
183 	}
184 
185 	xmp = malloc(sizeof(struct null_mount), M_NULLFSMNT,
186 	    M_WAITOK | M_ZERO);
187 
188 	/*
189 	 * Save pointer to underlying FS and the reference to the
190 	 * lower root vnode.
191 	 */
192 	xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp,
193 	    &xmp->upper_node);
194 	if (xmp->nullm_vfs == NULL) {
195 		vput(lowerrootvp);
196 		free(xmp, M_NULLFSMNT);
197 		return (ENOENT);
198 	}
199 	vref(lowerrootvp);
200 	xmp->nullm_lowerrootvp = lowerrootvp;
201 	mp->mnt_data = xmp;
202 
203 	/*
204 	 * Make sure the node alias worked.
205 	 */
206 	error = null_nodeget(mp, lowerrootvp, &nullm_rootvp);
207 	if (error != 0) {
208 		vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node);
209 		vrele(lowerrootvp);
210 		free(xmp, M_NULLFSMNT);
211 		return (error);
212 	}
213 
214 	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
215 		MNT_ILOCK(mp);
216 		mp->mnt_flag |= MNT_LOCAL;
217 		MNT_IUNLOCK(mp);
218 	}
219 
220 	if (vfs_getopt(mp->mnt_optnew, cache_opt_name, NULL, NULL) == 0) {
221 		xmp->nullm_flags |= NULLM_CACHE;
222 	} else if (vfs_getopt(mp->mnt_optnew, nocache_opt_name, NULL,
223 	    NULL) == 0) {
224 		;
225 	} else if (null_cache_vnodes &&
226 	    (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) == 0) {
227 		xmp->nullm_flags |= NULLM_CACHE;
228 	}
229 
230 	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
231 		vfs_register_for_notification(xmp->nullm_vfs, mp,
232 		    &xmp->notify_node);
233 	}
234 
235 	if (vfs_getopt(mp->mnt_optnew, unixbypass_opt_name, NULL, NULL) == 0) {
236 		;
237 	} else if (vfs_getopt(mp->mnt_optnew, nounixbypass_opt_name, NULL,
238 	    NULL) == 0) {
239 		xmp->nullm_flags |= NULLM_NOUNPBYPASS;
240 	}
241 
242 	if (lowerrootvp == mp->mnt_vnodecovered) {
243 		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
244 		lowerrootvp->v_vflag |= VV_CROSSLOCK;
245 		VOP_UNLOCK(lowerrootvp);
246 	}
247 
248 	MNT_ILOCK(mp);
249 	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
250 		mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
251 		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
252 		    MNTK_EXTENDED_SHARED);
253 	}
254 	mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE;
255 	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
256 	    (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
257 	MNT_IUNLOCK(mp);
258 	vfs_getnewfsid(mp);
259 	vfs_mountedfrom(mp, target);
260 	vput(nullm_rootvp);
261 
262 	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
263 		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
264 	return (0);
265 }
266 
267 /*
268  * Free reference to null layer
269  */
270 static int
nullfs_unmount(struct mount * mp,int mntflags)271 nullfs_unmount(struct mount *mp, int mntflags)
272 {
273 	struct null_mount *mntdata;
274 	int error, flags;
275 
276 	NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
277 
278 	if (mntflags & MNT_FORCE)
279 		flags = FORCECLOSE;
280 	else
281 		flags = 0;
282 
283 	for (;;) {
284 		/* There is 1 extra root vnode reference (nullm_rootvp). */
285 		error = vflush(mp, 0, flags, curthread);
286 		if (error)
287 			return (error);
288 		MNT_ILOCK(mp);
289 		if (mp->mnt_nvnodelistsize == 0) {
290 			MNT_IUNLOCK(mp);
291 			break;
292 		}
293 		MNT_IUNLOCK(mp);
294 		if ((mntflags & MNT_FORCE) == 0)
295 			return (EBUSY);
296 	}
297 
298 	/*
299 	 * Finally, throw away the null_mount structure
300 	 */
301 	mntdata = mp->mnt_data;
302 	if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
303 		vfs_unregister_for_notification(mntdata->nullm_vfs,
304 		    &mntdata->notify_node);
305 	}
306 	if (mntdata->nullm_lowerrootvp == mp->mnt_vnodecovered) {
307 		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
308 		mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK;
309 		VOP_UNLOCK(mp->mnt_vnodecovered);
310 	}
311 	vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node);
312 	vrele(mntdata->nullm_lowerrootvp);
313 	mp->mnt_data = NULL;
314 	free(mntdata, M_NULLFSMNT);
315 	return (0);
316 }
317 
318 static int
nullfs_root(struct mount * mp,int flags,struct vnode ** vpp)319 nullfs_root(struct mount *mp, int flags, struct vnode **vpp)
320 {
321 	struct vnode *vp;
322 	struct null_mount *mntdata;
323 	int error;
324 
325 	mntdata = MOUNTTONULLMOUNT(mp);
326 	NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", mp,
327 	    mntdata->nullm_lowerrootvp);
328 
329 	error = vget(mntdata->nullm_lowerrootvp, flags);
330 	if (error == 0) {
331 		error = null_nodeget(mp, mntdata->nullm_lowerrootvp, &vp);
332 		if (error == 0) {
333 			*vpp = vp;
334 		}
335 	}
336 	return (error);
337 }
338 
339 static int
nullfs_quotactl(struct mount * mp,int cmd,uid_t uid,void * arg,bool * mp_busy)340 nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg, bool *mp_busy)
341 {
342 	struct mount *lowermp;
343 	struct null_mount *mntdata;
344 	int error;
345 	bool unbusy;
346 
347 	mntdata = MOUNTTONULLMOUNT(mp);
348 	lowermp = atomic_load_ptr(&mntdata->nullm_vfs);
349 	KASSERT(*mp_busy == true, ("upper mount not busy"));
350 	/*
351 	 * See comment in sys_quotactl() for an explanation of why the
352 	 * lower mount needs to be busied by the caller of VFS_QUOTACTL()
353 	 * but may be unbusied by the implementation.  We must unbusy
354 	 * the upper mount for the same reason; otherwise a namei lookup
355 	 * issued by the VFS_QUOTACTL() implementation could traverse the
356 	 * upper mount and deadlock.
357 	 */
358 	vfs_unbusy(mp);
359 	*mp_busy = false;
360 	unbusy = true;
361 	error = vfs_busy(lowermp, 0);
362 	if (error == 0)
363 		error = VFS_QUOTACTL(lowermp, cmd, uid, arg, &unbusy);
364 	if (unbusy)
365 		vfs_unbusy(lowermp);
366 
367 	return (error);
368 }
369 
370 static int
nullfs_statfs(struct mount * mp,struct statfs * sbp)371 nullfs_statfs(struct mount *mp, struct statfs *sbp)
372 {
373 	int error;
374 	struct statfs *mstat;
375 
376 	NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
377 	    (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
378 	    (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
379 
380 	mstat = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK | M_ZERO);
381 
382 	error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, mstat);
383 	if (error) {
384 		free(mstat, M_STATFS);
385 		return (error);
386 	}
387 
388 	sbp->f_type = mstat->f_type;
389 	sbp->f_bsize = mstat->f_bsize;
390 	sbp->f_iosize = mstat->f_iosize;
391 	sbp->f_blocks = mstat->f_blocks;
392 	sbp->f_bfree = mstat->f_bfree;
393 	sbp->f_bavail = mstat->f_bavail;
394 	sbp->f_files = mstat->f_files;
395 	sbp->f_ffree = mstat->f_ffree;
396 
397 	free(mstat, M_STATFS);
398 	return (0);
399 }
400 
/*
 * Sync entry point for the null layer.  Performs no work and always
 * reports success; both arguments are ignored.
 */
static int
nullfs_sync(struct mount *mp, int waitfor)
{

	/* XXX - Assumes no data cached at null layer. */
	return (0);
}
409 
410 static int
nullfs_vget(struct mount * mp,ino_t ino,int flags,struct vnode ** vpp)411 nullfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
412 {
413 	int error;
414 
415 	KASSERT((flags & LK_TYPE_MASK) != 0,
416 	    ("nullfs_vget: no lock requested"));
417 
418 	error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
419 	if (error != 0)
420 		return (error);
421 	return (null_nodeget(mp, *vpp, vpp));
422 }
423 
424 static int
nullfs_fhtovp(struct mount * mp,struct fid * fidp,int flags,struct vnode ** vpp)425 nullfs_fhtovp(struct mount *mp, struct fid *fidp, int flags, struct vnode **vpp)
426 {
427 	int error;
428 
429 	error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags,
430 	    vpp);
431 	if (error != 0)
432 		return (error);
433 	return (null_nodeget(mp, *vpp, vpp));
434 }
435 
436 static int
nullfs_extattrctl(struct mount * mp,int cmd,struct vnode * filename_vp,int namespace,const char * attrname)437 nullfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
438     int namespace, const char *attrname)
439 {
440 
441 	return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd,
442 	    filename_vp, namespace, attrname));
443 }
444 
445 static void
nullfs_reclaim_lowervp(struct mount * mp,struct vnode * lowervp)446 nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
447 {
448 	struct vnode *vp;
449 
450 	vp = null_hashget(mp, lowervp);
451 	if (vp == NULL)
452 		return;
453 	VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
454 	vgone(vp);
455 	vput(vp);
456 }
457 
458 static void
nullfs_unlink_lowervp(struct mount * mp,struct vnode * lowervp)459 nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
460 {
461 	struct vnode *vp;
462 	struct null_node *xp;
463 
464 	vp = null_hashget(mp, lowervp);
465 	if (vp == NULL)
466 		return;
467 	xp = VTONULL(vp);
468 	xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
469 	vhold(vp);
470 	vunref(vp);
471 
472 	if (VN_IS_DOOMED(vp)) {
473 		/*
474 		 * If the vnode is doomed, its lock was split from the lower
475 		 * vnode lock.  Therefore we need to do an extra unlock before
476 		 * allowing the final vdrop() to free the vnode.
477 		 */
478 		VOP_UNLOCK(vp);
479 	} else {
480 		/*
481 		 * Otherwise, the nullfs vnode still shares the lock
482 		 * with the lower vnode, and must not be unlocked.
483 		 * Also clear the NULLV_NOUNLOCK, the flag is not
484 		 * relevant for future reclamations.
485 		 */
486 		ASSERT_VOP_ELOCKED(vp, "unlink_lowervp");
487 		xp->null_flags &= ~NULLV_NOUNLOCK;
488 	}
489 	vdrop(vp);
490 }
491 
492 static struct vfsops null_vfsops = {
493 	.vfs_extattrctl =	nullfs_extattrctl,
494 	.vfs_fhtovp =		nullfs_fhtovp,
495 	.vfs_init =		nullfs_init,
496 	.vfs_mount =		nullfs_mount,
497 	.vfs_quotactl =		nullfs_quotactl,
498 	.vfs_root =		nullfs_root,
499 	.vfs_statfs =		nullfs_statfs,
500 	.vfs_sync =		nullfs_sync,
501 	.vfs_uninit =		nullfs_uninit,
502 	.vfs_unmount =		nullfs_unmount,
503 	.vfs_vget =		nullfs_vget,
504 	.vfs_reclaim_lowervp =	nullfs_reclaim_lowervp,
505 	.vfs_unlink_lowervp =	nullfs_unlink_lowervp,
506 };
507 
508 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL | VFCF_FILEMOUNT);
509