xref: /freebsd/sys/fs/nullfs/null_vfsops.c (revision 78cd75393ec79565c63927bf200f06f839a1dc05)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1992, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * Null Layer
37  * (See null_vnops.c for a description of what this does.)
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/fcntl.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mount.h>
47 #include <sys/namei.h>
48 #include <sys/proc.h>
49 #include <sys/vnode.h>
50 #include <sys/jail.h>
51 
52 #include <fs/nullfs/null.h>
53 
54 static MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure");
55 
56 static vfs_fhtovp_t	nullfs_fhtovp;
57 static vfs_mount_t	nullfs_mount;
58 static vfs_quotactl_t	nullfs_quotactl;
59 static vfs_root_t	nullfs_root;
60 static vfs_sync_t	nullfs_sync;
61 static vfs_statfs_t	nullfs_statfs;
62 static vfs_unmount_t	nullfs_unmount;
63 static vfs_vget_t	nullfs_vget;
64 static vfs_extattrctl_t	nullfs_extattrctl;
65 
66 /*
67  * Mount null layer
68  */
69 static int
70 nullfs_mount(struct mount *mp)
71 {
72 	struct vnode *lowerrootvp;
73 	struct vnode *nullm_rootvp;
74 	struct null_mount *xmp;
75 	struct null_node *nn;
76 	struct nameidata nd, *ndp;
77 	char *target;
78 	int error, len;
79 	bool isvnunlocked;
80 
81 	NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
82 
83 	if (mp->mnt_flag & MNT_ROOTFS)
84 		return (EOPNOTSUPP);
85 
86 	/*
87 	 * Update is a no-op
88 	 */
89 	if (mp->mnt_flag & MNT_UPDATE) {
90 		/*
91 		 * Only support update mounts for NFS export.
92 		 */
93 		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
94 			return (0);
95 		else
96 			return (EOPNOTSUPP);
97 	}
98 
99 	/*
100 	 * Get argument
101 	 */
102 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target, &len);
103 	if (error != 0)
104 		error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
105 	if (error || target[len - 1] != '\0')
106 		return (EINVAL);
107 
108 	/*
109 	 * Unlock lower node to avoid possible deadlock.
110 	 */
111 	if (mp->mnt_vnodecovered->v_op == &null_vnodeops &&
112 	    VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
113 		VOP_UNLOCK(mp->mnt_vnodecovered);
114 		isvnunlocked = true;
115 	} else {
116 		isvnunlocked = false;
117 	}
118 
119 	/*
120 	 * Find lower node
121 	 */
122 	ndp = &nd;
123 	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target);
124 	error = namei(ndp);
125 
126 	/*
127 	 * Re-lock vnode.
128 	 * XXXKIB This is deadlock-prone as well.
129 	 */
130 	if (isvnunlocked)
131 		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
132 
133 	if (error)
134 		return (error);
135 	NDFREE_PNBUF(ndp);
136 
137 	/*
138 	 * Sanity check on lower vnode
139 	 */
140 	lowerrootvp = ndp->ni_vp;
141 
142 	/*
143 	 * Check multi null mount to avoid `lock against myself' panic.
144 	 */
145 	if (mp->mnt_vnodecovered->v_op == &null_vnodeops) {
146 		nn = VTONULL(mp->mnt_vnodecovered);
147 		if (nn == NULL || lowerrootvp == nn->null_lowervp) {
148 			NULLFSDEBUG("nullfs_mount: multi null mount?\n");
149 			vput(lowerrootvp);
150 			return (EDEADLK);
151 		}
152 	}
153 
154 	/*
155 	 * Lower vnode must be the same type as the covered vnode - we
156 	 * don't allow mounting directories to files or vice versa.
157 	 */
158 	if ((lowerrootvp->v_type != VDIR && lowerrootvp->v_type != VREG) ||
159 	    lowerrootvp->v_type != mp->mnt_vnodecovered->v_type) {
160 		NULLFSDEBUG("nullfs_mount: target must be same type as fspath");
161 		vput(lowerrootvp);
162 		return (EINVAL);
163 	}
164 
165 	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
166 	    M_NULLFSMNT, M_WAITOK | M_ZERO);
167 
168 	/*
169 	 * Save pointer to underlying FS and the reference to the
170 	 * lower root vnode.
171 	 */
172 	xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp,
173 	    &xmp->upper_node);
174 	if (xmp->nullm_vfs == NULL) {
175 		vput(lowerrootvp);
176 		free(xmp, M_NULLFSMNT);
177 		return (ENOENT);
178 	}
179 	vref(lowerrootvp);
180 	xmp->nullm_lowerrootvp = lowerrootvp;
181 	mp->mnt_data = xmp;
182 
183 	/*
184 	 * Make sure the node alias worked.
185 	 */
186 	error = null_nodeget(mp, lowerrootvp, &nullm_rootvp);
187 	if (error != 0) {
188 		vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node);
189 		vrele(lowerrootvp);
190 		free(xmp, M_NULLFSMNT);
191 		return (error);
192 	}
193 
194 	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
195 		MNT_ILOCK(mp);
196 		mp->mnt_flag |= MNT_LOCAL;
197 		MNT_IUNLOCK(mp);
198 	}
199 
200 	xmp->nullm_flags |= NULLM_CACHE;
201 	if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0 ||
202 	    (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) != 0)
203 		xmp->nullm_flags &= ~NULLM_CACHE;
204 
205 	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
206 		vfs_register_for_notification(xmp->nullm_vfs, mp,
207 		    &xmp->notify_node);
208 	}
209 
210 	if (lowerrootvp == mp->mnt_vnodecovered) {
211 		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
212 		lowerrootvp->v_vflag |= VV_CROSSLOCK;
213 		VOP_UNLOCK(lowerrootvp);
214 	}
215 
216 	MNT_ILOCK(mp);
217 	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
218 		mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
219 		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
220 		    MNTK_EXTENDED_SHARED);
221 	}
222 	mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE;
223 	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
224 	    (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
225 	MNT_IUNLOCK(mp);
226 	vfs_getnewfsid(mp);
227 	vfs_mountedfrom(mp, target);
228 	vput(nullm_rootvp);
229 
230 	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
231 		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
232 	return (0);
233 }
234 
235 /*
236  * Free reference to null layer
237  */
238 static int
239 nullfs_unmount(struct mount *mp, int mntflags)
240 {
241 	struct null_mount *mntdata;
242 	int error, flags;
243 
244 	NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
245 
246 	if (mntflags & MNT_FORCE)
247 		flags = FORCECLOSE;
248 	else
249 		flags = 0;
250 
251 	for (;;) {
252 		/* There is 1 extra root vnode reference (nullm_rootvp). */
253 		error = vflush(mp, 0, flags, curthread);
254 		if (error)
255 			return (error);
256 		MNT_ILOCK(mp);
257 		if (mp->mnt_nvnodelistsize == 0) {
258 			MNT_IUNLOCK(mp);
259 			break;
260 		}
261 		MNT_IUNLOCK(mp);
262 		if ((mntflags & MNT_FORCE) == 0)
263 			return (EBUSY);
264 	}
265 
266 	/*
267 	 * Finally, throw away the null_mount structure
268 	 */
269 	mntdata = mp->mnt_data;
270 	if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
271 		vfs_unregister_for_notification(mntdata->nullm_vfs,
272 		    &mntdata->notify_node);
273 	}
274 	if (mntdata->nullm_lowerrootvp == mp->mnt_vnodecovered) {
275 		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
276 		mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK;
277 		VOP_UNLOCK(mp->mnt_vnodecovered);
278 	}
279 	vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node);
280 	vrele(mntdata->nullm_lowerrootvp);
281 	mp->mnt_data = NULL;
282 	free(mntdata, M_NULLFSMNT);
283 	return (0);
284 }
285 
286 static int
287 nullfs_root(struct mount *mp, int flags, struct vnode **vpp)
288 {
289 	struct vnode *vp;
290 	struct null_mount *mntdata;
291 	int error;
292 
293 	mntdata = MOUNTTONULLMOUNT(mp);
294 	NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", mp,
295 	    mntdata->nullm_lowerrootvp);
296 
297 	error = vget(mntdata->nullm_lowerrootvp, flags);
298 	if (error == 0) {
299 		error = null_nodeget(mp, mntdata->nullm_lowerrootvp, &vp);
300 		if (error == 0) {
301 			*vpp = vp;
302 		}
303 	}
304 	return (error);
305 }
306 
307 static int
308 nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg, bool *mp_busy)
309 {
310 	struct mount *lowermp;
311 	struct null_mount *mntdata;
312 	int error;
313 	bool unbusy;
314 
315 	mntdata = MOUNTTONULLMOUNT(mp);
316 	lowermp = atomic_load_ptr(&mntdata->nullm_vfs);
317 	KASSERT(*mp_busy == true, ("upper mount not busy"));
318 	/*
319 	 * See comment in sys_quotactl() for an explanation of why the
320 	 * lower mount needs to be busied by the caller of VFS_QUOTACTL()
321 	 * but may be unbusied by the implementation.  We must unbusy
322 	 * the upper mount for the same reason; otherwise a namei lookup
323 	 * issued by the VFS_QUOTACTL() implementation could traverse the
324 	 * upper mount and deadlock.
325 	 */
326 	vfs_unbusy(mp);
327 	*mp_busy = false;
328 	unbusy = true;
329 	error = vfs_busy(lowermp, 0);
330 	if (error == 0)
331 		error = VFS_QUOTACTL(lowermp, cmd, uid, arg, &unbusy);
332 	if (unbusy)
333 		vfs_unbusy(lowermp);
334 
335 	return (error);
336 }
337 
338 static int
339 nullfs_statfs(struct mount *mp, struct statfs *sbp)
340 {
341 	int error;
342 	struct statfs *mstat;
343 
344 	NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
345 	    (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
346 	    (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
347 
348 	mstat = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK | M_ZERO);
349 
350 	error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, mstat);
351 	if (error) {
352 		free(mstat, M_STATFS);
353 		return (error);
354 	}
355 
356 	/* now copy across the "interesting" information and fake the rest */
357 	sbp->f_type = mstat->f_type;
358 	sbp->f_flags = (sbp->f_flags & (MNT_RDONLY | MNT_NOEXEC | MNT_NOSUID |
359 	    MNT_UNION | MNT_NOSYMFOLLOW | MNT_AUTOMOUNTED | MNT_IGNORE)) |
360 	    (mstat->f_flags & ~(MNT_ROOTFS | MNT_AUTOMOUNTED));
361 	sbp->f_bsize = mstat->f_bsize;
362 	sbp->f_iosize = mstat->f_iosize;
363 	sbp->f_blocks = mstat->f_blocks;
364 	sbp->f_bfree = mstat->f_bfree;
365 	sbp->f_bavail = mstat->f_bavail;
366 	sbp->f_files = mstat->f_files;
367 	sbp->f_ffree = mstat->f_ffree;
368 
369 	free(mstat, M_STATFS);
370 	return (0);
371 }
372 
/*
 * Sync the mount.  Nothing is done here and success is always returned.
 */
static int
nullfs_sync(struct mount *mp, int waitfor)
{

	/*
	 * XXX - Assumes no data cached at null layer.
	 */
	return (0);
}
381 
382 static int
383 nullfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
384 {
385 	int error;
386 
387 	KASSERT((flags & LK_TYPE_MASK) != 0,
388 	    ("nullfs_vget: no lock requested"));
389 
390 	error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
391 	if (error != 0)
392 		return (error);
393 	return (null_nodeget(mp, *vpp, vpp));
394 }
395 
396 static int
397 nullfs_fhtovp(struct mount *mp, struct fid *fidp, int flags, struct vnode **vpp)
398 {
399 	int error;
400 
401 	error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags,
402 	    vpp);
403 	if (error != 0)
404 		return (error);
405 	return (null_nodeget(mp, *vpp, vpp));
406 }
407 
408 static int
409 nullfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
410     int namespace, const char *attrname)
411 {
412 
413 	return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd,
414 	    filename_vp, namespace, attrname));
415 }
416 
417 static void
418 nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
419 {
420 	struct vnode *vp;
421 
422 	vp = null_hashget(mp, lowervp);
423 	if (vp == NULL)
424 		return;
425 	VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
426 	vgone(vp);
427 	vput(vp);
428 }
429 
430 static void
431 nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
432 {
433 	struct vnode *vp;
434 	struct null_node *xp;
435 
436 	vp = null_hashget(mp, lowervp);
437 	if (vp == NULL)
438 		return;
439 	xp = VTONULL(vp);
440 	xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
441 	vhold(vp);
442 	vunref(vp);
443 
444 	if (vp->v_usecount == 0) {
445 		/*
446 		 * If vunref() dropped the last use reference on the
447 		 * nullfs vnode, it must be reclaimed, and its lock
448 		 * was split from the lower vnode lock.  Need to do
449 		 * extra unlock before allowing the final vdrop() to
450 		 * free the vnode.
451 		 */
452 		KASSERT(VN_IS_DOOMED(vp),
453 		    ("not reclaimed nullfs vnode %p", vp));
454 		VOP_UNLOCK(vp);
455 	} else {
456 		/*
457 		 * Otherwise, the nullfs vnode still shares the lock
458 		 * with the lower vnode, and must not be unlocked.
459 		 * Also clear the NULLV_NOUNLOCK, the flag is not
460 		 * relevant for future reclamations.
461 		 */
462 		ASSERT_VOP_ELOCKED(vp, "unlink_lowervp");
463 		KASSERT(!VN_IS_DOOMED(vp),
464 		    ("reclaimed nullfs vnode %p", vp));
465 		xp->null_flags &= ~NULLV_NOUNLOCK;
466 	}
467 	vdrop(vp);
468 }
469 
470 static struct vfsops null_vfsops = {
471 	.vfs_extattrctl =	nullfs_extattrctl,
472 	.vfs_fhtovp =		nullfs_fhtovp,
473 	.vfs_init =		nullfs_init,
474 	.vfs_mount =		nullfs_mount,
475 	.vfs_quotactl =		nullfs_quotactl,
476 	.vfs_root =		nullfs_root,
477 	.vfs_statfs =		nullfs_statfs,
478 	.vfs_sync =		nullfs_sync,
479 	.vfs_uninit =		nullfs_uninit,
480 	.vfs_unmount =		nullfs_unmount,
481 	.vfs_vget =		nullfs_vget,
482 	.vfs_reclaim_lowervp =	nullfs_reclaim_lowervp,
483 	.vfs_unlink_lowervp =	nullfs_unlink_lowervp,
484 };
485 
486 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL | VFCF_FILEMOUNT);
487