1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1992, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software donated to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 /*
36 * Null Layer
37 * (See null_vnops.c for a description of what this does.)
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/fcntl.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mount.h>
47 #include <sys/namei.h>
48 #include <sys/proc.h>
49 #include <sys/sysctl.h>
50 #include <sys/vnode.h>
51 #include <sys/jail.h>
52
53 #include <fs/nullfs/null.h>
54
55 static MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure");
56
57 static vfs_fhtovp_t nullfs_fhtovp;
58 static vfs_mount_t nullfs_mount;
59 static vfs_quotactl_t nullfs_quotactl;
60 static vfs_root_t nullfs_root;
61 static vfs_sync_t nullfs_sync;
62 static vfs_statfs_t nullfs_statfs;
63 static vfs_unmount_t nullfs_unmount;
64 static vfs_vget_t nullfs_vget;
65 static vfs_extattrctl_t nullfs_extattrctl;
66
67 SYSCTL_NODE(_vfs, OID_AUTO, nullfs, CTLFLAG_RW, 0, "nullfs");
68
69 static bool null_cache_vnodes = true;
70 SYSCTL_BOOL(_vfs_nullfs, OID_AUTO, cache_vnodes, CTLFLAG_RWTUN,
71 &null_cache_vnodes, 0,
72 "cache free nullfs vnodes");
73
74 /*
75 * Mount null layer
76 */
77 static int
nullfs_mount(struct mount * mp)78 nullfs_mount(struct mount *mp)
79 {
80 struct vnode *lowerrootvp;
81 struct vnode *nullm_rootvp;
82 struct null_mount *xmp;
83 struct null_node *nn;
84 struct nameidata nd, *ndp;
85 char *target;
86 int error, len;
87 bool isvnunlocked;
88 static const char cache_opt_name[] = "cache";
89 static const char nocache_opt_name[] = "nocache";
90 static const char unixbypass_opt_name[] = "unixbypass";
91 static const char nounixbypass_opt_name[] = "nounixbypass";
92
93 NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
94
95 if (mp->mnt_flag & MNT_ROOTFS)
96 return (EOPNOTSUPP);
97
98 /*
99 * Update is a no-op
100 */
101 if (mp->mnt_flag & MNT_UPDATE) {
102 /*
103 * Only support update mounts for NFS export.
104 */
105 if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
106 return (0);
107 else
108 return (EOPNOTSUPP);
109 }
110
111 /*
112 * Get argument
113 */
114 error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target, &len);
115 if (error != 0)
116 error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
117 if (error || target[len - 1] != '\0')
118 return (EINVAL);
119
120 /*
121 * Unlock lower node to avoid possible deadlock.
122 */
123 if (null_is_nullfs_vnode(mp->mnt_vnodecovered) &&
124 VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
125 VOP_UNLOCK(mp->mnt_vnodecovered);
126 isvnunlocked = true;
127 } else {
128 isvnunlocked = false;
129 }
130
131 /*
132 * Find lower node
133 */
134 ndp = &nd;
135 NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target);
136 error = namei(ndp);
137
138 /*
139 * Re-lock vnode.
140 * XXXKIB This is deadlock-prone as well.
141 */
142 if (isvnunlocked)
143 vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
144
145 if (error)
146 return (error);
147 NDFREE_PNBUF(ndp);
148
149 /*
150 * Sanity check on lower vnode
151 */
152 lowerrootvp = ndp->ni_vp;
153
154 /*
155 * Check multi null mount to avoid `lock against myself' panic.
156 */
157 if (null_is_nullfs_vnode(mp->mnt_vnodecovered)) {
158 nn = VTONULL(mp->mnt_vnodecovered);
159 if (nn == NULL || lowerrootvp == nn->null_lowervp) {
160 NULLFSDEBUG("nullfs_mount: multi null mount?\n");
161 vput(lowerrootvp);
162 return (EDEADLK);
163 }
164 }
165
166 /*
167 * Lower vnode must be the same type as the covered vnode - we
168 * don't allow mounting directories to files or vice versa.
169 */
170 if ((lowerrootvp->v_type != VDIR && lowerrootvp->v_type != VREG) ||
171 lowerrootvp->v_type != mp->mnt_vnodecovered->v_type) {
172 NULLFSDEBUG("nullfs_mount: target must be same type as fspath");
173 vput(lowerrootvp);
174 return (EINVAL);
175 }
176
177 xmp = malloc(sizeof(struct null_mount), M_NULLFSMNT,
178 M_WAITOK | M_ZERO);
179
180 /*
181 * Save pointer to underlying FS and the reference to the
182 * lower root vnode.
183 */
184 xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp,
185 &xmp->upper_node);
186 if (xmp->nullm_vfs == NULL) {
187 vput(lowerrootvp);
188 free(xmp, M_NULLFSMNT);
189 return (ENOENT);
190 }
191 vref(lowerrootvp);
192 xmp->nullm_lowerrootvp = lowerrootvp;
193 mp->mnt_data = xmp;
194
195 /*
196 * Make sure the node alias worked.
197 */
198 error = null_nodeget(mp, lowerrootvp, &nullm_rootvp);
199 if (error != 0) {
200 vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node);
201 vrele(lowerrootvp);
202 free(xmp, M_NULLFSMNT);
203 return (error);
204 }
205
206 if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
207 MNT_ILOCK(mp);
208 mp->mnt_flag |= MNT_LOCAL;
209 MNT_IUNLOCK(mp);
210 }
211
212 if (vfs_getopt(mp->mnt_optnew, cache_opt_name, NULL, NULL) == 0) {
213 xmp->nullm_flags |= NULLM_CACHE;
214 } else if (vfs_getopt(mp->mnt_optnew, nocache_opt_name, NULL,
215 NULL) == 0) {
216 ;
217 } else if (null_cache_vnodes &&
218 (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) == 0) {
219 xmp->nullm_flags |= NULLM_CACHE;
220 }
221
222 if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
223 vfs_register_for_notification(xmp->nullm_vfs, mp,
224 &xmp->notify_node);
225 }
226
227 if (vfs_getopt(mp->mnt_optnew, unixbypass_opt_name, NULL, NULL) == 0) {
228 ;
229 } else if (vfs_getopt(mp->mnt_optnew, nounixbypass_opt_name, NULL,
230 NULL) == 0) {
231 xmp->nullm_flags |= NULLM_NOUNPBYPASS;
232 }
233
234 if (lowerrootvp == mp->mnt_vnodecovered) {
235 vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
236 lowerrootvp->v_vflag |= VV_CROSSLOCK;
237 VOP_UNLOCK(lowerrootvp);
238 }
239
240 MNT_ILOCK(mp);
241 if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
242 mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
243 (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
244 MNTK_EXTENDED_SHARED);
245 }
246 mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE;
247 mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
248 (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
249 MNT_IUNLOCK(mp);
250 vfs_getnewfsid(mp);
251 vfs_mountedfrom(mp, target);
252 vput(nullm_rootvp);
253
254 NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
255 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
256 return (0);
257 }
258
259 /*
260 * Free reference to null layer
261 */
262 static int
nullfs_unmount(struct mount * mp,int mntflags)263 nullfs_unmount(struct mount *mp, int mntflags)
264 {
265 struct null_mount *mntdata;
266 int error, flags;
267
268 NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
269
270 if (mntflags & MNT_FORCE)
271 flags = FORCECLOSE;
272 else
273 flags = 0;
274
275 for (;;) {
276 /* There is 1 extra root vnode reference (nullm_rootvp). */
277 error = vflush(mp, 0, flags, curthread);
278 if (error)
279 return (error);
280 MNT_ILOCK(mp);
281 if (mp->mnt_nvnodelistsize == 0) {
282 MNT_IUNLOCK(mp);
283 break;
284 }
285 MNT_IUNLOCK(mp);
286 if ((mntflags & MNT_FORCE) == 0)
287 return (EBUSY);
288 }
289
290 /*
291 * Finally, throw away the null_mount structure
292 */
293 mntdata = mp->mnt_data;
294 if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
295 vfs_unregister_for_notification(mntdata->nullm_vfs,
296 &mntdata->notify_node);
297 }
298 if (mntdata->nullm_lowerrootvp == mp->mnt_vnodecovered) {
299 vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
300 mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK;
301 VOP_UNLOCK(mp->mnt_vnodecovered);
302 }
303 vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node);
304 vrele(mntdata->nullm_lowerrootvp);
305 mp->mnt_data = NULL;
306 free(mntdata, M_NULLFSMNT);
307 return (0);
308 }
309
310 static int
nullfs_root(struct mount * mp,int flags,struct vnode ** vpp)311 nullfs_root(struct mount *mp, int flags, struct vnode **vpp)
312 {
313 struct vnode *vp;
314 struct null_mount *mntdata;
315 int error;
316
317 mntdata = MOUNTTONULLMOUNT(mp);
318 NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", mp,
319 mntdata->nullm_lowerrootvp);
320
321 error = vget(mntdata->nullm_lowerrootvp, flags);
322 if (error == 0) {
323 error = null_nodeget(mp, mntdata->nullm_lowerrootvp, &vp);
324 if (error == 0) {
325 *vpp = vp;
326 }
327 }
328 return (error);
329 }
330
331 static int
nullfs_quotactl(struct mount * mp,int cmd,uid_t uid,void * arg,bool * mp_busy)332 nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg, bool *mp_busy)
333 {
334 struct mount *lowermp;
335 struct null_mount *mntdata;
336 int error;
337 bool unbusy;
338
339 mntdata = MOUNTTONULLMOUNT(mp);
340 lowermp = atomic_load_ptr(&mntdata->nullm_vfs);
341 KASSERT(*mp_busy == true, ("upper mount not busy"));
342 /*
343 * See comment in sys_quotactl() for an explanation of why the
344 * lower mount needs to be busied by the caller of VFS_QUOTACTL()
345 * but may be unbusied by the implementation. We must unbusy
346 * the upper mount for the same reason; otherwise a namei lookup
347 * issued by the VFS_QUOTACTL() implementation could traverse the
348 * upper mount and deadlock.
349 */
350 vfs_unbusy(mp);
351 *mp_busy = false;
352 unbusy = true;
353 error = vfs_busy(lowermp, 0);
354 if (error == 0)
355 error = VFS_QUOTACTL(lowermp, cmd, uid, arg, &unbusy);
356 if (unbusy)
357 vfs_unbusy(lowermp);
358
359 return (error);
360 }
361
362 static int
nullfs_statfs(struct mount * mp,struct statfs * sbp)363 nullfs_statfs(struct mount *mp, struct statfs *sbp)
364 {
365 int error;
366 struct statfs *mstat;
367
368 NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
369 (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
370 (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
371
372 mstat = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK | M_ZERO);
373
374 error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, mstat);
375 if (error) {
376 free(mstat, M_STATFS);
377 return (error);
378 }
379
380 sbp->f_type = mstat->f_type;
381 sbp->f_bsize = mstat->f_bsize;
382 sbp->f_iosize = mstat->f_iosize;
383 sbp->f_blocks = mstat->f_blocks;
384 sbp->f_bfree = mstat->f_bfree;
385 sbp->f_bavail = mstat->f_bavail;
386 sbp->f_files = mstat->f_files;
387 sbp->f_ffree = mstat->f_ffree;
388
389 free(mstat, M_STATFS);
390 return (0);
391 }
392
/*
 * Sync the null mount.
 *
 * Nothing is done and success is always reported; both arguments are
 * ignored.
 * XXX - This relies on no data being cached at the null layer.
 */
static int
nullfs_sync(struct mount *mp, int waitfor)
{

	return (0);
}
401
402 static int
nullfs_vget(struct mount * mp,ino_t ino,int flags,struct vnode ** vpp)403 nullfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
404 {
405 int error;
406
407 KASSERT((flags & LK_TYPE_MASK) != 0,
408 ("nullfs_vget: no lock requested"));
409
410 error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
411 if (error != 0)
412 return (error);
413 return (null_nodeget(mp, *vpp, vpp));
414 }
415
416 static int
nullfs_fhtovp(struct mount * mp,struct fid * fidp,int flags,struct vnode ** vpp)417 nullfs_fhtovp(struct mount *mp, struct fid *fidp, int flags, struct vnode **vpp)
418 {
419 int error;
420
421 error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags,
422 vpp);
423 if (error != 0)
424 return (error);
425 return (null_nodeget(mp, *vpp, vpp));
426 }
427
428 static int
nullfs_extattrctl(struct mount * mp,int cmd,struct vnode * filename_vp,int namespace,const char * attrname)429 nullfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
430 int namespace, const char *attrname)
431 {
432
433 return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd,
434 filename_vp, namespace, attrname));
435 }
436
437 static void
nullfs_reclaim_lowervp(struct mount * mp,struct vnode * lowervp)438 nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
439 {
440 struct vnode *vp;
441
442 vp = null_hashget(mp, lowervp);
443 if (vp == NULL)
444 return;
445 VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
446 vgone(vp);
447 vput(vp);
448 }
449
450 static void
nullfs_unlink_lowervp(struct mount * mp,struct vnode * lowervp)451 nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
452 {
453 struct vnode *vp;
454 struct null_node *xp;
455
456 vp = null_hashget(mp, lowervp);
457 if (vp == NULL)
458 return;
459 xp = VTONULL(vp);
460 xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
461 vhold(vp);
462 vunref(vp);
463
464 if (vp->v_usecount == 0) {
465 /*
466 * If vunref() dropped the last use reference on the
467 * nullfs vnode, it must be reclaimed, and its lock
468 * was split from the lower vnode lock. Need to do
469 * extra unlock before allowing the final vdrop() to
470 * free the vnode.
471 */
472 KASSERT(VN_IS_DOOMED(vp),
473 ("not reclaimed nullfs vnode %p", vp));
474 VOP_UNLOCK(vp);
475 } else {
476 /*
477 * Otherwise, the nullfs vnode still shares the lock
478 * with the lower vnode, and must not be unlocked.
479 * Also clear the NULLV_NOUNLOCK, the flag is not
480 * relevant for future reclamations.
481 */
482 ASSERT_VOP_ELOCKED(vp, "unlink_lowervp");
483 KASSERT(!VN_IS_DOOMED(vp),
484 ("reclaimed nullfs vnode %p", vp));
485 xp->null_flags &= ~NULLV_NOUNLOCK;
486 }
487 vdrop(vp);
488 }
489
490 static struct vfsops null_vfsops = {
491 .vfs_extattrctl = nullfs_extattrctl,
492 .vfs_fhtovp = nullfs_fhtovp,
493 .vfs_init = nullfs_init,
494 .vfs_mount = nullfs_mount,
495 .vfs_quotactl = nullfs_quotactl,
496 .vfs_root = nullfs_root,
497 .vfs_statfs = nullfs_statfs,
498 .vfs_sync = nullfs_sync,
499 .vfs_uninit = nullfs_uninit,
500 .vfs_unmount = nullfs_unmount,
501 .vfs_vget = nullfs_vget,
502 .vfs_reclaim_lowervp = nullfs_reclaim_lowervp,
503 .vfs_unlink_lowervp = nullfs_unlink_lowervp,
504 };
505
506 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL | VFCF_FILEMOUNT);
507