1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1992, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software donated to Berkeley by 8 * Jan-Simon Pendry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * Null Layer 37 * (See null_vnops.c for a description of what this does.) 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/fcntl.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/malloc.h> 46 #include <sys/mount.h> 47 #include <sys/namei.h> 48 #include <sys/proc.h> 49 #include <sys/sysctl.h> 50 #include <sys/vnode.h> 51 #include <sys/jail.h> 52 53 #include <fs/nullfs/null.h> 54 55 static MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure"); 56 57 static vfs_fhtovp_t nullfs_fhtovp; 58 static vfs_mount_t nullfs_mount; 59 static vfs_quotactl_t nullfs_quotactl; 60 static vfs_root_t nullfs_root; 61 static vfs_sync_t nullfs_sync; 62 static vfs_statfs_t nullfs_statfs; 63 static vfs_unmount_t nullfs_unmount; 64 static vfs_vget_t nullfs_vget; 65 static vfs_extattrctl_t nullfs_extattrctl; 66 67 SYSCTL_NODE(_vfs, OID_AUTO, nullfs, CTLFLAG_RW, 0, "nullfs"); 68 69 static bool null_cache_vnodes = true; 70 SYSCTL_BOOL(_vfs_nullfs, OID_AUTO, cache_vnodes, CTLFLAG_RWTUN, 71 &null_cache_vnodes, 0, 72 "cache free nullfs vnodes"); 73 74 /* 75 * Mount null layer 76 */ 77 static int 78 nullfs_mount(struct mount *mp) 79 { 80 struct vnode *lowerrootvp; 81 struct vnode *nullm_rootvp; 82 struct null_mount *xmp; 83 struct null_node *nn; 84 struct nameidata nd, *ndp; 85 char *target; 86 int error, len; 87 bool isvnunlocked; 88 static const char cache_opt_name[] = "cache"; 89 static const char nocache_opt_name[] = "nocache"; 90 static const char unixbypass_opt_name[] = "unixbypass"; 91 static const char nounixbypass_opt_name[] = "nounixbypass"; 92 93 NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp); 94 95 if (mp->mnt_flag & MNT_ROOTFS) 96 return (EOPNOTSUPP); 97 98 /* 99 * Update is a no-op 100 */ 101 if (mp->mnt_flag & MNT_UPDATE) { 102 /* 103 * Only support update mounts for NFS export. 104 */ 105 if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) 106 return (0); 107 else 108 return (EOPNOTSUPP); 109 } 110 111 /* 112 * Get argument 113 */ 114 error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target, &len); 115 if (error != 0) 116 error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len); 117 if (error || target[len - 1] != '\0') 118 return (EINVAL); 119 120 /* 121 * Unlock lower node to avoid possible deadlock. 122 */ 123 if (null_is_nullfs_vnode(mp->mnt_vnodecovered) && 124 VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) { 125 VOP_UNLOCK(mp->mnt_vnodecovered); 126 isvnunlocked = true; 127 } else { 128 isvnunlocked = false; 129 } 130 131 /* 132 * Find lower node 133 */ 134 ndp = &nd; 135 NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target); 136 error = namei(ndp); 137 138 /* 139 * Re-lock vnode. 140 * XXXKIB This is deadlock-prone as well. 141 */ 142 if (isvnunlocked) 143 vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY); 144 145 if (error) 146 return (error); 147 NDFREE_PNBUF(ndp); 148 149 /* 150 * Sanity check on lower vnode 151 */ 152 lowerrootvp = ndp->ni_vp; 153 154 /* 155 * Do not allow to mount a vnode over itself. 156 */ 157 if (mp->mnt_vnodecovered == lowerrootvp) { 158 vput(lowerrootvp); 159 return (EDEADLK); 160 } 161 162 /* 163 * Check multi null mount to avoid `lock against myself' panic. 164 */ 165 if (null_is_nullfs_vnode(mp->mnt_vnodecovered)) { 166 nn = VTONULL(mp->mnt_vnodecovered); 167 if (nn == NULL || lowerrootvp == nn->null_lowervp) { 168 NULLFSDEBUG("nullfs_mount: multi null mount?\n"); 169 vput(lowerrootvp); 170 return (EDEADLK); 171 } 172 } 173 174 /* 175 * Lower vnode must be the same type as the covered vnode - we 176 * don't allow mounting directories to files or vice versa. 177 */ 178 if ((lowerrootvp->v_type != VDIR && lowerrootvp->v_type != VREG) || 179 lowerrootvp->v_type != mp->mnt_vnodecovered->v_type) { 180 NULLFSDEBUG("nullfs_mount: target must be same type as fspath"); 181 vput(lowerrootvp); 182 return (EINVAL); 183 } 184 185 xmp = malloc(sizeof(struct null_mount), M_NULLFSMNT, 186 M_WAITOK | M_ZERO); 187 188 /* 189 * Save pointer to underlying FS and the reference to the 190 * lower root vnode. 191 */ 192 xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp, 193 &xmp->upper_node); 194 if (xmp->nullm_vfs == NULL) { 195 vput(lowerrootvp); 196 free(xmp, M_NULLFSMNT); 197 return (ENOENT); 198 } 199 vref(lowerrootvp); 200 xmp->nullm_lowerrootvp = lowerrootvp; 201 mp->mnt_data = xmp; 202 203 /* 204 * Make sure the node alias worked. 205 */ 206 error = null_nodeget(mp, lowerrootvp, &nullm_rootvp); 207 if (error != 0) { 208 vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node); 209 vrele(lowerrootvp); 210 free(xmp, M_NULLFSMNT); 211 return (error); 212 } 213 214 if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) { 215 MNT_ILOCK(mp); 216 mp->mnt_flag |= MNT_LOCAL; 217 MNT_IUNLOCK(mp); 218 } 219 220 if (vfs_getopt(mp->mnt_optnew, cache_opt_name, NULL, NULL) == 0) { 221 xmp->nullm_flags |= NULLM_CACHE; 222 } else if (vfs_getopt(mp->mnt_optnew, nocache_opt_name, NULL, 223 NULL) == 0) { 224 ; 225 } else if (null_cache_vnodes && 226 (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) == 0) { 227 xmp->nullm_flags |= NULLM_CACHE; 228 } 229 230 if ((xmp->nullm_flags & NULLM_CACHE) != 0) { 231 vfs_register_for_notification(xmp->nullm_vfs, mp, 232 &xmp->notify_node); 233 } 234 235 if (vfs_getopt(mp->mnt_optnew, unixbypass_opt_name, NULL, NULL) == 0) { 236 ; 237 } else if (vfs_getopt(mp->mnt_optnew, nounixbypass_opt_name, NULL, 238 NULL) == 0) { 239 xmp->nullm_flags |= NULLM_NOUNPBYPASS; 240 } 241 242 if (lowerrootvp == mp->mnt_vnodecovered) { 243 vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 244 lowerrootvp->v_vflag |= VV_CROSSLOCK; 245 VOP_UNLOCK(lowerrootvp); 246 } 247 248 MNT_ILOCK(mp); 249 if ((xmp->nullm_flags & NULLM_CACHE) != 0) { 250 mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag & 251 (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED | 252 MNTK_EXTENDED_SHARED); 253 } 254 mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE; 255 mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag & 256 (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS); 257 MNT_IUNLOCK(mp); 258 vfs_getnewfsid(mp); 259 vfs_mountedfrom(mp, target); 260 vput(nullm_rootvp); 261 262 NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n", 263 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); 264 return (0); 265 } 266 267 /* 268 * Free reference to null layer 269 */ 270 static int 271 nullfs_unmount(struct mount *mp, int mntflags) 272 { 273 struct null_mount *mntdata; 274 int error, flags; 275 276 NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp); 277 278 if (mntflags & MNT_FORCE) 279 flags = FORCECLOSE; 280 else 281 flags = 0; 282 283 for (;;) { 284 /* There is 1 extra root vnode reference (nullm_rootvp). */ 285 error = vflush(mp, 0, flags, curthread); 286 if (error) 287 return (error); 288 MNT_ILOCK(mp); 289 if (mp->mnt_nvnodelistsize == 0) { 290 MNT_IUNLOCK(mp); 291 break; 292 } 293 MNT_IUNLOCK(mp); 294 if ((mntflags & MNT_FORCE) == 0) 295 return (EBUSY); 296 } 297 298 /* 299 * Finally, throw away the null_mount structure 300 */ 301 mntdata = mp->mnt_data; 302 if ((mntdata->nullm_flags & NULLM_CACHE) != 0) { 303 vfs_unregister_for_notification(mntdata->nullm_vfs, 304 &mntdata->notify_node); 305 } 306 if (mntdata->nullm_lowerrootvp == mp->mnt_vnodecovered) { 307 vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 308 mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK; 309 VOP_UNLOCK(mp->mnt_vnodecovered); 310 } 311 vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node); 312 vrele(mntdata->nullm_lowerrootvp); 313 mp->mnt_data = NULL; 314 free(mntdata, M_NULLFSMNT); 315 return (0); 316 } 317 318 static int 319 nullfs_root(struct mount *mp, int flags, struct vnode **vpp) 320 { 321 struct vnode *vp; 322 struct null_mount *mntdata; 323 int error; 324 325 mntdata = MOUNTTONULLMOUNT(mp); 326 NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", mp, 327 mntdata->nullm_lowerrootvp); 328 329 error = vget(mntdata->nullm_lowerrootvp, flags); 330 if (error == 0) { 331 error = null_nodeget(mp, mntdata->nullm_lowerrootvp, &vp); 332 if (error == 0) { 333 *vpp = vp; 334 } 335 } 336 return (error); 337 } 338 339 static int 340 nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg, bool *mp_busy) 341 { 342 struct mount *lowermp; 343 struct null_mount *mntdata; 344 int error; 345 bool unbusy; 346 347 mntdata = MOUNTTONULLMOUNT(mp); 348 lowermp = atomic_load_ptr(&mntdata->nullm_vfs); 349 KASSERT(*mp_busy == true, ("upper mount not busy")); 350 /* 351 * See comment in sys_quotactl() for an explanation of why the 352 * lower mount needs to be busied by the caller of VFS_QUOTACTL() 353 * but may be unbusied by the implementation. We must unbusy 354 * the upper mount for the same reason; otherwise a namei lookup 355 * issued by the VFS_QUOTACTL() implementation could traverse the 356 * upper mount and deadlock. 357 */ 358 vfs_unbusy(mp); 359 *mp_busy = false; 360 unbusy = true; 361 error = vfs_busy(lowermp, 0); 362 if (error == 0) 363 error = VFS_QUOTACTL(lowermp, cmd, uid, arg, &unbusy); 364 if (unbusy) 365 vfs_unbusy(lowermp); 366 367 return (error); 368 } 369 370 static int 371 nullfs_statfs(struct mount *mp, struct statfs *sbp) 372 { 373 int error; 374 struct statfs *mstat; 375 376 NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp, 377 (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp, 378 (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)); 379 380 mstat = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK | M_ZERO); 381 382 error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, mstat); 383 if (error) { 384 free(mstat, M_STATFS); 385 return (error); 386 } 387 388 sbp->f_type = mstat->f_type; 389 sbp->f_bsize = mstat->f_bsize; 390 sbp->f_iosize = mstat->f_iosize; 391 sbp->f_blocks = mstat->f_blocks; 392 sbp->f_bfree = mstat->f_bfree; 393 sbp->f_bavail = mstat->f_bavail; 394 sbp->f_files = mstat->f_files; 395 sbp->f_ffree = mstat->f_ffree; 396 397 free(mstat, M_STATFS); 398 return (0); 399 } 400 401 static int 402 nullfs_sync(struct mount *mp, int waitfor) 403 { 404 /* 405 * XXX - Assumes no data cached at null layer. 406 */ 407 return (0); 408 } 409 410 static int 411 nullfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) 412 { 413 int error; 414 415 KASSERT((flags & LK_TYPE_MASK) != 0, 416 ("nullfs_vget: no lock requested")); 417 418 error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp); 419 if (error != 0) 420 return (error); 421 return (null_nodeget(mp, *vpp, vpp)); 422 } 423 424 static int 425 nullfs_fhtovp(struct mount *mp, struct fid *fidp, int flags, struct vnode **vpp) 426 { 427 int error; 428 429 error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags, 430 vpp); 431 if (error != 0) 432 return (error); 433 return (null_nodeget(mp, *vpp, vpp)); 434 } 435 436 static int 437 nullfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, 438 int namespace, const char *attrname) 439 { 440 441 return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, 442 filename_vp, namespace, attrname)); 443 } 444 445 static void 446 nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp) 447 { 448 struct vnode *vp; 449 450 vp = null_hashget(mp, lowervp); 451 if (vp == NULL) 452 return; 453 VTONULL(vp)->null_flags |= NULLV_NOUNLOCK; 454 vgone(vp); 455 vput(vp); 456 } 457 458 static void 459 nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp) 460 { 461 struct vnode *vp; 462 struct null_node *xp; 463 464 vp = null_hashget(mp, lowervp); 465 if (vp == NULL) 466 return; 467 xp = VTONULL(vp); 468 xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK; 469 vhold(vp); 470 vunref(vp); 471 472 if (VN_IS_DOOMED(vp)) { 473 /* 474 * If the vnode is doomed, its lock was split from the lower 475 * vnode lock. Therefore we need to do an extra unlock before 476 * allowing the final vdrop() to free the vnode. 477 */ 478 VOP_UNLOCK(vp); 479 } else { 480 /* 481 * Otherwise, the nullfs vnode still shares the lock 482 * with the lower vnode, and must not be unlocked. 483 * Also clear the NULLV_NOUNLOCK, the flag is not 484 * relevant for future reclamations. 485 */ 486 ASSERT_VOP_ELOCKED(vp, "unlink_lowervp"); 487 xp->null_flags &= ~NULLV_NOUNLOCK; 488 } 489 vdrop(vp); 490 } 491 492 static struct vfsops null_vfsops = { 493 .vfs_extattrctl = nullfs_extattrctl, 494 .vfs_fhtovp = nullfs_fhtovp, 495 .vfs_init = nullfs_init, 496 .vfs_mount = nullfs_mount, 497 .vfs_quotactl = nullfs_quotactl, 498 .vfs_root = nullfs_root, 499 .vfs_statfs = nullfs_statfs, 500 .vfs_sync = nullfs_sync, 501 .vfs_uninit = nullfs_uninit, 502 .vfs_unmount = nullfs_unmount, 503 .vfs_vget = nullfs_vget, 504 .vfs_reclaim_lowervp = nullfs_reclaim_lowervp, 505 .vfs_unlink_lowervp = nullfs_unlink_lowervp, 506 }; 507 508 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL | VFCF_FILEMOUNT); 509