1 /* 2 * Copyright (c) 1994, 1995 The Regents of the University of California. 3 * Copyright (c) 1994, 1995 Jan-Simon Pendry. 4 * All rights reserved. 5 * 6 * This code is derived from software donated to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95 38 * $FreeBSD$ 39 */ 40 41 /* 42 * Union Layer 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/proc.h> 49 #include <sys/vnode.h> 50 #include <sys/mount.h> 51 #include <sys/namei.h> 52 #include <sys/malloc.h> 53 #include <sys/filedesc.h> 54 #include <miscfs/union/union.h> 55 #include <vm/vm_zone.h> 56 57 static MALLOC_DEFINE(M_UNIONFSMNT, "UNION mount", "UNION mount structure"); 58 59 extern int union_init __P((struct vfsconf *)); 60 static int union_mount __P((struct mount *mp, char *path, caddr_t data, 61 struct nameidata *ndp, struct proc *p)); 62 static int union_root __P((struct mount *mp, struct vnode **vpp)); 63 static int union_statfs __P((struct mount *mp, struct statfs *sbp, 64 struct proc *p)); 65 static int union_unmount __P((struct mount *mp, int mntflags, 66 struct proc *p)); 67 68 /* 69 * Mount union filesystem 70 */ 71 static int 72 union_mount(mp, path, data, ndp, p) 73 struct mount *mp; 74 char *path; 75 caddr_t data; 76 struct nameidata *ndp; 77 struct proc *p; 78 { 79 int error = 0; 80 struct union_args args; 81 struct vnode *lowerrootvp = NULLVP; 82 struct vnode *upperrootvp = NULLVP; 83 struct union_mount *um = 0; 84 struct ucred *cred = 0; 85 char *cp = 0; 86 int len; 87 u_int size; 88 89 UDEBUG(("union_mount(mp = %p)\n", (void *)mp)); 90 91 /* 92 * Disable clustered write, otherwise system becomes unstable. 93 */ 94 mp->mnt_flag |= MNT_NOCLUSTERW; 95 96 /* 97 * Update is a no-op 98 */ 99 if (mp->mnt_flag & MNT_UPDATE) { 100 /* 101 * Need to provide. 102 * 1. a way to convert between rdonly and rdwr mounts. 103 * 2. support for nfs exports. 104 */ 105 error = EOPNOTSUPP; 106 goto bad; 107 } 108 109 /* 110 * Get argument 111 */ 112 error = copyin(data, (caddr_t)&args, sizeof(struct union_args)); 113 if (error) 114 goto bad; 115 116 /* 117 * Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered. 118 * We need to reference it but not lock it. 119 */ 120 121 lowerrootvp = mp->mnt_vnodecovered; 122 VREF(lowerrootvp); 123 124 #if 0 125 /* 126 * Unlock lower node to avoid deadlock. 127 */ 128 if (lowerrootvp->v_op == union_vnodeop_p) 129 VOP_UNLOCK(lowerrootvp, 0, p); 130 #endif 131 132 /* 133 * Obtain upper vnode by calling namei() on the path. The 134 * upperrootvp will be turned referenced but not locked. 135 */ 136 NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT, 137 UIO_USERSPACE, args.target, p); 138 139 error = namei(ndp); 140 141 #if 0 142 if (lowerrootvp->v_op == union_vnodeop_p) 143 vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p); 144 #endif 145 if (error) 146 goto bad; 147 148 NDFREE(ndp, NDF_ONLY_PNBUF); 149 upperrootvp = ndp->ni_vp; 150 vrele(ndp->ni_dvp); 151 ndp->ni_dvp = NULL; 152 153 UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, 154 VOP_ISLOCKED(upperrootvp, NULL))); 155 156 /* 157 * Check multi union mount to avoid `lock myself again' panic. 158 * Also require that it be a directory. 159 */ 160 if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) { 161 #ifdef DIAGNOSTIC 162 printf("union_mount: multi union mount?\n"); 163 #endif 164 error = EDEADLK; 165 goto bad; 166 } 167 168 if (upperrootvp->v_type != VDIR) { 169 error = EINVAL; 170 goto bad; 171 } 172 173 /* 174 * Allocate our union_mount structure and populate the fields. 175 * The vnode references are stored in the union_mount as held, 176 * unlocked references. Depending on the _BELOW flag, the 177 * filesystems are viewed in a different order. In effect this 178 * is the same as providing a mount-under option to the mount 179 * syscall. 180 */ 181 182 um = (struct union_mount *) malloc(sizeof(struct union_mount), 183 M_UNIONFSMNT, M_WAITOK); 184 185 bzero(um, sizeof(struct union_mount)); 186 187 um->um_op = args.mntflags & UNMNT_OPMASK; 188 189 switch (um->um_op) { 190 case UNMNT_ABOVE: 191 um->um_lowervp = lowerrootvp; 192 um->um_uppervp = upperrootvp; 193 upperrootvp = NULL; 194 lowerrootvp = NULL; 195 break; 196 197 case UNMNT_BELOW: 198 um->um_lowervp = upperrootvp; 199 um->um_uppervp = lowerrootvp; 200 upperrootvp = NULL; 201 lowerrootvp = NULL; 202 break; 203 204 case UNMNT_REPLACE: 205 vrele(lowerrootvp); 206 lowerrootvp = NULL; 207 um->um_uppervp = upperrootvp; 208 um->um_lowervp = lowerrootvp; 209 upperrootvp = NULL; 210 break; 211 212 default: 213 error = EINVAL; 214 goto bad; 215 } 216 217 /* 218 * Unless the mount is readonly, ensure that the top layer 219 * supports whiteout operations 220 */ 221 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 222 error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP); 223 if (error) 224 goto bad; 225 } 226 227 um->um_cred = p->p_ucred; 228 crhold(um->um_cred); 229 um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask; 230 231 /* 232 * Depending on what you think the MNT_LOCAL flag might mean, 233 * you may want the && to be || on the conditional below. 234 * At the moment it has been defined that the filesystem is 235 * only local if it is all local, ie the MNT_LOCAL flag implies 236 * that the entire namespace is local. If you think the MNT_LOCAL 237 * flag implies that some of the files might be stored locally 238 * then you will want to change the conditional. 239 */ 240 if (um->um_op == UNMNT_ABOVE) { 241 if (((um->um_lowervp == NULLVP) || 242 (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) && 243 (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL)) 244 mp->mnt_flag |= MNT_LOCAL; 245 } 246 247 /* 248 * Copy in the upper layer's RDONLY flag. This is for the benefit 249 * of lookup() which explicitly checks the flag, rather than asking 250 * the filesystem for its own opinion. This means, that an update 251 * mount of the underlying filesystem to go from rdonly to rdwr 252 * will leave the unioned view as read-only. 253 */ 254 mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY); 255 256 mp->mnt_data = (qaddr_t) um; 257 vfs_getnewfsid(mp); 258 259 (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); 260 bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); 261 262 switch (um->um_op) { 263 case UNMNT_ABOVE: 264 cp = "<above>:"; 265 break; 266 case UNMNT_BELOW: 267 cp = "<below>:"; 268 break; 269 case UNMNT_REPLACE: 270 cp = ""; 271 break; 272 } 273 len = strlen(cp); 274 bcopy(cp, mp->mnt_stat.f_mntfromname, len); 275 276 cp = mp->mnt_stat.f_mntfromname + len; 277 len = MNAMELEN - len; 278 279 (void) copyinstr(args.target, cp, len - 1, &size); 280 bzero(cp + size, len - size); 281 282 (void)union_statfs(mp, &mp->mnt_stat, p); 283 284 UDEBUG(("union_mount: from %s, on %s\n", 285 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname)); 286 return (0); 287 288 bad: 289 if (um) { 290 if (um->um_uppervp) 291 vrele(um->um_uppervp); 292 if (um->um_lowervp) 293 vrele(um->um_lowervp); 294 /* XXX other fields */ 295 free(um, M_UNIONFSMNT); 296 } 297 if (cred) 298 crfree(cred); 299 if (upperrootvp) 300 vrele(upperrootvp); 301 if (lowerrootvp) 302 vrele(lowerrootvp); 303 return (error); 304 } 305 306 /* 307 * Free reference to union layer 308 */ 309 static int 310 union_unmount(mp, mntflags, p) 311 struct mount *mp; 312 int mntflags; 313 struct proc *p; 314 { 315 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 316 struct vnode *um_rootvp; 317 int error; 318 int freeing; 319 int flags = 0; 320 321 UDEBUG(("union_unmount(mp = %p)\n", (void *)mp)); 322 323 if (mntflags & MNT_FORCE) 324 flags |= FORCECLOSE; 325 326 if ((error = union_root(mp, &um_rootvp)) != 0) 327 return (error); 328 329 /* 330 * Keep flushing vnodes from the mount list. 331 * This is needed because of the un_pvp held 332 * reference to the parent vnode. 333 * If more vnodes have been freed on a given pass, 334 * the try again. The loop will iterate at most 335 * (d) times, where (d) is the maximum tree depth 336 * in the filesystem. 337 */ 338 for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) { 339 struct vnode *vp; 340 int n; 341 342 /* count #vnodes held on mount list */ 343 for (n = 0, vp = mp->mnt_vnodelist.lh_first; 344 vp != NULLVP; 345 vp = vp->v_mntvnodes.le_next) 346 n++; 347 348 /* if this is unchanged then stop */ 349 if (n == freeing) 350 break; 351 352 /* otherwise try once more time */ 353 freeing = n; 354 } 355 356 /* At this point the root vnode should have a single reference */ 357 if (um_rootvp->v_usecount > 1) { 358 vput(um_rootvp); 359 return (EBUSY); 360 } 361 362 #ifdef DEBUG 363 vprint("union root", um_rootvp); 364 #endif 365 /* 366 * Discard references to upper and lower target vnodes. 367 */ 368 if (um->um_lowervp) 369 vrele(um->um_lowervp); 370 vrele(um->um_uppervp); 371 crfree(um->um_cred); 372 /* 373 * Release reference on underlying root vnode 374 */ 375 vput(um_rootvp); 376 /* 377 * And blow it away for future re-use 378 */ 379 vgone(um_rootvp); 380 /* 381 * Finally, throw away the union_mount structure 382 */ 383 free(mp->mnt_data, M_UNIONFSMNT); /* XXX */ 384 mp->mnt_data = 0; 385 return (0); 386 } 387 388 static int 389 union_root(mp, vpp) 390 struct mount *mp; 391 struct vnode **vpp; 392 { 393 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 394 int error; 395 396 /* 397 * Supply an unlocked reference to um_uppervp and to um_lowervp. It 398 * is possible for um_uppervp to be locked without the associated 399 * root union_node being locked. We let union_allocvp() deal with 400 * it. 401 */ 402 UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, 403 VOP_ISLOCKED(um->um_uppervp, NULL))); 404 405 VREF(um->um_uppervp); 406 if (um->um_lowervp) 407 VREF(um->um_lowervp); 408 409 error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL, 410 um->um_uppervp, um->um_lowervp, 1); 411 UDEBUG(("error %d\n", error)); 412 UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, 413 VOP_ISLOCKED(um->um_uppervp, NULL))); 414 415 return (error); 416 } 417 418 static int 419 union_statfs(mp, sbp, p) 420 struct mount *mp; 421 struct statfs *sbp; 422 struct proc *p; 423 { 424 int error; 425 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 426 struct statfs mstat; 427 int lbsize; 428 429 UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", 430 (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp)); 431 432 bzero(&mstat, sizeof(mstat)); 433 434 if (um->um_lowervp) { 435 error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p); 436 if (error) 437 return (error); 438 } 439 440 /* now copy across the "interesting" information and fake the rest */ 441 #if 0 442 sbp->f_type = mstat.f_type; 443 sbp->f_flags = mstat.f_flags; 444 sbp->f_bsize = mstat.f_bsize; 445 sbp->f_iosize = mstat.f_iosize; 446 #endif 447 lbsize = mstat.f_bsize; 448 sbp->f_blocks = mstat.f_blocks; 449 sbp->f_bfree = mstat.f_bfree; 450 sbp->f_bavail = mstat.f_bavail; 451 sbp->f_files = mstat.f_files; 452 sbp->f_ffree = mstat.f_ffree; 453 454 error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p); 455 if (error) 456 return (error); 457 458 sbp->f_flags = mstat.f_flags; 459 sbp->f_bsize = mstat.f_bsize; 460 sbp->f_iosize = mstat.f_iosize; 461 462 /* 463 * if the lower and upper blocksizes differ, then frig the 464 * block counts so that the sizes reported by df make some 465 * kind of sense. none of this makes sense though. 466 */ 467 468 if (mstat.f_bsize != lbsize) 469 sbp->f_blocks = ((off_t) sbp->f_blocks * lbsize) / mstat.f_bsize; 470 471 /* 472 * The "total" fields count total resources in all layers, 473 * the "free" fields count only those resources which are 474 * free in the upper layer (since only the upper layer 475 * is writeable). 476 */ 477 sbp->f_blocks += mstat.f_blocks; 478 sbp->f_bfree = mstat.f_bfree; 479 sbp->f_bavail = mstat.f_bavail; 480 sbp->f_files += mstat.f_files; 481 sbp->f_ffree = mstat.f_ffree; 482 483 if (sbp != &mp->mnt_stat) { 484 sbp->f_type = mp->mnt_vfc->vfc_typenum; 485 bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); 486 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); 487 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); 488 } 489 return (0); 490 } 491 492 static struct vfsops union_vfsops = { 493 union_mount, 494 vfs_stdstart, /* underlying start already done */ 495 union_unmount, 496 union_root, 497 vfs_stdquotactl, 498 union_statfs, 499 vfs_stdsync, /* XXX assumes no cached data on union level */ 500 vfs_stdvget, 501 vfs_stdfhtovp, 502 vfs_stdcheckexp, 503 vfs_stdvptofh, 504 union_init, 505 vfs_stduninit, 506 vfs_stdextattrctl, 507 }; 508 509 VFS_SET(union_vfsops, union, VFCF_LOOPBACK); 510