1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/param.h> 29 #include <sys/errno.h> 30 #include <sys/vfs.h> 31 #include <sys/vnode.h> 32 #include <sys/uio.h> 33 #include <sys/pathname.h> 34 #include <sys/kmem.h> 35 #include <sys/cred.h> 36 #include <sys/statvfs.h> 37 #include <sys/fs/lofs_info.h> 38 #include <sys/fs/lofs_node.h> 39 #include <sys/mount.h> 40 #include <sys/mntent.h> 41 #include <sys/mkdev.h> 42 #include <sys/sysmacros.h> 43 #include <sys/systm.h> 44 #include <sys/cmn_err.h> 45 #include <sys/policy.h> 46 #include "fs/fs_subr.h" 47 48 /* 49 * This is the loadable module wrapper. 50 */ 51 #include <sys/modctl.h> 52 53 static mntopts_t lofs_mntopts; 54 55 static int lofsinit(int, char *); 56 57 static vfsdef_t vfw = { 58 VFSDEF_VERSION, 59 "lofs", 60 lofsinit, 61 VSW_HASPROTO|VSW_STATS, 62 &lofs_mntopts 63 }; 64 65 /* 66 * Stuff needed to support "zonedevfs" mode. 67 */ 68 static major_t lofs_major; 69 static minor_t lofs_minor; 70 static kmutex_t lofs_minor_lock; 71 72 /* 73 * LOFS mount options table 74 */ 75 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 76 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 77 static char *zonedevfs_cancel[] = { MNTOPT_LOFS_NOZONEDEVFS, NULL }; 78 static char *nozonedevfs_cancel[] = { MNTOPT_LOFS_ZONEDEVFS, NULL }; 79 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 80 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 81 82 static mntopt_t mntopts[] = { 83 /* 84 * option name cancel option default arg flags 85 * private data 86 */ 87 { MNTOPT_XATTR, xattr_cancel, NULL, 0, 88 (void *)0 }, 89 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 90 (void *)0 }, 91 { MNTOPT_LOFS_ZONEDEVFS, zonedevfs_cancel, NULL, 0, 92 (void *)0 }, 93 { MNTOPT_LOFS_NOZONEDEVFS, nozonedevfs_cancel, NULL, 0, 94 (void *)0 }, 95 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 96 (void *)0 }, 97 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 98 (void *)0 }, 99 }; 100 101 static mntopts_t lofs_mntopts = { 102 sizeof (mntopts) / sizeof (mntopt_t), 103 mntopts 104 }; 105 106 /* 107 * Module linkage information for the kernel. 108 */ 109 110 static struct modlfs modlfs = { 111 &mod_fsops, "filesystem for lofs", &vfw 112 }; 113 114 static struct modlinkage modlinkage = { 115 MODREV_1, (void *)&modlfs, NULL 116 }; 117 118 /* 119 * This is the module initialization routine. 120 */ 121 int 122 _init() 123 { 124 int status; 125 126 lofs_subrinit(); 127 status = mod_install(&modlinkage); 128 if (status != 0) { 129 /* 130 * Cleanup previously initialized work. 131 */ 132 lofs_subrfini(); 133 } 134 135 return (status); 136 } 137 138 /* 139 * Don't allow the lofs module to be unloaded for now. 140 * There is a memory leak if it gets unloaded. 141 */ 142 int 143 _fini() 144 { 145 return (EBUSY); 146 } 147 148 int 149 _info(struct modinfo *modinfop) 150 { 151 return (mod_info(&modlinkage, modinfop)); 152 } 153 154 155 static int lofsfstype; 156 vfsops_t *lo_vfsops; 157 158 /* 159 * lo mount vfsop 160 * Set up mount info record and attach it to vfs struct. 161 */ 162 /*ARGSUSED*/ 163 static int 164 lo_mount(struct vfs *vfsp, 165 struct vnode *vp, 166 struct mounta *uap, 167 struct cred *cr) 168 { 169 int error; 170 struct vnode *srootvp = NULL; /* the server's root */ 171 struct vnode *realrootvp; 172 struct loinfo *li; 173 int is_zonedevfs = 0; 174 int nodev; 175 176 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 177 178 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 179 return (EPERM); 180 181 /* 182 * Loopback devices which get "nodevices" added can be done without 183 * "nodevices" set because we cannot import devices into a zone 184 * with loopback. Note that we have all zone privileges when 185 * this happens; if not, we'd have gotten "nosuid". 186 */ 187 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 188 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 189 190 /* 191 * We must ensure that only the global zone applies the 'zonedevfs' 192 * option; we don't want non-global zones to be able to establish 193 * lofs mounts using the special dev_t we use to ensure that the 194 * contents of a zone's /dev cannot be victim to link(2) or rename(2). 195 * See below, where we set all of this up. 196 * 197 * Since this is more like a privilege check, we use crgetzoneid(cr) 198 * instead of getzoneid(). 199 */ 200 is_zonedevfs = vfs_optionisset(vfsp, MNTOPT_LOFS_ZONEDEVFS, NULL); 201 if (crgetzoneid(cr) != GLOBAL_ZONEID && is_zonedevfs) 202 return (EPERM); 203 204 mutex_enter(&vp->v_lock); 205 if (!(uap->flags & MS_OVERLAY) && 206 (vp->v_count != 1 || (vp->v_flag & VROOT))) { 207 mutex_exit(&vp->v_lock); 208 return (EBUSY); 209 } 210 mutex_exit(&vp->v_lock); 211 212 /* 213 * Find real root, and make vfs point to real vfs 214 */ 215 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 216 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, 217 &realrootvp)) 218 return (error); 219 220 /* 221 * realrootvp may be an AUTOFS node, in which case we 222 * perform a VOP_ACCESS() to trigger the mount of the 223 * intended filesystem, so we loopback mount the intended 224 * filesystem instead of the AUTOFS filesystem. 225 */ 226 (void) VOP_ACCESS(realrootvp, 0, 0, cr); 227 228 /* 229 * We're interested in the top most filesystem. 230 * This is specially important when uap->spec is a trigger 231 * AUTOFS node, since we're really interested in mounting the 232 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 233 * call not the AUTOFS node itself. 234 */ 235 if (vn_mountedvfs(realrootvp) != NULL) { 236 if (error = traverse(&realrootvp)) { 237 VN_RELE(realrootvp); 238 return (error); 239 } 240 } 241 242 /* 243 * Allocate a vfs info struct and attach it 244 */ 245 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 246 li->li_realvfs = realrootvp->v_vfsp; 247 li->li_mountvfs = vfsp; 248 249 /* 250 * Set mount flags to be inherited by loopback vfs's 251 */ 252 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 253 li->li_mflag |= VFS_RDONLY; 254 } 255 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 256 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 257 } 258 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 259 li->li_mflag |= VFS_NODEVICES; 260 } 261 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 262 li->li_mflag |= VFS_NOSETUID; 263 } 264 /* 265 * Permissive flags are added to the "deny" bitmap. 266 */ 267 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 268 li->li_dflag |= VFS_XATTR; 269 } 270 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 271 li->li_dflag |= VFS_NBMAND; 272 } 273 274 /* 275 * Propagate inheritable mount flags from the real vfs. 276 */ 277 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 278 !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 279 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 280 VFS_NODISPLAY); 281 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 282 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 283 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 284 VFS_NODISPLAY); 285 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 286 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 287 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 288 VFS_NODISPLAY); 289 /* 290 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 291 * such as VFS_RDONLY, are handled differently. An explicit 292 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 293 */ 294 if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 295 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 296 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 297 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 298 VFS_NODISPLAY); 299 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 300 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 301 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 302 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 303 VFS_NODISPLAY); 304 305 li->li_refct = 0; 306 vfsp->vfs_data = (caddr_t)li; 307 vfsp->vfs_bcount = 0; 308 vfsp->vfs_fstype = lofsfstype; 309 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 310 311 /* 312 * Test to see if we need to be in "zone /dev" mode. In zonedevfs 313 * mode, we pull a nasty trick; we make sure that the lofs dev_t does 314 * *not* reflect the underlying device, so that no renames or links 315 * can occur to or from the /dev hierarchy. 316 */ 317 if (is_zonedevfs) { 318 dev_t dev; 319 320 mutex_enter(&lofs_minor_lock); 321 do { 322 lofs_minor = (lofs_minor + 1) & MAXMIN32; 323 dev = makedevice(lofs_major, lofs_minor); 324 } while (vfs_devismounted(dev)); 325 mutex_exit(&lofs_minor_lock); 326 327 vfsp->vfs_dev = dev; 328 vfs_make_fsid(&vfsp->vfs_fsid, dev, lofsfstype); 329 330 li->li_flag |= LO_ZONEDEVFS; 331 } else { 332 vfsp->vfs_dev = li->li_realvfs->vfs_dev; 333 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 334 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 335 } 336 337 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 338 li->li_flag |= LO_NOSUB; 339 } 340 341 /* 342 * Setup the hashtable. If the root of this mount isn't a directory, 343 * there's no point in allocating a large hashtable. A table with one 344 * bucket is sufficient. 345 */ 346 if (realrootvp->v_type != VDIR) 347 lsetup(li, 1); 348 else 349 lsetup(li, 0); 350 351 /* 352 * Make the root vnode 353 */ 354 srootvp = makelonode(realrootvp, li, 0); 355 srootvp->v_flag |= VROOT; 356 li->li_rootvp = srootvp; 357 358 #ifdef LODEBUG 359 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 360 vfsp, li->li_realvfs, srootvp, realrootvp, li); 361 #endif 362 return (0); 363 } 364 365 /* 366 * Undo loopback mount 367 */ 368 static int 369 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 370 { 371 struct loinfo *li; 372 373 if (secpolicy_fs_unmount(cr, vfsp) != 0) 374 return (EPERM); 375 376 /* 377 * Forced unmount is not supported by this file system 378 * and thus, ENOTSUP, is being returned. 379 */ 380 if (flag & MS_FORCE) 381 return (ENOTSUP); 382 383 li = vtoli(vfsp); 384 #ifdef LODEBUG 385 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 386 #endif 387 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 388 #ifdef LODEBUG 389 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 390 li->li_rootvp->v_count); 391 #endif 392 return (EBUSY); 393 } 394 VN_RELE(li->li_rootvp); 395 return (0); 396 } 397 398 /* 399 * Find root of lofs mount. 400 */ 401 static int 402 lo_root(struct vfs *vfsp, struct vnode **vpp) 403 { 404 *vpp = vtoli(vfsp)->li_rootvp; 405 #ifdef LODEBUG 406 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 407 #endif 408 /* 409 * If the root of the filesystem is a special file, return the specvp 410 * version of the vnode. We don't save the specvp vnode in our 411 * hashtable since that's exclusively for lnodes. 412 */ 413 if (IS_DEVVP(*vpp)) { 414 struct vnode *svp; 415 416 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 417 if (svp == NULL) 418 return (ENOSYS); 419 *vpp = svp; 420 } else { 421 VN_HOLD(*vpp); 422 } 423 424 return (0); 425 } 426 427 /* 428 * Get file system statistics. 429 */ 430 static int 431 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 432 { 433 vnode_t *realrootvp; 434 435 #ifdef LODEBUG 436 lo_dprint(4, "lostatvfs %p\n", vfsp); 437 #endif 438 /* 439 * Using realrootvp->v_vfsp (instead of the realvfsp that was 440 * cached) is necessary to make lofs work woth forced UFS unmounts. 441 * In the case of a forced unmount, UFS stores a set of dummy vfsops 442 * in all the (i)vnodes in the filesystem. The dummy ops simply 443 * returns back EIO. 444 */ 445 (void) lo_realvfs(vfsp, &realrootvp); 446 if (realrootvp != NULL) 447 return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 448 else 449 return (EIO); 450 } 451 452 /* 453 * LOFS doesn't have any data or metadata to flush, pending I/O on the 454 * underlying filesystem will be flushed when such filesystem is synched. 455 */ 456 /* ARGSUSED */ 457 static int 458 lo_sync(struct vfs *vfsp, 459 short flag, 460 struct cred *cr) 461 { 462 #ifdef LODEBUG 463 lo_dprint(4, "lo_sync: %p\n", vfsp); 464 #endif 465 return (0); 466 } 467 468 /* 469 * Obtain the vnode from the underlying filesystem. 470 */ 471 static int 472 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 473 { 474 vnode_t *realrootvp; 475 476 #ifdef LODEBUG 477 lo_dprint(4, "lo_vget: %p\n", vfsp); 478 #endif 479 (void) lo_realvfs(vfsp, &realrootvp); 480 if (realrootvp != NULL) 481 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 482 else 483 return (EIO); 484 } 485 486 /* 487 * Free mount-specific data. 488 */ 489 static void 490 lo_freevfs(struct vfs *vfsp) 491 { 492 struct loinfo *li = vtoli(vfsp); 493 494 ldestroy(li); 495 kmem_free(li, sizeof (struct loinfo)); 496 } 497 498 static int 499 lofsinit(int fstyp, char *name) 500 { 501 static const fs_operation_def_t lo_vfsops_template[] = { 502 VFSNAME_MOUNT, lo_mount, 503 VFSNAME_UNMOUNT, lo_unmount, 504 VFSNAME_ROOT, lo_root, 505 VFSNAME_STATVFS, lo_statvfs, 506 VFSNAME_SYNC, (fs_generic_func_p) lo_sync, 507 VFSNAME_VGET, lo_vget, 508 VFSNAME_FREEVFS, (fs_generic_func_p) lo_freevfs, 509 NULL, NULL 510 }; 511 int error; 512 513 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 514 if (error != 0) { 515 cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 516 return (error); 517 } 518 519 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 520 if (error != 0) { 521 (void) vfs_freevfsops_by_type(fstyp); 522 cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 523 return (error); 524 } 525 526 lofsfstype = fstyp; 527 528 if ((lofs_major = getudev()) == (major_t)-1) { 529 (void) vfs_freevfsops_by_type(fstyp); 530 cmn_err(CE_WARN, "lofsinit: Can't get unique device number."); 531 return (ENXIO); 532 } 533 534 lofs_minor = 0; 535 mutex_init(&lofs_minor_lock, NULL, MUTEX_DEFAULT, NULL); 536 537 return (0); 538 } 539