1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/param.h> 30 #include <sys/errno.h> 31 #include <sys/vfs.h> 32 #include <sys/vnode.h> 33 #include <sys/uio.h> 34 #include <sys/pathname.h> 35 #include <sys/kmem.h> 36 #include <sys/cred.h> 37 #include <sys/statvfs.h> 38 #include <sys/fs/lofs_info.h> 39 #include <sys/fs/lofs_node.h> 40 #include <sys/mount.h> 41 #include <sys/mntent.h> 42 #include <sys/mkdev.h> 43 #include <sys/sysmacros.h> 44 #include <sys/systm.h> 45 #include <sys/cmn_err.h> 46 #include <sys/policy.h> 47 #include "fs/fs_subr.h" 48 49 /* 50 * This is the loadable module wrapper. 51 */ 52 #include <sys/modctl.h> 53 54 static mntopts_t lofs_mntopts; 55 56 static int lofsinit(int, char *); 57 58 static vfsdef_t vfw = { 59 VFSDEF_VERSION, 60 "lofs", 61 lofsinit, 62 VSW_HASPROTO, 63 &lofs_mntopts 64 }; 65 66 /* 67 * Stuff needed to support "zonedevfs" mode. 68 */ 69 static major_t lofs_major; 70 static minor_t lofs_minor; 71 static kmutex_t lofs_minor_lock; 72 73 /* 74 * LOFS mount options table 75 */ 76 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 77 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 78 static char *zonedevfs_cancel[] = { MNTOPT_LOFS_NOZONEDEVFS, NULL }; 79 static char *nozonedevfs_cancel[] = { MNTOPT_LOFS_ZONEDEVFS, NULL }; 80 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 81 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 82 83 static mntopt_t mntopts[] = { 84 /* 85 * option name cancel option default arg flags 86 * private data 87 */ 88 { MNTOPT_XATTR, xattr_cancel, NULL, 0, 89 (void *)0 }, 90 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 91 (void *)0 }, 92 { MNTOPT_LOFS_ZONEDEVFS, zonedevfs_cancel, NULL, 0, 93 (void *)0 }, 94 { MNTOPT_LOFS_NOZONEDEVFS, nozonedevfs_cancel, NULL, 0, 95 (void *)0 }, 96 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 97 (void *)0 }, 98 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 99 (void *)0 }, 100 }; 101 102 static mntopts_t lofs_mntopts = { 103 sizeof (mntopts) / sizeof (mntopt_t), 104 mntopts 105 }; 106 107 /* 108 * Module linkage information for the kernel. 109 */ 110 111 static struct modlfs modlfs = { 112 &mod_fsops, "filesystem for lofs", &vfw 113 }; 114 115 static struct modlinkage modlinkage = { 116 MODREV_1, (void *)&modlfs, NULL 117 }; 118 119 /* 120 * This is the module initialization routine. 121 */ 122 int 123 _init() 124 { 125 int status; 126 127 lofs_subrinit(); 128 status = mod_install(&modlinkage); 129 if (status != 0) { 130 /* 131 * Cleanup previously initialized work. 132 */ 133 lofs_subrfini(); 134 } 135 136 return (status); 137 } 138 139 /* 140 * Don't allow the lofs module to be unloaded for now. 141 * There is a memory leak if it gets unloaded. 142 */ 143 int 144 _fini() 145 { 146 return (EBUSY); 147 } 148 149 int 150 _info(struct modinfo *modinfop) 151 { 152 return (mod_info(&modlinkage, modinfop)); 153 } 154 155 156 static int lofsfstype; 157 vfsops_t *lo_vfsops; 158 159 /* 160 * lo mount vfsop 161 * Set up mount info record and attach it to vfs struct. 162 */ 163 /*ARGSUSED*/ 164 static int 165 lo_mount(struct vfs *vfsp, 166 struct vnode *vp, 167 struct mounta *uap, 168 struct cred *cr) 169 { 170 int error; 171 struct vnode *srootvp = NULL; /* the server's root */ 172 struct vnode *realrootvp; 173 struct loinfo *li; 174 int is_zonedevfs = 0; 175 int nodev; 176 177 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 178 179 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 180 return (EPERM); 181 182 /* 183 * Loopback devices which get "nodevices" added can be done without 184 * "nodevices" set because we cannot import devices into a zone 185 * with loopback. Note that we have all zone privileges when 186 * this happens; if not, we'd have gotten "nosuid". 187 */ 188 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 189 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 190 191 /* 192 * We must ensure that only the global zone applies the 'zonedevfs' 193 * option; we don't want non-global zones to be able to establish 194 * lofs mounts using the special dev_t we use to ensure that the 195 * contents of a zone's /dev cannot be victim to link(2) or rename(2). 196 * See below, where we set all of this up. 197 * 198 * Since this is more like a privilege check, we use crgetzoneid(cr) 199 * instead of getzoneid(). 200 */ 201 is_zonedevfs = vfs_optionisset(vfsp, MNTOPT_LOFS_ZONEDEVFS, NULL); 202 if (crgetzoneid(cr) != GLOBAL_ZONEID && is_zonedevfs) 203 return (EPERM); 204 205 mutex_enter(&vp->v_lock); 206 if (!(uap->flags & MS_OVERLAY) && 207 (vp->v_count != 1 || (vp->v_flag & VROOT))) { 208 mutex_exit(&vp->v_lock); 209 return (EBUSY); 210 } 211 mutex_exit(&vp->v_lock); 212 213 /* 214 * Find real root, and make vfs point to real vfs 215 */ 216 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 217 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, 218 &realrootvp)) 219 return (error); 220 221 /* 222 * realrootvp may be an AUTOFS node, in which case we 223 * perform a VOP_ACCESS() to trigger the mount of the 224 * intended filesystem, so we loopback mount the intended 225 * filesystem instead of the AUTOFS filesystem. 226 */ 227 (void) VOP_ACCESS(realrootvp, 0, 0, cr); 228 229 /* 230 * We're interested in the top most filesystem. 231 * This is specially important when uap->spec is a trigger 232 * AUTOFS node, since we're really interested in mounting the 233 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 234 * call not the AUTOFS node itself. 235 */ 236 if (vn_mountedvfs(realrootvp) != NULL) { 237 if (error = traverse(&realrootvp)) { 238 VN_RELE(realrootvp); 239 return (error); 240 } 241 } 242 243 /* 244 * Allocate a vfs info struct and attach it 245 */ 246 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 247 li->li_realvfs = realrootvp->v_vfsp; 248 li->li_mountvfs = vfsp; 249 250 /* 251 * Set mount flags to be inherited by loopback vfs's 252 */ 253 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 254 li->li_mflag |= VFS_RDONLY; 255 } 256 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 257 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 258 } 259 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 260 li->li_mflag |= VFS_NODEVICES; 261 } 262 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 263 li->li_mflag |= VFS_NOSETUID; 264 } 265 /* 266 * Permissive flags are added to the "deny" bitmap. 267 */ 268 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 269 li->li_dflag |= VFS_XATTR; 270 } 271 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 272 li->li_dflag |= VFS_NBMAND; 273 } 274 275 /* 276 * Propagate inheritable mount flags from the real vfs. 277 */ 278 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 279 !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 280 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 281 VFS_NODISPLAY); 282 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 283 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 284 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 285 VFS_NODISPLAY); 286 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 287 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 288 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 289 VFS_NODISPLAY); 290 /* 291 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 292 * such as VFS_RDONLY, are handled differently. An explicit 293 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 294 */ 295 if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 296 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 297 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 298 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 299 VFS_NODISPLAY); 300 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 301 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 302 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 303 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 304 VFS_NODISPLAY); 305 306 li->li_refct = 0; 307 vfsp->vfs_data = (caddr_t)li; 308 vfsp->vfs_bcount = 0; 309 vfsp->vfs_fstype = lofsfstype; 310 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 311 312 /* 313 * Test to see if we need to be in "zone /dev" mode. In zonedevfs 314 * mode, we pull a nasty trick; we make sure that the lofs dev_t does 315 * *not* reflect the underlying device, so that no renames or links 316 * can occur to or from the /dev hierarchy. 317 */ 318 if (is_zonedevfs) { 319 dev_t dev; 320 321 mutex_enter(&lofs_minor_lock); 322 do { 323 lofs_minor = (lofs_minor + 1) & MAXMIN32; 324 dev = makedevice(lofs_major, lofs_minor); 325 } while (vfs_devismounted(dev)); 326 mutex_exit(&lofs_minor_lock); 327 328 vfsp->vfs_dev = dev; 329 vfs_make_fsid(&vfsp->vfs_fsid, dev, lofsfstype); 330 331 li->li_flag |= LO_ZONEDEVFS; 332 } else { 333 vfsp->vfs_dev = li->li_realvfs->vfs_dev; 334 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 335 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 336 } 337 338 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 339 li->li_flag |= LO_NOSUB; 340 } 341 342 /* 343 * Setup the hashtable. If the root of this mount isn't a directory, 344 * there's no point in allocating a large hashtable. A table with one 345 * bucket is sufficient. 346 */ 347 if (realrootvp->v_type != VDIR) 348 lsetup(li, 1); 349 else 350 lsetup(li, 0); 351 352 /* 353 * Make the root vnode 354 */ 355 srootvp = makelonode(realrootvp, li); 356 srootvp->v_flag |= VROOT; 357 li->li_rootvp = srootvp; 358 359 #ifdef LODEBUG 360 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 361 vfsp, li->li_realvfs, srootvp, realrootvp, li); 362 #endif 363 return (0); 364 } 365 366 /* 367 * Undo loopback mount 368 */ 369 static int 370 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 371 { 372 struct loinfo *li; 373 374 if (secpolicy_fs_unmount(cr, vfsp) != 0) 375 return (EPERM); 376 377 /* 378 * Forced unmount is not supported by this file system 379 * and thus, ENOTSUP, is being returned. 380 */ 381 if (flag & MS_FORCE) 382 return (ENOTSUP); 383 384 li = vtoli(vfsp); 385 #ifdef LODEBUG 386 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 387 #endif 388 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 389 #ifdef LODEBUG 390 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 391 li->li_rootvp->v_count); 392 #endif 393 return (EBUSY); 394 } 395 VN_RELE(li->li_rootvp); 396 return (0); 397 } 398 399 /* 400 * Find root of lofs mount. 401 */ 402 static int 403 lo_root(struct vfs *vfsp, struct vnode **vpp) 404 { 405 *vpp = vtoli(vfsp)->li_rootvp; 406 #ifdef LODEBUG 407 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 408 #endif 409 /* 410 * If the root of the filesystem is a special file, return the specvp 411 * version of the vnode. We don't save the specvp vnode in our 412 * hashtable since that's exclusively for lnodes. 413 */ 414 if (IS_DEVVP(*vpp)) { 415 struct vnode *svp; 416 417 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 418 if (svp == NULL) 419 return (ENOSYS); 420 *vpp = svp; 421 } else { 422 VN_HOLD(*vpp); 423 } 424 425 return (0); 426 } 427 428 /* 429 * Get file system statistics. 430 */ 431 static int 432 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 433 { 434 vnode_t *realrootvp; 435 436 #ifdef LODEBUG 437 lo_dprint(4, "lostatvfs %p\n", vfsp); 438 #endif 439 /* 440 * Using realrootvp->v_vfsp (instead of the realvfsp that was 441 * cached) is necessary to make lofs work woth forced UFS unmounts. 442 * In the case of a forced unmount, UFS stores a set of dummy vfsops 443 * in all the (i)vnodes in the filesystem. The dummy ops simply 444 * returns back EIO. 445 */ 446 (void) lo_realvfs(vfsp, &realrootvp); 447 if (realrootvp != NULL) 448 return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 449 else 450 return (EIO); 451 } 452 453 /* 454 * LOFS doesn't have any data or metadata to flush, pending I/O on the 455 * underlying filesystem will be flushed when such filesystem is synched. 456 */ 457 /* ARGSUSED */ 458 static int 459 lo_sync(struct vfs *vfsp, 460 short flag, 461 struct cred *cr) 462 { 463 #ifdef LODEBUG 464 lo_dprint(4, "lo_sync: %p\n", vfsp); 465 #endif 466 return (0); 467 } 468 469 /* 470 * Obtain the vnode from the underlying filesystem. 471 */ 472 static int 473 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 474 { 475 vnode_t *realrootvp; 476 477 #ifdef LODEBUG 478 lo_dprint(4, "lo_vget: %p\n", vfsp); 479 #endif 480 (void) lo_realvfs(vfsp, &realrootvp); 481 if (realrootvp != NULL) 482 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 483 else 484 return (EIO); 485 } 486 487 /* 488 * Free mount-specific data. 489 */ 490 static void 491 lo_freevfs(struct vfs *vfsp) 492 { 493 struct loinfo *li = vtoli(vfsp); 494 495 ldestroy(li); 496 kmem_free(li, sizeof (struct loinfo)); 497 } 498 499 static int 500 lofsinit(int fstyp, char *name) 501 { 502 static const fs_operation_def_t lo_vfsops_template[] = { 503 VFSNAME_MOUNT, lo_mount, 504 VFSNAME_UNMOUNT, lo_unmount, 505 VFSNAME_ROOT, lo_root, 506 VFSNAME_STATVFS, lo_statvfs, 507 VFSNAME_SYNC, (fs_generic_func_p) lo_sync, 508 VFSNAME_VGET, lo_vget, 509 VFSNAME_FREEVFS, (fs_generic_func_p) lo_freevfs, 510 NULL, NULL 511 }; 512 int error; 513 514 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 515 if (error != 0) { 516 cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 517 return (error); 518 } 519 520 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 521 if (error != 0) { 522 (void) vfs_freevfsops_by_type(fstyp); 523 cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 524 return (error); 525 } 526 527 lofsfstype = fstyp; 528 529 if ((lofs_major = getudev()) == (major_t)-1) { 530 (void) vfs_freevfsops_by_type(fstyp); 531 cmn_err(CE_WARN, "lofsinit: Can't get unique device number."); 532 return (ENXIO); 533 } 534 535 lofs_minor = 0; 536 mutex_init(&lofs_minor_lock, NULL, MUTEX_DEFAULT, NULL); 537 538 return (0); 539 } 540