1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/param.h> 29 #include <sys/errno.h> 30 #include <sys/vfs.h> 31 #include <sys/vnode.h> 32 #include <sys/uio.h> 33 #include <sys/pathname.h> 34 #include <sys/kmem.h> 35 #include <sys/cred.h> 36 #include <sys/statvfs.h> 37 #include <sys/fs/lofs_info.h> 38 #include <sys/fs/lofs_node.h> 39 #include <sys/mount.h> 40 #include <sys/mntent.h> 41 #include <sys/mkdev.h> 42 #include <sys/priv.h> 43 #include <sys/sysmacros.h> 44 #include <sys/systm.h> 45 #include <sys/cmn_err.h> 46 #include <sys/policy.h> 47 #include <sys/tsol/label.h> 48 #include "fs/fs_subr.h" 49 50 /* 51 * This is the loadable module wrapper. 52 */ 53 #include <sys/modctl.h> 54 55 static mntopts_t lofs_mntopts; 56 57 static int lofsinit(int, char *); 58 59 static vfsdef_t vfw = { 60 VFSDEF_VERSION, 61 "lofs", 62 lofsinit, 63 VSW_HASPROTO|VSW_STATS, 64 &lofs_mntopts 65 }; 66 67 /* 68 * LOFS mount options table 69 */ 70 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 71 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 72 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 73 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 74 75 static mntopt_t mntopts[] = { 76 /* 77 * option name cancel option default arg flags 78 * private data 79 */ 80 { MNTOPT_XATTR, xattr_cancel, NULL, 0, 81 (void *)0 }, 82 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 83 (void *)0 }, 84 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 85 (void *)0 }, 86 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 87 (void *)0 }, 88 }; 89 90 static mntopts_t lofs_mntopts = { 91 sizeof (mntopts) / sizeof (mntopt_t), 92 mntopts 93 }; 94 95 /* 96 * Module linkage information for the kernel. 97 */ 98 99 static struct modlfs modlfs = { 100 &mod_fsops, "filesystem for lofs", &vfw 101 }; 102 103 static struct modlinkage modlinkage = { 104 MODREV_1, (void *)&modlfs, NULL 105 }; 106 107 /* 108 * This is the module initialization routine. 109 */ 110 111 int 112 _init(void) 113 { 114 int status; 115 116 lofs_subrinit(); 117 status = mod_install(&modlinkage); 118 if (status != 0) { 119 /* 120 * Cleanup previously initialized work. 121 */ 122 lofs_subrfini(); 123 } 124 125 return (status); 126 } 127 128 /* 129 * Don't allow the lofs module to be unloaded for now. 130 * There is a memory leak if it gets unloaded. 131 */ 132 133 int 134 _fini(void) 135 { 136 return (EBUSY); 137 } 138 139 int 140 _info(struct modinfo *modinfop) 141 { 142 return (mod_info(&modlinkage, modinfop)); 143 } 144 145 146 static int lofsfstype; 147 vfsops_t *lo_vfsops; 148 149 /* 150 * lo mount vfsop 151 * Set up mount info record and attach it to vfs struct. 152 */ 153 /*ARGSUSED*/ 154 static int 155 lo_mount(struct vfs *vfsp, 156 struct vnode *vp, 157 struct mounta *uap, 158 struct cred *cr) 159 { 160 int error; 161 struct vnode *srootvp = NULL; /* the server's root */ 162 struct vnode *realrootvp; 163 struct loinfo *li; 164 int nodev; 165 166 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 167 168 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 169 return (EPERM); 170 171 /* 172 * Loopback devices which get "nodevices" added can be done without 173 * "nodevices" set because we cannot import devices into a zone 174 * with loopback. Note that we have all zone privileges when 175 * this happens; if not, we'd have gotten "nosuid". 176 */ 177 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 178 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 179 180 mutex_enter(&vp->v_lock); 181 if (!(uap->flags & MS_OVERLAY) && 182 (vp->v_count != 1 || (vp->v_flag & VROOT))) { 183 mutex_exit(&vp->v_lock); 184 return (EBUSY); 185 } 186 mutex_exit(&vp->v_lock); 187 188 /* 189 * Find real root, and make vfs point to real vfs 190 */ 191 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 192 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, 193 &realrootvp)) 194 return (error); 195 196 /* 197 * Enforce MAC policy if needed. 198 * 199 * Loopback mounts must not allow writing up. The dominance test 200 * is intended to prevent a global zone caller from accidentally 201 * creating write-up conditions between two labeled zones. 202 * Local zones can't violate MAC on their own without help from 203 * the global zone because they can't name a pathname that 204 * they don't already have. 205 * 206 * The special case check for the NET_MAC_AWARE process flag is 207 * to support the case of the automounter in the global zone. We 208 * permit automounting of local zone directories such as home 209 * directories, into the global zone as required by setlabel, 210 * zonecopy, and saving of desktop sessions. Such mounts are 211 * trusted not to expose the contents of one zone's directories 212 * to another by leaking them through the global zone. 213 */ 214 if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { 215 char specname[MAXPATHLEN]; 216 zone_t *from_zptr; 217 zone_t *to_zptr; 218 219 if (vnodetopath(NULL, realrootvp, specname, 220 sizeof (specname), CRED()) != 0) 221 return (EACCES); 222 223 from_zptr = zone_find_by_path(specname); 224 to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 225 226 /* 227 * Special case for zone devfs: the zone for /dev will 228 * incorrectly appear as the global zone since it's not 229 * under the zone rootpath. So for zone devfs check allow 230 * read-write mounts. 231 * 232 * Second special case for scratch zones used for Live Upgrade: 233 * this is used to mount the zone's root from /root to /a in 234 * the scratch zone. As with the other special case, this 235 * appears to be outside of the zone because it's not under 236 * the zone rootpath, which is $ZONEPATH/lu in the scratch 237 * zone case. 238 */ 239 240 if (from_zptr != to_zptr && 241 !(to_zptr->zone_flags & ZF_IS_SCRATCH)) { 242 /* 243 * We know at this point that the labels aren't equal 244 * because the zone pointers aren't equal, and zones 245 * can't share a label. 246 * 247 * If the source is the global zone then making 248 * it available to a local zone must be done in 249 * read-only mode as the label will become admin_low. 250 * 251 * If it is a mount between local zones then if 252 * the current process is in the global zone and has 253 * the NET_MAC_AWARE flag, then regular read-write 254 * access is allowed. If it's in some other zone, but 255 * the label on the mount point dominates the original 256 * source, then allow the mount as read-only 257 * ("read-down"). 258 */ 259 if (from_zptr->zone_id == GLOBAL_ZONEID) { 260 /* make the mount read-only */ 261 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 262 } else { /* cross-zone mount */ 263 if (to_zptr->zone_id == GLOBAL_ZONEID && 264 /* LINTED: no consequent */ 265 getpflags(NET_MAC_AWARE, cr) != 0) { 266 /* Allow the mount as read-write */ 267 } else if (bldominates( 268 label2bslabel(to_zptr->zone_slabel), 269 label2bslabel(from_zptr->zone_slabel))) { 270 /* make the mount read-only */ 271 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 272 } else { 273 zone_rele(to_zptr); 274 zone_rele(from_zptr); 275 return (EACCES); 276 } 277 } 278 } 279 zone_rele(to_zptr); 280 zone_rele(from_zptr); 281 } 282 283 /* 284 * realrootvp may be an AUTOFS node, in which case we 285 * perform a VOP_ACCESS() to trigger the mount of the 286 * intended filesystem, so we loopback mount the intended 287 * filesystem instead of the AUTOFS filesystem. 288 */ 289 (void) VOP_ACCESS(realrootvp, 0, 0, cr); 290 291 /* 292 * We're interested in the top most filesystem. 293 * This is specially important when uap->spec is a trigger 294 * AUTOFS node, since we're really interested in mounting the 295 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 296 * call not the AUTOFS node itself. 297 */ 298 if (vn_mountedvfs(realrootvp) != NULL) { 299 if (error = traverse(&realrootvp)) { 300 VN_RELE(realrootvp); 301 return (error); 302 } 303 } 304 305 /* 306 * Allocate a vfs info struct and attach it 307 */ 308 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 309 li->li_realvfs = realrootvp->v_vfsp; 310 li->li_mountvfs = vfsp; 311 312 /* 313 * Set mount flags to be inherited by loopback vfs's 314 */ 315 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 316 li->li_mflag |= VFS_RDONLY; 317 } 318 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 319 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 320 } 321 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 322 li->li_mflag |= VFS_NODEVICES; 323 } 324 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 325 li->li_mflag |= VFS_NOSETUID; 326 } 327 /* 328 * Permissive flags are added to the "deny" bitmap. 329 */ 330 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 331 li->li_dflag |= VFS_XATTR; 332 } 333 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 334 li->li_dflag |= VFS_NBMAND; 335 } 336 337 /* 338 * Propagate inheritable mount flags from the real vfs. 339 */ 340 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 341 !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 342 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 343 VFS_NODISPLAY); 344 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 345 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 346 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 347 VFS_NODISPLAY); 348 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 349 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 350 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 351 VFS_NODISPLAY); 352 /* 353 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 354 * such as VFS_RDONLY, are handled differently. An explicit 355 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 356 */ 357 if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 358 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 359 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 360 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 361 VFS_NODISPLAY); 362 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 363 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 364 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 365 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 366 VFS_NODISPLAY); 367 368 li->li_refct = 0; 369 vfsp->vfs_data = (caddr_t)li; 370 vfsp->vfs_bcount = 0; 371 vfsp->vfs_fstype = lofsfstype; 372 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 373 374 vfsp->vfs_dev = li->li_realvfs->vfs_dev; 375 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 376 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 377 378 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 379 li->li_flag |= LO_NOSUB; 380 } 381 382 /* 383 * Setup the hashtable. If the root of this mount isn't a directory, 384 * there's no point in allocating a large hashtable. A table with one 385 * bucket is sufficient. 386 */ 387 if (realrootvp->v_type != VDIR) 388 lsetup(li, 1); 389 else 390 lsetup(li, 0); 391 392 /* 393 * Make the root vnode 394 */ 395 srootvp = makelonode(realrootvp, li, 0); 396 srootvp->v_flag |= VROOT; 397 li->li_rootvp = srootvp; 398 399 #ifdef LODEBUG 400 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 401 vfsp, li->li_realvfs, srootvp, realrootvp, li); 402 #endif 403 return (0); 404 } 405 406 /* 407 * Undo loopback mount 408 */ 409 static int 410 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 411 { 412 struct loinfo *li; 413 414 if (secpolicy_fs_unmount(cr, vfsp) != 0) 415 return (EPERM); 416 417 /* 418 * Forced unmount is not supported by this file system 419 * and thus, ENOTSUP, is being returned. 420 */ 421 if (flag & MS_FORCE) 422 return (ENOTSUP); 423 424 li = vtoli(vfsp); 425 #ifdef LODEBUG 426 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 427 #endif 428 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 429 #ifdef LODEBUG 430 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 431 li->li_rootvp->v_count); 432 #endif 433 return (EBUSY); 434 } 435 VN_RELE(li->li_rootvp); 436 return (0); 437 } 438 439 /* 440 * Find root of lofs mount. 441 */ 442 static int 443 lo_root(struct vfs *vfsp, struct vnode **vpp) 444 { 445 *vpp = vtoli(vfsp)->li_rootvp; 446 #ifdef LODEBUG 447 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 448 #endif 449 /* 450 * If the root of the filesystem is a special file, return the specvp 451 * version of the vnode. We don't save the specvp vnode in our 452 * hashtable since that's exclusively for lnodes. 453 */ 454 if (IS_DEVVP(*vpp)) { 455 struct vnode *svp; 456 457 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 458 if (svp == NULL) 459 return (ENOSYS); 460 *vpp = svp; 461 } else { 462 VN_HOLD(*vpp); 463 } 464 465 return (0); 466 } 467 468 /* 469 * Get file system statistics. 470 */ 471 static int 472 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 473 { 474 vnode_t *realrootvp; 475 476 #ifdef LODEBUG 477 lo_dprint(4, "lostatvfs %p\n", vfsp); 478 #endif 479 /* 480 * Using realrootvp->v_vfsp (instead of the realvfsp that was 481 * cached) is necessary to make lofs work woth forced UFS unmounts. 482 * In the case of a forced unmount, UFS stores a set of dummy vfsops 483 * in all the (i)vnodes in the filesystem. The dummy ops simply 484 * returns back EIO. 485 */ 486 (void) lo_realvfs(vfsp, &realrootvp); 487 if (realrootvp != NULL) 488 return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 489 else 490 return (EIO); 491 } 492 493 /* 494 * LOFS doesn't have any data or metadata to flush, pending I/O on the 495 * underlying filesystem will be flushed when such filesystem is synched. 496 */ 497 /* ARGSUSED */ 498 static int 499 lo_sync(struct vfs *vfsp, 500 short flag, 501 struct cred *cr) 502 { 503 #ifdef LODEBUG 504 lo_dprint(4, "lo_sync: %p\n", vfsp); 505 #endif 506 return (0); 507 } 508 509 /* 510 * Obtain the vnode from the underlying filesystem. 511 */ 512 static int 513 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 514 { 515 vnode_t *realrootvp; 516 517 #ifdef LODEBUG 518 lo_dprint(4, "lo_vget: %p\n", vfsp); 519 #endif 520 (void) lo_realvfs(vfsp, &realrootvp); 521 if (realrootvp != NULL) 522 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 523 else 524 return (EIO); 525 } 526 527 /* 528 * Free mount-specific data. 529 */ 530 static void 531 lo_freevfs(struct vfs *vfsp) 532 { 533 struct loinfo *li = vtoli(vfsp); 534 535 ldestroy(li); 536 kmem_free(li, sizeof (struct loinfo)); 537 } 538 539 static int 540 lofsinit(int fstyp, char *name) 541 { 542 static const fs_operation_def_t lo_vfsops_template[] = { 543 VFSNAME_MOUNT, lo_mount, 544 VFSNAME_UNMOUNT, lo_unmount, 545 VFSNAME_ROOT, lo_root, 546 VFSNAME_STATVFS, lo_statvfs, 547 VFSNAME_SYNC, (fs_generic_func_p) lo_sync, 548 VFSNAME_VGET, lo_vget, 549 VFSNAME_FREEVFS, (fs_generic_func_p) lo_freevfs, 550 NULL, NULL 551 }; 552 int error; 553 554 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 555 if (error != 0) { 556 cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 557 return (error); 558 } 559 560 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 561 if (error != 0) { 562 (void) vfs_freevfsops_by_type(fstyp); 563 cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 564 return (error); 565 } 566 567 lofsfstype = fstyp; 568 569 return (0); 570 } 571