1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/param.h> 29 #include <sys/errno.h> 30 #include <sys/vfs.h> 31 #include <sys/vfs_opreg.h> 32 #include <sys/vnode.h> 33 #include <sys/uio.h> 34 #include <sys/pathname.h> 35 #include <sys/kmem.h> 36 #include <sys/cred.h> 37 #include <sys/statvfs.h> 38 #include <sys/fs/lofs_info.h> 39 #include <sys/fs/lofs_node.h> 40 #include <sys/mount.h> 41 #include <sys/mntent.h> 42 #include <sys/mkdev.h> 43 #include <sys/priv.h> 44 #include <sys/sysmacros.h> 45 #include <sys/systm.h> 46 #include <sys/cmn_err.h> 47 #include <sys/policy.h> 48 #include <sys/tsol/label.h> 49 #include "fs/fs_subr.h" 50 51 /* 52 * This is the loadable module wrapper. 53 */ 54 #include <sys/modctl.h> 55 56 static mntopts_t lofs_mntopts; 57 58 static int lofsinit(int, char *); 59 60 static vfsdef_t vfw = { 61 VFSDEF_VERSION, 62 "lofs", 63 lofsinit, 64 VSW_HASPROTO|VSW_STATS, 65 &lofs_mntopts 66 }; 67 68 /* 69 * LOFS mount options table 70 */ 71 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 72 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 73 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 74 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 75 76 static mntopt_t mntopts[] = { 77 /* 78 * option name cancel option default arg flags 79 * private data 80 */ 81 { MNTOPT_XATTR, xattr_cancel, NULL, 0, 82 (void *)0 }, 83 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 84 (void *)0 }, 85 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 86 (void *)0 }, 87 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 88 (void *)0 }, 89 }; 90 91 static mntopts_t lofs_mntopts = { 92 sizeof (mntopts) / sizeof (mntopt_t), 93 mntopts 94 }; 95 96 /* 97 * Module linkage information for the kernel. 98 */ 99 100 static struct modlfs modlfs = { 101 &mod_fsops, "filesystem for lofs", &vfw 102 }; 103 104 static struct modlinkage modlinkage = { 105 MODREV_1, (void *)&modlfs, NULL 106 }; 107 108 /* 109 * This is the module initialization routine. 110 */ 111 112 int 113 _init(void) 114 { 115 int status; 116 117 lofs_subrinit(); 118 status = mod_install(&modlinkage); 119 if (status != 0) { 120 /* 121 * Cleanup previously initialized work. 122 */ 123 lofs_subrfini(); 124 } 125 126 return (status); 127 } 128 129 /* 130 * Don't allow the lofs module to be unloaded for now. 131 * There is a memory leak if it gets unloaded. 132 */ 133 134 int 135 _fini(void) 136 { 137 return (EBUSY); 138 } 139 140 int 141 _info(struct modinfo *modinfop) 142 { 143 return (mod_info(&modlinkage, modinfop)); 144 } 145 146 147 static int lofsfstype; 148 vfsops_t *lo_vfsops; 149 150 /* 151 * lo mount vfsop 152 * Set up mount info record and attach it to vfs struct. 153 */ 154 /*ARGSUSED*/ 155 static int 156 lo_mount(struct vfs *vfsp, 157 struct vnode *vp, 158 struct mounta *uap, 159 struct cred *cr) 160 { 161 int error; 162 struct vnode *srootvp = NULL; /* the server's root */ 163 struct vnode *realrootvp; 164 struct loinfo *li; 165 int nodev; 166 167 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 168 169 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 170 return (EPERM); 171 172 /* 173 * Loopback devices which get "nodevices" added can be done without 174 * "nodevices" set because we cannot import devices into a zone 175 * with loopback. Note that we have all zone privileges when 176 * this happens; if not, we'd have gotten "nosuid". 177 */ 178 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 179 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 180 181 mutex_enter(&vp->v_lock); 182 if (!(uap->flags & MS_OVERLAY) && 183 (vp->v_count != 1 || (vp->v_flag & VROOT))) { 184 mutex_exit(&vp->v_lock); 185 return (EBUSY); 186 } 187 mutex_exit(&vp->v_lock); 188 189 /* 190 * Find real root, and make vfs point to real vfs 191 */ 192 193 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 194 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp)) 195 return (error); 196 197 /* 198 * Enforce MAC policy if needed. 199 * 200 * Loopback mounts must not allow writing up. The dominance test 201 * is intended to prevent a global zone caller from accidentally 202 * creating write-up conditions between two labeled zones. 203 * Local zones can't violate MAC on their own without help from 204 * the global zone because they can't name a pathname that 205 * they don't already have. 206 * 207 * The special case check for the NET_MAC_AWARE process flag is 208 * to support the case of the automounter in the global zone. We 209 * permit automounting of local zone directories such as home 210 * directories, into the global zone as required by setlabel, 211 * zonecopy, and saving of desktop sessions. Such mounts are 212 * trusted not to expose the contents of one zone's directories 213 * to another by leaking them through the global zone. 214 */ 215 if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { 216 char specname[MAXPATHLEN]; 217 zone_t *from_zptr; 218 zone_t *to_zptr; 219 220 if (vnodetopath(NULL, realrootvp, specname, 221 sizeof (specname), CRED()) != 0) { 222 VN_RELE(realrootvp); 223 return (EACCES); 224 } 225 226 from_zptr = zone_find_by_path(specname); 227 to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 228 229 /* 230 * Special case for zone devfs: the zone for /dev will 231 * incorrectly appear as the global zone since it's not 232 * under the zone rootpath. So for zone devfs check allow 233 * read-write mounts. 234 * 235 * Second special case for scratch zones used for Live Upgrade: 236 * this is used to mount the zone's root from /root to /a in 237 * the scratch zone. As with the other special case, this 238 * appears to be outside of the zone because it's not under 239 * the zone rootpath, which is $ZONEPATH/lu in the scratch 240 * zone case. 241 */ 242 243 if (from_zptr != to_zptr && 244 !(to_zptr->zone_flags & ZF_IS_SCRATCH)) { 245 /* 246 * We know at this point that the labels aren't equal 247 * because the zone pointers aren't equal, and zones 248 * can't share a label. 249 * 250 * If the source is the global zone then making 251 * it available to a local zone must be done in 252 * read-only mode as the label will become admin_low. 253 * 254 * If it is a mount between local zones then if 255 * the current process is in the global zone and has 256 * the NET_MAC_AWARE flag, then regular read-write 257 * access is allowed. If it's in some other zone, but 258 * the label on the mount point dominates the original 259 * source, then allow the mount as read-only 260 * ("read-down"). 261 */ 262 if (from_zptr->zone_id == GLOBAL_ZONEID) { 263 /* make the mount read-only */ 264 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 265 } else { /* cross-zone mount */ 266 if (to_zptr->zone_id == GLOBAL_ZONEID && 267 /* LINTED: no consequent */ 268 getpflags(NET_MAC_AWARE, cr) != 0) { 269 /* Allow the mount as read-write */ 270 } else if (bldominates( 271 label2bslabel(to_zptr->zone_slabel), 272 label2bslabel(from_zptr->zone_slabel))) { 273 /* make the mount read-only */ 274 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 275 } else { 276 VN_RELE(realrootvp); 277 zone_rele(to_zptr); 278 zone_rele(from_zptr); 279 return (EACCES); 280 } 281 } 282 } 283 zone_rele(to_zptr); 284 zone_rele(from_zptr); 285 } 286 287 /* 288 * realrootvp may be an AUTOFS node, in which case we 289 * perform a VOP_ACCESS() to trigger the mount of the 290 * intended filesystem, so we loopback mount the intended 291 * filesystem instead of the AUTOFS filesystem. 292 */ 293 (void) VOP_ACCESS(realrootvp, 0, 0, cr, NULL); 294 295 /* 296 * We're interested in the top most filesystem. 297 * This is specially important when uap->spec is a trigger 298 * AUTOFS node, since we're really interested in mounting the 299 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 300 * call not the AUTOFS node itself. 301 */ 302 if (vn_mountedvfs(realrootvp) != NULL) { 303 if (error = traverse(&realrootvp)) { 304 VN_RELE(realrootvp); 305 return (error); 306 } 307 } 308 309 /* 310 * Allocate a vfs info struct and attach it 311 */ 312 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 313 li->li_realvfs = realrootvp->v_vfsp; 314 li->li_mountvfs = vfsp; 315 316 /* 317 * Set mount flags to be inherited by loopback vfs's 318 */ 319 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 320 li->li_mflag |= VFS_RDONLY; 321 } 322 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 323 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 324 } 325 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 326 li->li_mflag |= VFS_NODEVICES; 327 } 328 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 329 li->li_mflag |= VFS_NOSETUID; 330 } 331 /* 332 * Permissive flags are added to the "deny" bitmap. 333 */ 334 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 335 li->li_dflag |= VFS_XATTR; 336 } 337 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 338 li->li_dflag |= VFS_NBMAND; 339 } 340 341 /* 342 * Propagate inheritable mount flags from the real vfs. 343 */ 344 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 345 !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 346 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 347 VFS_NODISPLAY); 348 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 349 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 350 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 351 VFS_NODISPLAY); 352 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 353 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 354 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 355 VFS_NODISPLAY); 356 /* 357 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 358 * such as VFS_RDONLY, are handled differently. An explicit 359 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 360 */ 361 if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 362 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 363 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 364 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 365 VFS_NODISPLAY); 366 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 367 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 368 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 369 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 370 VFS_NODISPLAY); 371 372 li->li_refct = 0; 373 vfsp->vfs_data = (caddr_t)li; 374 vfsp->vfs_bcount = 0; 375 vfsp->vfs_fstype = lofsfstype; 376 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 377 378 vfsp->vfs_dev = li->li_realvfs->vfs_dev; 379 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 380 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 381 382 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 383 li->li_flag |= LO_NOSUB; 384 } 385 386 /* 387 * Propagate any VFS features 388 */ 389 390 vfs_propagate_features(li->li_realvfs, vfsp); 391 392 /* 393 * Setup the hashtable. If the root of this mount isn't a directory, 394 * there's no point in allocating a large hashtable. A table with one 395 * bucket is sufficient. 396 */ 397 if (realrootvp->v_type != VDIR) 398 lsetup(li, 1); 399 else 400 lsetup(li, 0); 401 402 /* 403 * Make the root vnode 404 */ 405 srootvp = makelonode(realrootvp, li, 0); 406 srootvp->v_flag |= VROOT; 407 li->li_rootvp = srootvp; 408 409 #ifdef LODEBUG 410 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 411 vfsp, li->li_realvfs, srootvp, realrootvp, li); 412 #endif 413 return (0); 414 } 415 416 /* 417 * Undo loopback mount 418 */ 419 static int 420 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 421 { 422 struct loinfo *li; 423 424 if (secpolicy_fs_unmount(cr, vfsp) != 0) 425 return (EPERM); 426 427 /* 428 * Forced unmount is not supported by this file system 429 * and thus, ENOTSUP, is being returned. 430 */ 431 if (flag & MS_FORCE) 432 return (ENOTSUP); 433 434 li = vtoli(vfsp); 435 #ifdef LODEBUG 436 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 437 #endif 438 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 439 #ifdef LODEBUG 440 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 441 li->li_rootvp->v_count); 442 #endif 443 return (EBUSY); 444 } 445 VN_RELE(li->li_rootvp); 446 return (0); 447 } 448 449 /* 450 * Find root of lofs mount. 451 */ 452 static int 453 lo_root(struct vfs *vfsp, struct vnode **vpp) 454 { 455 *vpp = vtoli(vfsp)->li_rootvp; 456 #ifdef LODEBUG 457 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 458 #endif 459 /* 460 * If the root of the filesystem is a special file, return the specvp 461 * version of the vnode. We don't save the specvp vnode in our 462 * hashtable since that's exclusively for lnodes. 463 */ 464 if (IS_DEVVP(*vpp)) { 465 struct vnode *svp; 466 467 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 468 if (svp == NULL) 469 return (ENOSYS); 470 *vpp = svp; 471 } else { 472 VN_HOLD(*vpp); 473 } 474 475 return (0); 476 } 477 478 /* 479 * Get file system statistics. 480 */ 481 static int 482 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 483 { 484 vnode_t *realrootvp; 485 486 #ifdef LODEBUG 487 lo_dprint(4, "lostatvfs %p\n", vfsp); 488 #endif 489 /* 490 * Using realrootvp->v_vfsp (instead of the realvfsp that was 491 * cached) is necessary to make lofs work woth forced UFS unmounts. 492 * In the case of a forced unmount, UFS stores a set of dummy vfsops 493 * in all the (i)vnodes in the filesystem. The dummy ops simply 494 * returns back EIO. 495 */ 496 (void) lo_realvfs(vfsp, &realrootvp); 497 if (realrootvp != NULL) 498 return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 499 else 500 return (EIO); 501 } 502 503 /* 504 * LOFS doesn't have any data or metadata to flush, pending I/O on the 505 * underlying filesystem will be flushed when such filesystem is synched. 506 */ 507 /* ARGSUSED */ 508 static int 509 lo_sync(struct vfs *vfsp, 510 short flag, 511 struct cred *cr) 512 { 513 #ifdef LODEBUG 514 lo_dprint(4, "lo_sync: %p\n", vfsp); 515 #endif 516 return (0); 517 } 518 519 /* 520 * Obtain the vnode from the underlying filesystem. 521 */ 522 static int 523 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 524 { 525 vnode_t *realrootvp; 526 527 #ifdef LODEBUG 528 lo_dprint(4, "lo_vget: %p\n", vfsp); 529 #endif 530 (void) lo_realvfs(vfsp, &realrootvp); 531 if (realrootvp != NULL) 532 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 533 else 534 return (EIO); 535 } 536 537 /* 538 * Free mount-specific data. 539 */ 540 static void 541 lo_freevfs(struct vfs *vfsp) 542 { 543 struct loinfo *li = vtoli(vfsp); 544 545 ldestroy(li); 546 kmem_free(li, sizeof (struct loinfo)); 547 } 548 549 static int 550 lofsinit(int fstyp, char *name) 551 { 552 static const fs_operation_def_t lo_vfsops_template[] = { 553 VFSNAME_MOUNT, { .vfs_mount = lo_mount }, 554 VFSNAME_UNMOUNT, { .vfs_unmount = lo_unmount }, 555 VFSNAME_ROOT, { .vfs_root = lo_root }, 556 VFSNAME_STATVFS, { .vfs_statvfs = lo_statvfs }, 557 VFSNAME_SYNC, { .vfs_sync = lo_sync }, 558 VFSNAME_VGET, { .vfs_vget = lo_vget }, 559 VFSNAME_FREEVFS, { .vfs_freevfs = lo_freevfs }, 560 NULL, NULL 561 }; 562 int error; 563 564 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 565 if (error != 0) { 566 cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 567 return (error); 568 } 569 570 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 571 if (error != 0) { 572 (void) vfs_freevfsops_by_type(fstyp); 573 cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 574 return (error); 575 } 576 577 lofsfstype = fstyp; 578 579 return (0); 580 } 581