1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/param.h> 29 #include <sys/errno.h> 30 #include <sys/vfs.h> 31 #include <sys/vfs_opreg.h> 32 #include <sys/vnode.h> 33 #include <sys/uio.h> 34 #include <sys/pathname.h> 35 #include <sys/kmem.h> 36 #include <sys/cred.h> 37 #include <sys/statvfs.h> 38 #include <sys/fs/lofs_info.h> 39 #include <sys/fs/lofs_node.h> 40 #include <sys/mount.h> 41 #include <sys/mntent.h> 42 #include <sys/mkdev.h> 43 #include <sys/priv.h> 44 #include <sys/sysmacros.h> 45 #include <sys/systm.h> 46 #include <sys/cmn_err.h> 47 #include <sys/policy.h> 48 #include <sys/tsol/label.h> 49 #include "fs/fs_subr.h" 50 51 /* 52 * This is the loadable module wrapper. 53 */ 54 #include <sys/modctl.h> 55 56 static mntopts_t lofs_mntopts; 57 58 static int lofsinit(int, char *); 59 60 static vfsdef_t vfw = { 61 VFSDEF_VERSION, 62 "lofs", 63 lofsinit, 64 VSW_HASPROTO|VSW_STATS, 65 &lofs_mntopts 66 }; 67 68 /* 69 * LOFS mount options table 70 */ 71 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 72 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 73 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 74 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 75 76 static mntopt_t mntopts[] = { 77 /* 78 * option name cancel option default arg flags 79 * private data 80 */ 81 { MNTOPT_XATTR, xattr_cancel, NULL, 0, 82 (void *)0 }, 83 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 84 (void *)0 }, 85 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 86 (void *)0 }, 87 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 88 (void *)0 }, 89 }; 90 91 static mntopts_t lofs_mntopts = { 92 sizeof (mntopts) / sizeof (mntopt_t), 93 mntopts 94 }; 95 96 /* 97 * Module linkage information for the kernel. 98 */ 99 100 static struct modlfs modlfs = { 101 &mod_fsops, "filesystem for lofs", &vfw 102 }; 103 104 static struct modlinkage modlinkage = { 105 MODREV_1, (void *)&modlfs, NULL 106 }; 107 108 /* 109 * This is the module initialization routine. 110 */ 111 112 int 113 _init(void) 114 { 115 int status; 116 117 lofs_subrinit(); 118 status = mod_install(&modlinkage); 119 if (status != 0) { 120 /* 121 * Cleanup previously initialized work. 122 */ 123 lofs_subrfini(); 124 } 125 126 return (status); 127 } 128 129 /* 130 * Don't allow the lofs module to be unloaded for now. 131 * There is a memory leak if it gets unloaded. 132 */ 133 134 int 135 _fini(void) 136 { 137 return (EBUSY); 138 } 139 140 int 141 _info(struct modinfo *modinfop) 142 { 143 return (mod_info(&modlinkage, modinfop)); 144 } 145 146 147 static int lofsfstype; 148 vfsops_t *lo_vfsops; 149 150 /* 151 * lo mount vfsop 152 * Set up mount info record and attach it to vfs struct. 153 */ 154 /*ARGSUSED*/ 155 static int 156 lo_mount(struct vfs *vfsp, 157 struct vnode *vp, 158 struct mounta *uap, 159 struct cred *cr) 160 { 161 int error; 162 struct vnode *srootvp = NULL; /* the server's root */ 163 struct vnode *realrootvp; 164 struct loinfo *li; 165 int nodev; 166 167 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 168 169 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 170 return (EPERM); 171 172 /* 173 * Loopback devices which get "nodevices" added can be done without 174 * "nodevices" set because we cannot import devices into a zone 175 * with loopback. Note that we have all zone privileges when 176 * this happens; if not, we'd have gotten "nosuid". 177 */ 178 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 179 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 180 181 mutex_enter(&vp->v_lock); 182 if (!(uap->flags & MS_OVERLAY) && 183 (vp->v_count != 1 || (vp->v_flag & VROOT))) { 184 mutex_exit(&vp->v_lock); 185 return (EBUSY); 186 } 187 mutex_exit(&vp->v_lock); 188 189 /* 190 * Find real root, and make vfs point to real vfs 191 */ 192 193 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 194 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp)) 195 return (error); 196 197 /* 198 * Enforce MAC policy if needed. 199 * 200 * Loopback mounts must not allow writing up. The dominance test 201 * is intended to prevent a global zone caller from accidentally 202 * creating write-up conditions between two labeled zones. 203 * Local zones can't violate MAC on their own without help from 204 * the global zone because they can't name a pathname that 205 * they don't already have. 206 * 207 * The special case check for the NET_MAC_AWARE process flag is 208 * to support the case of the automounter in the global zone. We 209 * permit automounting of local zone directories such as home 210 * directories, into the global zone as required by setlabel, 211 * zonecopy, and saving of desktop sessions. Such mounts are 212 * trusted not to expose the contents of one zone's directories 213 * to another by leaking them through the global zone. 214 */ 215 if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { 216 char specname[MAXPATHLEN]; 217 zone_t *from_zptr; 218 zone_t *to_zptr; 219 220 if (vnodetopath(NULL, realrootvp, specname, 221 sizeof (specname), CRED()) != 0) 222 return (EACCES); 223 224 from_zptr = zone_find_by_path(specname); 225 to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 226 227 /* 228 * Special case for zone devfs: the zone for /dev will 229 * incorrectly appear as the global zone since it's not 230 * under the zone rootpath. So for zone devfs check allow 231 * read-write mounts. 232 * 233 * Second special case for scratch zones used for Live Upgrade: 234 * this is used to mount the zone's root from /root to /a in 235 * the scratch zone. As with the other special case, this 236 * appears to be outside of the zone because it's not under 237 * the zone rootpath, which is $ZONEPATH/lu in the scratch 238 * zone case. 239 */ 240 241 if (from_zptr != to_zptr && 242 !(to_zptr->zone_flags & ZF_IS_SCRATCH)) { 243 /* 244 * We know at this point that the labels aren't equal 245 * because the zone pointers aren't equal, and zones 246 * can't share a label. 247 * 248 * If the source is the global zone then making 249 * it available to a local zone must be done in 250 * read-only mode as the label will become admin_low. 251 * 252 * If it is a mount between local zones then if 253 * the current process is in the global zone and has 254 * the NET_MAC_AWARE flag, then regular read-write 255 * access is allowed. If it's in some other zone, but 256 * the label on the mount point dominates the original 257 * source, then allow the mount as read-only 258 * ("read-down"). 259 */ 260 if (from_zptr->zone_id == GLOBAL_ZONEID) { 261 /* make the mount read-only */ 262 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 263 } else { /* cross-zone mount */ 264 if (to_zptr->zone_id == GLOBAL_ZONEID && 265 /* LINTED: no consequent */ 266 getpflags(NET_MAC_AWARE, cr) != 0) { 267 /* Allow the mount as read-write */ 268 } else if (bldominates( 269 label2bslabel(to_zptr->zone_slabel), 270 label2bslabel(from_zptr->zone_slabel))) { 271 /* make the mount read-only */ 272 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 273 } else { 274 zone_rele(to_zptr); 275 zone_rele(from_zptr); 276 return (EACCES); 277 } 278 } 279 } 280 zone_rele(to_zptr); 281 zone_rele(from_zptr); 282 } 283 284 /* 285 * realrootvp may be an AUTOFS node, in which case we 286 * perform a VOP_ACCESS() to trigger the mount of the 287 * intended filesystem, so we loopback mount the intended 288 * filesystem instead of the AUTOFS filesystem. 289 */ 290 (void) VOP_ACCESS(realrootvp, 0, 0, cr, NULL); 291 292 /* 293 * We're interested in the top most filesystem. 294 * This is specially important when uap->spec is a trigger 295 * AUTOFS node, since we're really interested in mounting the 296 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 297 * call not the AUTOFS node itself. 298 */ 299 if (vn_mountedvfs(realrootvp) != NULL) { 300 if (error = traverse(&realrootvp)) { 301 VN_RELE(realrootvp); 302 return (error); 303 } 304 } 305 306 /* 307 * Allocate a vfs info struct and attach it 308 */ 309 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 310 li->li_realvfs = realrootvp->v_vfsp; 311 li->li_mountvfs = vfsp; 312 313 /* 314 * Set mount flags to be inherited by loopback vfs's 315 */ 316 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 317 li->li_mflag |= VFS_RDONLY; 318 } 319 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 320 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 321 } 322 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 323 li->li_mflag |= VFS_NODEVICES; 324 } 325 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 326 li->li_mflag |= VFS_NOSETUID; 327 } 328 /* 329 * Permissive flags are added to the "deny" bitmap. 330 */ 331 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 332 li->li_dflag |= VFS_XATTR; 333 } 334 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 335 li->li_dflag |= VFS_NBMAND; 336 } 337 338 /* 339 * Propagate inheritable mount flags from the real vfs. 340 */ 341 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 342 !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 343 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 344 VFS_NODISPLAY); 345 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 346 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 347 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 348 VFS_NODISPLAY); 349 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 350 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 351 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 352 VFS_NODISPLAY); 353 /* 354 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 355 * such as VFS_RDONLY, are handled differently. An explicit 356 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 357 */ 358 if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 359 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 360 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 361 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 362 VFS_NODISPLAY); 363 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 364 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 365 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 366 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 367 VFS_NODISPLAY); 368 369 li->li_refct = 0; 370 vfsp->vfs_data = (caddr_t)li; 371 vfsp->vfs_bcount = 0; 372 vfsp->vfs_fstype = lofsfstype; 373 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 374 375 vfsp->vfs_dev = li->li_realvfs->vfs_dev; 376 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 377 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 378 379 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 380 li->li_flag |= LO_NOSUB; 381 } 382 383 /* 384 * Propagate any VFS features 385 */ 386 387 vfs_propagate_features(li->li_realvfs, vfsp); 388 389 /* 390 * Setup the hashtable. If the root of this mount isn't a directory, 391 * there's no point in allocating a large hashtable. A table with one 392 * bucket is sufficient. 393 */ 394 if (realrootvp->v_type != VDIR) 395 lsetup(li, 1); 396 else 397 lsetup(li, 0); 398 399 /* 400 * Make the root vnode 401 */ 402 srootvp = makelonode(realrootvp, li, 0); 403 srootvp->v_flag |= VROOT; 404 li->li_rootvp = srootvp; 405 406 #ifdef LODEBUG 407 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 408 vfsp, li->li_realvfs, srootvp, realrootvp, li); 409 #endif 410 return (0); 411 } 412 413 /* 414 * Undo loopback mount 415 */ 416 static int 417 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 418 { 419 struct loinfo *li; 420 421 if (secpolicy_fs_unmount(cr, vfsp) != 0) 422 return (EPERM); 423 424 /* 425 * Forced unmount is not supported by this file system 426 * and thus, ENOTSUP, is being returned. 427 */ 428 if (flag & MS_FORCE) 429 return (ENOTSUP); 430 431 li = vtoli(vfsp); 432 #ifdef LODEBUG 433 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 434 #endif 435 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 436 #ifdef LODEBUG 437 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 438 li->li_rootvp->v_count); 439 #endif 440 return (EBUSY); 441 } 442 VN_RELE(li->li_rootvp); 443 return (0); 444 } 445 446 /* 447 * Find root of lofs mount. 448 */ 449 static int 450 lo_root(struct vfs *vfsp, struct vnode **vpp) 451 { 452 *vpp = vtoli(vfsp)->li_rootvp; 453 #ifdef LODEBUG 454 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 455 #endif 456 /* 457 * If the root of the filesystem is a special file, return the specvp 458 * version of the vnode. We don't save the specvp vnode in our 459 * hashtable since that's exclusively for lnodes. 460 */ 461 if (IS_DEVVP(*vpp)) { 462 struct vnode *svp; 463 464 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 465 if (svp == NULL) 466 return (ENOSYS); 467 *vpp = svp; 468 } else { 469 VN_HOLD(*vpp); 470 } 471 472 return (0); 473 } 474 475 /* 476 * Get file system statistics. 477 */ 478 static int 479 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 480 { 481 vnode_t *realrootvp; 482 483 #ifdef LODEBUG 484 lo_dprint(4, "lostatvfs %p\n", vfsp); 485 #endif 486 /* 487 * Using realrootvp->v_vfsp (instead of the realvfsp that was 488 * cached) is necessary to make lofs work woth forced UFS unmounts. 489 * In the case of a forced unmount, UFS stores a set of dummy vfsops 490 * in all the (i)vnodes in the filesystem. The dummy ops simply 491 * returns back EIO. 492 */ 493 (void) lo_realvfs(vfsp, &realrootvp); 494 if (realrootvp != NULL) 495 return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 496 else 497 return (EIO); 498 } 499 500 /* 501 * LOFS doesn't have any data or metadata to flush, pending I/O on the 502 * underlying filesystem will be flushed when such filesystem is synched. 503 */ 504 /* ARGSUSED */ 505 static int 506 lo_sync(struct vfs *vfsp, 507 short flag, 508 struct cred *cr) 509 { 510 #ifdef LODEBUG 511 lo_dprint(4, "lo_sync: %p\n", vfsp); 512 #endif 513 return (0); 514 } 515 516 /* 517 * Obtain the vnode from the underlying filesystem. 518 */ 519 static int 520 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 521 { 522 vnode_t *realrootvp; 523 524 #ifdef LODEBUG 525 lo_dprint(4, "lo_vget: %p\n", vfsp); 526 #endif 527 (void) lo_realvfs(vfsp, &realrootvp); 528 if (realrootvp != NULL) 529 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 530 else 531 return (EIO); 532 } 533 534 /* 535 * Free mount-specific data. 536 */ 537 static void 538 lo_freevfs(struct vfs *vfsp) 539 { 540 struct loinfo *li = vtoli(vfsp); 541 542 ldestroy(li); 543 kmem_free(li, sizeof (struct loinfo)); 544 } 545 546 static int 547 lofsinit(int fstyp, char *name) 548 { 549 static const fs_operation_def_t lo_vfsops_template[] = { 550 VFSNAME_MOUNT, { .vfs_mount = lo_mount }, 551 VFSNAME_UNMOUNT, { .vfs_unmount = lo_unmount }, 552 VFSNAME_ROOT, { .vfs_root = lo_root }, 553 VFSNAME_STATVFS, { .vfs_statvfs = lo_statvfs }, 554 VFSNAME_SYNC, { .vfs_sync = lo_sync }, 555 VFSNAME_VGET, { .vfs_vget = lo_vget }, 556 VFSNAME_FREEVFS, { .vfs_freevfs = lo_freevfs }, 557 NULL, NULL 558 }; 559 int error; 560 561 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 562 if (error != 0) { 563 cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 564 return (error); 565 } 566 567 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 568 if (error != 0) { 569 (void) vfs_freevfsops_by_type(fstyp); 570 cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 571 return (error); 572 } 573 574 lofsfstype = fstyp; 575 576 return (0); 577 } 578