1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/param.h> 26 #include <sys/errno.h> 27 #include <sys/vfs.h> 28 #include <sys/vfs_opreg.h> 29 #include <sys/vnode.h> 30 #include <sys/uio.h> 31 #include <sys/pathname.h> 32 #include <sys/kmem.h> 33 #include <sys/cred.h> 34 #include <sys/statvfs.h> 35 #include <sys/fs/lofs_info.h> 36 #include <sys/fs/lofs_node.h> 37 #include <sys/mount.h> 38 #include <sys/mntent.h> 39 #include <sys/mkdev.h> 40 #include <sys/priv.h> 41 #include <sys/sysmacros.h> 42 #include <sys/systm.h> 43 #include <sys/cmn_err.h> 44 #include <sys/policy.h> 45 #include <sys/tsol/label.h> 46 #include "fs/fs_subr.h" 47 48 /* 49 * This is the loadable module wrapper. 50 */ 51 #include <sys/modctl.h> 52 53 static mntopts_t lofs_mntopts; 54 55 static int lofsinit(int, char *); 56 57 static vfsdef_t vfw = { 58 VFSDEF_VERSION, 59 "lofs", 60 lofsinit, 61 VSW_HASPROTO|VSW_STATS, 62 &lofs_mntopts 63 }; 64 65 /* 66 * LOFS mount options table 67 */ 68 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 69 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 70 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 71 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 72 73 static mntopt_t mntopts[] = { 74 /* 75 * option name cancel option default arg flags 76 * private data 77 */ 78 { MNTOPT_XATTR, xattr_cancel, NULL, 0, 79 (void *)0 }, 80 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 81 (void *)0 }, 82 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 83 (void *)0 }, 84 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 85 (void *)0 }, 86 }; 87 88 static mntopts_t lofs_mntopts = { 89 sizeof (mntopts) / sizeof (mntopt_t), 90 mntopts 91 }; 92 93 /* 94 * Module linkage information for the kernel. 95 */ 96 97 static struct modlfs modlfs = { 98 &mod_fsops, "filesystem for lofs", &vfw 99 }; 100 101 static struct modlinkage modlinkage = { 102 MODREV_1, (void *)&modlfs, NULL 103 }; 104 105 /* 106 * This is the module initialization routine. 107 */ 108 109 int 110 _init(void) 111 { 112 int status; 113 114 lofs_subrinit(); 115 status = mod_install(&modlinkage); 116 if (status != 0) { 117 /* 118 * Cleanup previously initialized work. 119 */ 120 lofs_subrfini(); 121 } 122 123 return (status); 124 } 125 126 /* 127 * Don't allow the lofs module to be unloaded for now. 128 * There is a memory leak if it gets unloaded. 129 */ 130 131 int 132 _fini(void) 133 { 134 return (EBUSY); 135 } 136 137 int 138 _info(struct modinfo *modinfop) 139 { 140 return (mod_info(&modlinkage, modinfop)); 141 } 142 143 144 static int lofsfstype; 145 vfsops_t *lo_vfsops; 146 147 /* 148 * lo mount vfsop 149 * Set up mount info record and attach it to vfs struct. 150 */ 151 /*ARGSUSED*/ 152 static int 153 lo_mount(struct vfs *vfsp, 154 struct vnode *vp, 155 struct mounta *uap, 156 struct cred *cr) 157 { 158 int error; 159 struct vnode *srootvp = NULL; /* the server's root */ 160 struct vnode *realrootvp; 161 struct loinfo *li; 162 int nodev; 163 164 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 165 166 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 167 return (EPERM); 168 169 /* 170 * Loopback devices which get "nodevices" added can be done without 171 * "nodevices" set because we cannot import devices into a zone 172 * with loopback. Note that we have all zone privileges when 173 * this happens; if not, we'd have gotten "nosuid". 174 */ 175 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 176 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 177 178 mutex_enter(&vp->v_lock); 179 if (!(uap->flags & MS_OVERLAY) && 180 (vp->v_count != 1 || (vp->v_flag & VROOT))) { 181 mutex_exit(&vp->v_lock); 182 return (EBUSY); 183 } 184 mutex_exit(&vp->v_lock); 185 186 /* 187 * Find real root, and make vfs point to real vfs 188 */ 189 190 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 191 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp)) 192 return (error); 193 194 /* 195 * Enforce MAC policy if needed. 196 * 197 * Loopback mounts must not allow writing up. The dominance test 198 * is intended to prevent a global zone caller from accidentally 199 * creating write-up conditions between two labeled zones. 200 * Local zones can't violate MAC on their own without help from 201 * the global zone because they can't name a pathname that 202 * they don't already have. 203 * 204 * The special case check for the NET_MAC_AWARE process flag is 205 * to support the case of the automounter in the global zone. We 206 * permit automounting of local zone directories such as home 207 * directories, into the global zone as required by setlabel, 208 * zonecopy, and saving of desktop sessions. Such mounts are 209 * trusted not to expose the contents of one zone's directories 210 * to another by leaking them through the global zone. 211 */ 212 if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { 213 char specname[MAXPATHLEN]; 214 zone_t *from_zptr; 215 zone_t *to_zptr; 216 217 if (vnodetopath(NULL, realrootvp, specname, 218 sizeof (specname), CRED()) != 0) { 219 VN_RELE(realrootvp); 220 return (EACCES); 221 } 222 223 from_zptr = zone_find_by_path(specname); 224 to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 225 226 /* 227 * Special case for scratch zones used for Live Upgrade: 228 * this is used to mount the zone's root from /root to /a in 229 * the scratch zone. As with the other special case, this 230 * appears to be outside of the zone because it's not under 231 * the zone rootpath, which is $ZONEPATH/lu in the scratch 232 * zone case. 233 */ 234 235 if (from_zptr != to_zptr && 236 !(to_zptr->zone_flags & ZF_IS_SCRATCH)) { 237 /* 238 * We know at this point that the labels aren't equal 239 * because the zone pointers aren't equal, and zones 240 * can't share a label. 241 * 242 * If the source is the global zone then making 243 * it available to a local zone must be done in 244 * read-only mode as the label will become admin_low. 245 * 246 * If it is a mount between local zones then if 247 * the current process is in the global zone and has 248 * the NET_MAC_AWARE flag, then regular read-write 249 * access is allowed. If it's in some other zone, but 250 * the label on the mount point dominates the original 251 * source, then allow the mount as read-only 252 * ("read-down"). 253 */ 254 if (from_zptr->zone_id == GLOBAL_ZONEID) { 255 /* make the mount read-only */ 256 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 257 } else { /* cross-zone mount */ 258 if (to_zptr->zone_id == GLOBAL_ZONEID && 259 /* LINTED: no consequent */ 260 getpflags(NET_MAC_AWARE, cr) != 0) { 261 /* Allow the mount as read-write */ 262 } else if (bldominates( 263 label2bslabel(to_zptr->zone_slabel), 264 label2bslabel(from_zptr->zone_slabel))) { 265 /* make the mount read-only */ 266 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 267 } else { 268 VN_RELE(realrootvp); 269 zone_rele(to_zptr); 270 zone_rele(from_zptr); 271 return (EACCES); 272 } 273 } 274 } 275 zone_rele(to_zptr); 276 zone_rele(from_zptr); 277 } 278 279 /* 280 * realrootvp may be an AUTOFS node, in which case we 281 * perform a VOP_ACCESS() to trigger the mount of the 282 * intended filesystem, so we loopback mount the intended 283 * filesystem instead of the AUTOFS filesystem. 284 */ 285 (void) VOP_ACCESS(realrootvp, 0, 0, cr, NULL); 286 287 /* 288 * We're interested in the top most filesystem. 289 * This is specially important when uap->spec is a trigger 290 * AUTOFS node, since we're really interested in mounting the 291 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 292 * call not the AUTOFS node itself. 293 */ 294 if (vn_mountedvfs(realrootvp) != NULL) { 295 if (error = traverse(&realrootvp)) { 296 VN_RELE(realrootvp); 297 return (error); 298 } 299 } 300 301 /* 302 * Allocate a vfs info struct and attach it 303 */ 304 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 305 li->li_realvfs = realrootvp->v_vfsp; 306 li->li_mountvfs = vfsp; 307 308 /* 309 * Set mount flags to be inherited by loopback vfs's 310 */ 311 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 312 li->li_mflag |= VFS_RDONLY; 313 } 314 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 315 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 316 } 317 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 318 li->li_mflag |= VFS_NODEVICES; 319 } 320 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 321 li->li_mflag |= VFS_NOSETUID; 322 } 323 /* 324 * Permissive flags are added to the "deny" bitmap. 325 */ 326 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 327 li->li_dflag |= VFS_XATTR; 328 } 329 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 330 li->li_dflag |= VFS_NBMAND; 331 } 332 333 /* 334 * Propagate inheritable mount flags from the real vfs. 335 */ 336 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 337 !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 338 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 339 VFS_NODISPLAY); 340 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 341 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 342 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 343 VFS_NODISPLAY); 344 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 345 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 346 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 347 VFS_NODISPLAY); 348 /* 349 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 350 * such as VFS_RDONLY, are handled differently. An explicit 351 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 352 */ 353 if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 354 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 355 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 356 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 357 VFS_NODISPLAY); 358 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 359 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 360 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 361 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 362 VFS_NODISPLAY); 363 364 li->li_refct = 0; 365 vfsp->vfs_data = (caddr_t)li; 366 vfsp->vfs_bcount = 0; 367 vfsp->vfs_fstype = lofsfstype; 368 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 369 370 vfsp->vfs_dev = li->li_realvfs->vfs_dev; 371 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 372 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 373 374 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 375 li->li_flag |= LO_NOSUB; 376 } 377 378 /* 379 * Propagate any VFS features 380 */ 381 382 vfs_propagate_features(li->li_realvfs, vfsp); 383 384 /* 385 * Setup the hashtable. If the root of this mount isn't a directory, 386 * there's no point in allocating a large hashtable. A table with one 387 * bucket is sufficient. 388 */ 389 if (realrootvp->v_type != VDIR) 390 lsetup(li, 1); 391 else 392 lsetup(li, 0); 393 394 /* 395 * Make the root vnode 396 */ 397 srootvp = makelonode(realrootvp, li, 0); 398 srootvp->v_flag |= VROOT; 399 li->li_rootvp = srootvp; 400 401 #ifdef LODEBUG 402 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 403 vfsp, li->li_realvfs, srootvp, realrootvp, li); 404 #endif 405 return (0); 406 } 407 408 /* 409 * Undo loopback mount 410 */ 411 static int 412 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 413 { 414 struct loinfo *li; 415 416 if (secpolicy_fs_unmount(cr, vfsp) != 0) 417 return (EPERM); 418 419 /* 420 * Forced unmount is not supported by this file system 421 * and thus, ENOTSUP, is being returned. 422 */ 423 if (flag & MS_FORCE) 424 return (ENOTSUP); 425 426 li = vtoli(vfsp); 427 #ifdef LODEBUG 428 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 429 #endif 430 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 431 #ifdef LODEBUG 432 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 433 li->li_rootvp->v_count); 434 #endif 435 return (EBUSY); 436 } 437 VN_RELE(li->li_rootvp); 438 return (0); 439 } 440 441 /* 442 * Find root of lofs mount. 443 */ 444 static int 445 lo_root(struct vfs *vfsp, struct vnode **vpp) 446 { 447 *vpp = vtoli(vfsp)->li_rootvp; 448 #ifdef LODEBUG 449 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 450 #endif 451 /* 452 * If the root of the filesystem is a special file, return the specvp 453 * version of the vnode. We don't save the specvp vnode in our 454 * hashtable since that's exclusively for lnodes. 455 */ 456 if (IS_DEVVP(*vpp)) { 457 struct vnode *svp; 458 459 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 460 if (svp == NULL) 461 return (ENOSYS); 462 *vpp = svp; 463 } else { 464 VN_HOLD(*vpp); 465 } 466 467 return (0); 468 } 469 470 /* 471 * Get file system statistics. 472 */ 473 static int 474 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 475 { 476 vnode_t *realrootvp; 477 478 #ifdef LODEBUG 479 lo_dprint(4, "lostatvfs %p\n", vfsp); 480 #endif 481 /* 482 * Using realrootvp->v_vfsp (instead of the realvfsp that was 483 * cached) is necessary to make lofs work woth forced UFS unmounts. 484 * In the case of a forced unmount, UFS stores a set of dummy vfsops 485 * in all the (i)vnodes in the filesystem. The dummy ops simply 486 * returns back EIO. 487 */ 488 (void) lo_realvfs(vfsp, &realrootvp); 489 if (realrootvp != NULL) 490 return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 491 else 492 return (EIO); 493 } 494 495 /* 496 * LOFS doesn't have any data or metadata to flush, pending I/O on the 497 * underlying filesystem will be flushed when such filesystem is synched. 498 */ 499 /* ARGSUSED */ 500 static int 501 lo_sync(struct vfs *vfsp, 502 short flag, 503 struct cred *cr) 504 { 505 #ifdef LODEBUG 506 lo_dprint(4, "lo_sync: %p\n", vfsp); 507 #endif 508 return (0); 509 } 510 511 /* 512 * Obtain the vnode from the underlying filesystem. 513 */ 514 static int 515 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 516 { 517 vnode_t *realrootvp; 518 519 #ifdef LODEBUG 520 lo_dprint(4, "lo_vget: %p\n", vfsp); 521 #endif 522 (void) lo_realvfs(vfsp, &realrootvp); 523 if (realrootvp != NULL) 524 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 525 else 526 return (EIO); 527 } 528 529 /* 530 * Free mount-specific data. 531 */ 532 static void 533 lo_freevfs(struct vfs *vfsp) 534 { 535 struct loinfo *li = vtoli(vfsp); 536 537 ldestroy(li); 538 kmem_free(li, sizeof (struct loinfo)); 539 } 540 541 static int 542 lofsinit(int fstyp, char *name) 543 { 544 static const fs_operation_def_t lo_vfsops_template[] = { 545 VFSNAME_MOUNT, { .vfs_mount = lo_mount }, 546 VFSNAME_UNMOUNT, { .vfs_unmount = lo_unmount }, 547 VFSNAME_ROOT, { .vfs_root = lo_root }, 548 VFSNAME_STATVFS, { .vfs_statvfs = lo_statvfs }, 549 VFSNAME_SYNC, { .vfs_sync = lo_sync }, 550 VFSNAME_VGET, { .vfs_vget = lo_vget }, 551 VFSNAME_FREEVFS, { .vfs_freevfs = lo_freevfs }, 552 NULL, NULL 553 }; 554 int error; 555 556 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 557 if (error != 0) { 558 cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 559 return (error); 560 } 561 562 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 563 if (error != 0) { 564 (void) vfs_freevfsops_by_type(fstyp); 565 cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 566 return (error); 567 } 568 569 lofsfstype = fstyp; 570 571 return (0); 572 } 573