1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2024 Oxide Computer Company 24 */ 25 26 #include <sys/param.h> 27 #include <sys/errno.h> 28 #include <sys/vfs.h> 29 #include <sys/vfs_opreg.h> 30 #include <sys/vnode.h> 31 #include <sys/uio.h> 32 #include <sys/pathname.h> 33 #include <sys/kmem.h> 34 #include <sys/cred.h> 35 #include <sys/statvfs.h> 36 #include <sys/fs/lofs_info.h> 37 #include <sys/fs/lofs_node.h> 38 #include <sys/mount.h> 39 #include <sys/mntent.h> 40 #include <sys/mkdev.h> 41 #include <sys/priv.h> 42 #include <sys/sysmacros.h> 43 #include <sys/systm.h> 44 #include <sys/cmn_err.h> 45 #include <sys/policy.h> 46 #include <sys/tsol/label.h> 47 #include "fs/fs_subr.h" 48 49 /* 50 * This is the loadable module wrapper. 51 */ 52 #include <sys/modctl.h> 53 54 static mntopts_t lofs_mntopts; 55 56 static int lofsinit(int, char *); 57 58 static vfsdef_t vfw = { 59 VFSDEF_VERSION, 60 "lofs", 61 lofsinit, 62 VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT, 63 &lofs_mntopts 64 }; 65 66 /* 67 * LOFS mount options table 68 */ 69 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 70 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 71 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 72 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 73 74 static mntopt_t mntopts[] = { 75 /* 76 * option name cancel option default arg flags 77 * private data 78 */ 79 { MNTOPT_XATTR, xattr_cancel, NULL, 0, 80 (void *)0 }, 81 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 82 (void *)0 }, 83 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 84 (void *)0 }, 85 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 86 (void *)0 }, 87 }; 88 89 static mntopts_t lofs_mntopts = { 90 sizeof (mntopts) / sizeof (mntopt_t), 91 mntopts 92 }; 93 94 /* 95 * Module linkage information for the kernel. 96 */ 97 98 static struct modlfs modlfs = { 99 &mod_fsops, "filesystem for lofs", &vfw 100 }; 101 102 static struct modlinkage modlinkage = { 103 MODREV_1, (void *)&modlfs, NULL 104 }; 105 106 /* 107 * This is the module initialization routine. 108 */ 109 110 int 111 _init(void) 112 { 113 int status; 114 115 lofs_subrinit(); 116 status = mod_install(&modlinkage); 117 if (status != 0) { 118 /* 119 * Cleanup previously initialized work. 120 */ 121 lofs_subrfini(); 122 } 123 124 return (status); 125 } 126 127 /* 128 * Don't allow the lofs module to be unloaded for now. 129 * There is a memory leak if it gets unloaded. 130 */ 131 132 int 133 _fini(void) 134 { 135 return (EBUSY); 136 } 137 138 int 139 _info(struct modinfo *modinfop) 140 { 141 return (mod_info(&modlinkage, modinfop)); 142 } 143 144 145 static int lofsfstype; 146 vfsops_t *lo_vfsops; 147 148 /* 149 * lo mount vfsop 150 * Set up mount info record and attach it to vfs struct. 151 */ 152 /*ARGSUSED*/ 153 static int 154 lo_mount(struct vfs *vfsp, struct vnode *vp, struct mounta *uap, 155 struct cred *cr) 156 { 157 int error; 158 struct vnode *srootvp = NULL; /* the server's root */ 159 struct vnode *realrootvp; 160 struct loinfo *li; 161 int nodev; 162 163 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 164 165 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 166 return (EPERM); 167 168 /* 169 * Loopback devices which get "nodevices" added can be done without 170 * "nodevices" set because we cannot import devices into a zone 171 * with loopback. Note that we have all zone privileges when 172 * this happens; if not, we'd have gotten "nosuid". 173 */ 174 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 175 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 176 177 mutex_enter(&vp->v_lock); 178 if (!(uap->flags & MS_OVERLAY) && 179 (vp->v_count != 1 || (vp->v_flag & VROOT))) { 180 mutex_exit(&vp->v_lock); 181 return (EBUSY); 182 } 183 mutex_exit(&vp->v_lock); 184 185 /* 186 * Find real root, and make vfs point to real vfs 187 */ 188 189 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 190 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp)) 191 return (error); 192 193 /* 194 * Enforce MAC policy if needed. 195 * 196 * Loopback mounts must not allow writing up. The dominance test 197 * is intended to prevent a global zone caller from accidentally 198 * creating write-up conditions between two labeled zones. 199 * Local zones can't violate MAC on their own without help from 200 * the global zone because they can't name a pathname that 201 * they don't already have. 202 * 203 * The special case check for the NET_MAC_AWARE process flag is 204 * to support the case of the automounter in the global zone. We 205 * permit automounting of local zone directories such as home 206 * directories, into the global zone as required by setlabel, 207 * zonecopy, and saving of desktop sessions. Such mounts are 208 * trusted not to expose the contents of one zone's directories 209 * to another by leaking them through the global zone. 210 */ 211 if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { 212 char specname[MAXPATHLEN]; 213 zone_t *from_zptr; 214 zone_t *to_zptr; 215 216 if (vnodetopath(NULL, realrootvp, specname, 217 sizeof (specname), CRED()) != 0) { 218 VN_RELE(realrootvp); 219 return (EACCES); 220 } 221 222 from_zptr = zone_find_by_path(specname); 223 to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 224 225 /* 226 * Special case for scratch zones used for Live Upgrade: 227 * this is used to mount the zone's root from /root to /a in 228 * the scratch zone. As with the other special case, this 229 * appears to be outside of the zone because it's not under 230 * the zone rootpath, which is $ZONEPATH/lu in the scratch 231 * zone case. 232 */ 233 234 if (from_zptr != to_zptr && 235 !(to_zptr->zone_flags & ZF_IS_SCRATCH)) { 236 /* 237 * We know at this point that the labels aren't equal 238 * because the zone pointers aren't equal, and zones 239 * can't share a label. 240 * 241 * If the source is the global zone then making 242 * it available to a local zone must be done in 243 * read-only mode as the label will become admin_low. 244 * 245 * If it is a mount between local zones then if 246 * the current process is in the global zone and has 247 * the NET_MAC_AWARE flag, then regular read-write 248 * access is allowed. If it's in some other zone, but 249 * the label on the mount point dominates the original 250 * source, then allow the mount as read-only 251 * ("read-down"). 252 */ 253 if (from_zptr->zone_id == GLOBAL_ZONEID) { 254 /* make the mount read-only */ 255 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 256 } else { /* cross-zone mount */ 257 if (to_zptr->zone_id == GLOBAL_ZONEID && 258 /* LINTED: no consequent */ 259 getpflags(NET_MAC_AWARE, cr) != 0) { 260 /* Allow the mount as read-write */ 261 } else if (bldominates( 262 label2bslabel(to_zptr->zone_slabel), 263 label2bslabel(from_zptr->zone_slabel))) { 264 /* make the mount read-only */ 265 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 266 } else { 267 VN_RELE(realrootvp); 268 zone_rele(to_zptr); 269 zone_rele(from_zptr); 270 return (EACCES); 271 } 272 } 273 } 274 zone_rele(to_zptr); 275 zone_rele(from_zptr); 276 } 277 278 /* 279 * realrootvp may be an AUTOFS node, in which case we perform a 280 * VOP_ACCESS() to trigger the mount of the intended filesystem. 281 * This causes a loopback mount of the intended filesystem instead 282 * of the AUTOFS filesystem. 283 * 284 * If a lofs mount creates a mount loop (such that a lofs vfs is 285 * mounted on an autofs node and that lofs vfs points back to the 286 * autofs node which it is mounted on) then a VOP_ACCESS call will 287 * create a deadlock. Once this deadlock is released, VOP_ACCESS will 288 * return EINTR. In such a case we don't want the lofs vfs to be 289 * created as the loop could panic the system. 290 */ 291 if ((error = VOP_ACCESS(realrootvp, 0, 0, cr, NULL)) != 0) { 292 VN_RELE(realrootvp); 293 return (error); 294 } 295 296 /* 297 * We're interested in the top most filesystem. 298 * This is specially important when uap->spec is a trigger 299 * AUTOFS node, since we're really interested in mounting the 300 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 301 * call not the AUTOFS node itself. 302 */ 303 if (vn_mountedvfs(realrootvp) != NULL) { 304 if (error = traverse(&realrootvp)) { 305 VN_RELE(realrootvp); 306 return (error); 307 } 308 } 309 310 /* 311 * Allocate a vfs info struct and attach it 312 */ 313 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 314 li->li_realvfs = realrootvp->v_vfsp; 315 li->li_mountvfs = vfsp; 316 317 /* 318 * Set mount flags to be inherited by loopback vfs's 319 */ 320 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 321 li->li_mflag |= VFS_RDONLY; 322 } 323 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 324 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 325 } 326 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 327 li->li_mflag |= VFS_NODEVICES; 328 } 329 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 330 li->li_mflag |= VFS_NOSETUID; 331 } 332 /* 333 * Permissive flags are added to the "deny" bitmap. 334 */ 335 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 336 li->li_dflag |= VFS_XATTR; 337 } 338 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 339 li->li_dflag |= VFS_NBMAND; 340 } 341 342 /* 343 * Propagate inheritable mount flags from the real vfs. 344 */ 345 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 346 !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 347 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 348 VFS_NODISPLAY); 349 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 350 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 351 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 352 VFS_NODISPLAY); 353 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 354 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 355 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 356 VFS_NODISPLAY); 357 /* 358 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 359 * such as VFS_RDONLY, are handled differently. An explicit 360 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 361 */ 362 if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 363 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 364 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 365 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 366 VFS_NODISPLAY); 367 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 368 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 369 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 370 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 371 VFS_NODISPLAY); 372 373 li->li_refct = 0; 374 vfsp->vfs_data = (caddr_t)li; 375 vfsp->vfs_bcount = 0; 376 vfsp->vfs_fstype = lofsfstype; 377 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 378 379 vfsp->vfs_dev = li->li_realvfs->vfs_dev; 380 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 381 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 382 383 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 384 li->li_flag |= LO_NOSUB; 385 } 386 387 /* 388 * Propagate any VFS features 389 */ 390 391 vfs_propagate_features(li->li_realvfs, vfsp); 392 393 /* 394 * Setup the hashtable. If the root of this mount isn't a directory, 395 * there's no point in allocating a large hashtable. A table with one 396 * bucket is sufficient. 397 */ 398 if (realrootvp->v_type != VDIR) 399 lsetup(li, 1); 400 else 401 lsetup(li, 0); 402 403 /* 404 * Make the root vnode 405 */ 406 srootvp = makelonode(realrootvp, li, 0); 407 srootvp->v_flag |= VROOT; 408 li->li_rootvp = srootvp; 409 410 #ifdef LODEBUG 411 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 412 vfsp, li->li_realvfs, srootvp, realrootvp, li); 413 #endif 414 return (0); 415 } 416 417 /* 418 * Undo loopback mount 419 */ 420 static int 421 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 422 { 423 struct loinfo *li; 424 425 if (secpolicy_fs_unmount(cr, vfsp) != 0) 426 return (EPERM); 427 428 /* 429 * Forced unmount is not supported by this file system 430 * and thus, ENOTSUP, is being returned. 431 */ 432 if (flag & MS_FORCE) 433 return (ENOTSUP); 434 435 li = vtoli(vfsp); 436 #ifdef LODEBUG 437 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 438 #endif 439 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 440 #ifdef LODEBUG 441 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 442 li->li_rootvp->v_count); 443 #endif 444 return (EBUSY); 445 } 446 VN_RELE(li->li_rootvp); 447 return (0); 448 } 449 450 /* 451 * Find root of lofs mount. 452 */ 453 static int 454 lo_root(struct vfs *vfsp, struct vnode **vpp) 455 { 456 *vpp = vtoli(vfsp)->li_rootvp; 457 #ifdef LODEBUG 458 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 459 #endif 460 /* 461 * If the root of the filesystem is a special file, return the specvp 462 * version of the vnode. We don't save the specvp vnode in our 463 * hashtable since that's exclusively for lnodes. 464 */ 465 if (IS_DEVVP(*vpp)) { 466 struct vnode *svp; 467 468 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 469 if (svp == NULL) 470 return (ENOSYS); 471 *vpp = svp; 472 } else { 473 VN_HOLD(*vpp); 474 } 475 476 return (0); 477 } 478 479 /* 480 * Get file system statistics. 481 */ 482 static int 483 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 484 { 485 vnode_t *realrootvp; 486 487 #ifdef LODEBUG 488 lo_dprint(4, "lostatvfs %p\n", vfsp); 489 #endif 490 /* 491 * Using realrootvp->v_vfsp (instead of the realvfsp that was 492 * cached) is necessary to make lofs work woth forced UFS unmounts. 493 * In the case of a forced unmount, UFS stores a set of dummy vfsops 494 * in all the (i)vnodes in the filesystem. The dummy ops simply 495 * returns back EIO. 496 */ 497 (void) lo_realvfs(vfsp, &realrootvp); 498 if (realrootvp != NULL) 499 return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 500 else 501 return (EIO); 502 } 503 504 /* 505 * LOFS doesn't have any data or metadata to flush, pending I/O on the 506 * underlying filesystem will be flushed when such filesystem is synched. 507 */ 508 /* ARGSUSED */ 509 static int 510 lo_sync(struct vfs *vfsp, short flag, struct cred *cr) 511 { 512 #ifdef LODEBUG 513 lo_dprint(4, "lo_sync: %p\n", vfsp); 514 #endif 515 return (0); 516 } 517 518 /* 519 * While the general sync(2) entry point above assumes that the underlying fs 520 * will be synced, we treat this as a directed blocking sync on the file system 521 * which means we should attempt the underlying file system. 522 */ 523 static int 524 lo_syncfs(vfs_t *vfsp, uint64_t flags, cred_t *cr) 525 { 526 vfs_t *realvfs; 527 528 #ifdef LODEBUG 529 lo_dprint(4, "lo_syncfs: %p\n", vfsp); 530 #endif 531 realvfs = lo_realvfs(vfsp, NULL); 532 if (realvfs != NULL) { 533 return (VFS_SYNCFS(realvfs, flags, cr)); 534 } else { 535 return (EIO); 536 } 537 538 } 539 540 /* 541 * Obtain the vnode from the underlying filesystem. 542 */ 543 static int 544 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 545 { 546 vnode_t *realrootvp; 547 548 #ifdef LODEBUG 549 lo_dprint(4, "lo_vget: %p\n", vfsp); 550 #endif 551 (void) lo_realvfs(vfsp, &realrootvp); 552 if (realrootvp != NULL) 553 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 554 else 555 return (EIO); 556 } 557 558 /* 559 * Free mount-specific data. 560 */ 561 static void 562 lo_freevfs(struct vfs *vfsp) 563 { 564 struct loinfo *li = vtoli(vfsp); 565 566 ldestroy(li); 567 kmem_free(li, sizeof (struct loinfo)); 568 } 569 570 static int 571 lofsinit(int fstyp, char *name) 572 { 573 static const fs_operation_def_t lo_vfsops_template[] = { 574 VFSNAME_MOUNT, { .vfs_mount = lo_mount }, 575 VFSNAME_UNMOUNT, { .vfs_unmount = lo_unmount }, 576 VFSNAME_ROOT, { .vfs_root = lo_root }, 577 VFSNAME_STATVFS, { .vfs_statvfs = lo_statvfs }, 578 VFSNAME_SYNC, { .vfs_sync = lo_sync }, 579 VFSNAME_VGET, { .vfs_vget = lo_vget }, 580 VFSNAME_FREEVFS, { .vfs_freevfs = lo_freevfs }, 581 VFSNAME_SYNCFS, { .vfs_syncfs = lo_syncfs }, 582 NULL, NULL 583 }; 584 int error; 585 586 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 587 if (error != 0) { 588 cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 589 return (error); 590 } 591 592 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 593 if (error != 0) { 594 (void) vfs_freevfsops_by_type(fstyp); 595 cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 596 return (error); 597 } 598 599 lofsfstype = fstyp; 600 601 return (0); 602 } 603