1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2019, Joyent, Inc. 25 * Copyright 2016 Toomas Soome <tsoome@me.com> 26 * Copyright (c) 2016, 2017 by Delphix. All rights reserved. 27 * Copyright 2016 Nexenta Systems, Inc. 28 * Copyright 2017 RackTop Systems. 29 * Copyright 2024 Oxide Computer Company 30 */ 31 32 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 33 /* All Rights Reserved */ 34 35 /* 36 * University Copyright- Copyright (c) 1982, 1986, 1988 37 * The Regents of the University of California 38 * All Rights Reserved 39 * 40 * University Acknowledgment- Portions of this document are derived from 41 * software developed by the University of California, Berkeley, and its 42 * contributors. 
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/user.h>
#include <sys/fstyp.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/fem.h>
#include <sys/mntent.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/statfs.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/rwstlock.h>
#include <sys/dnlc.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/buf.h>
#include <sys/swap.h>
#include <sys/debug.h>
#include <sys/vnode.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/pathname.h>
#include <sys/bootconf.h>
#include <sys/dumphdr.h>
#include <sys/dc_ki.h>
#include <sys/poll.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/zone.h>
#include <sys/policy.h>
#include <sys/ctfs.h>
#include <sys/objfs.h>
#include <sys/console.h>
#include <sys/reboot.h>
#include <sys/attr.h>
#include <sys/zio.h>
#include <sys/spa.h>
#include <sys/lofi.h>
#include <sys/bootprops.h>

#include <vm/page.h>

#include <fs/fs_subr.h>
/* Private interfaces to create vopstats-related data structures */
extern void		initialize_vopstats(vopstats_t *);
extern vopstats_t	*get_fstype_vopstats(struct vfs *, struct vfssw *);
extern vsk_anchor_t	*get_vskstat_anchor(struct vfs *);

/*
 * Forward declarations for file-local helpers.  The *_nolock variants are
 * presumably meant for callers that already hold the relevant lock -- confirm
 * against their definitions further down in this file.
 */
static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
static void vfs_setmntopt_nolock(mntopts_t *, const char *,
    const char *, int, int);
static int  vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
static void vfs_freemnttab(struct vfs *);
static void vfs_freeopt(mntopt_t *);
static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
static void vfs_swapopttbl(mntopts_t *, mntopts_t *);
static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
static void vfs_createopttbl_extend(mntopts_t *, const char *,
    const mntopts_t *);
static char **vfs_copycancelopt_extend(char **const, int);
static void vfs_freecancelopt(char **);
static void getrootfs(char **, char **);
static int getmacpath(dev_info_t *, void *);
static void vfs_mnttabvp_setup(void);

/*
 * Singly linked list node associating a device number with a vfs.
 * NOTE(review): presumably tracks mounts that are in progress (the "ip" in
 * the name); confirm against the users of vfs_miplist.
 */
struct ipmnt {
	struct ipmnt	*mip_next;	/* next node in the list */
	dev_t		mip_dev;	/* device number for this entry */
	struct vfs	*mip_vfsp;	/* vfs associated with mip_dev */
};

/* Head and tail of the ipmnt list, with the mutex declared alongside them. */
static kmutex_t		vfs_miplist_mutex;
static struct ipmnt	*vfs_miplist = NULL;
static struct ipmnt	*vfs_miplist_end = NULL;

static kmem_cache_t *vfs_cache;	/* Pointer to VFS kmem cache */

/*
 * VFS global data.
 */
vnode_t *rootdir;		/* pointer to root inode vnode. */
vnode_t *devicesdir;		/* pointer to inode of devices root */
vnode_t	*devdir;		/* pointer to inode of dev root */

char *server_rootpath;		/* root path for diskless clients */
char *server_hostname;		/* hostname of diskless server */

/*
 * Statically allocated vfs structures: "root" backs rootvfs, while
 * "devices" and "dev" are the instances mounted on /devices and /dev by
 * vfs_mountdevices() and vfs_mountdev1() respectively.
 */
static struct vfs root;
static struct vfs devices;
static struct vfs dev;
struct vfs *rootvfs = &root;	/* pointer to root vfs; head of VFS list. */
rvfs_t *rvfs_list;		/* array of vfs ptrs for vfs hash list */
int vfshsz = 512;		/* # of heads/locks in vfs hash arrays */
				/* must be power of 2!	*/
timespec_t vfs_mnttab_ctime;	/* mnttab created time */
timespec_t vfs_mnttab_mtime;	/* mnttab last modified time */
char *vfs_dummyfstype = "\0";
struct pollhead vfs_pollhd;	/* for mnttab pollers */
struct vnode *vfs_mntdummyvp;	/* to fake mnttab read/write for file events */
int mntfstype;			/* will be set once mnt fs is mounted */

/*
 * Table for generic options recognized in the VFS layer and acted
 * on at this level before parsing file system specific options.
159 * The nosuid option is stronger than any of the devices and setuid 160 * options, so those are canceled when nosuid is seen. 161 * 162 * All options which are added here need to be added to the 163 * list of standard options in usr/src/cmd/fs.d/fslib.c as well. 164 */ 165 /* 166 * VFS Mount options table 167 */ 168 static char *ro_cancel[] = { MNTOPT_RW, NULL }; 169 static char *rw_cancel[] = { MNTOPT_RO, NULL }; 170 static char *suid_cancel[] = { MNTOPT_NOSUID, NULL }; 171 static char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES, 172 MNTOPT_NOSETUID, MNTOPT_SETUID, NULL }; 173 static char *devices_cancel[] = { MNTOPT_NODEVICES, NULL }; 174 static char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL }; 175 static char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL }; 176 static char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL }; 177 static char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL }; 178 static char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL }; 179 static char *exec_cancel[] = { MNTOPT_NOEXEC, NULL }; 180 static char *noexec_cancel[] = { MNTOPT_EXEC, NULL }; 181 182 static const mntopt_t mntopts[] = { 183 /* 184 * option name cancel options default arg flags 185 */ 186 { MNTOPT_REMOUNT, NULL, NULL, 187 MO_NODISPLAY, (void *)0 }, 188 { MNTOPT_RO, ro_cancel, NULL, 0, 189 (void *)0 }, 190 { MNTOPT_RW, rw_cancel, NULL, 0, 191 (void *)0 }, 192 { MNTOPT_SUID, suid_cancel, NULL, 0, 193 (void *)0 }, 194 { MNTOPT_NOSUID, nosuid_cancel, NULL, 0, 195 (void *)0 }, 196 { MNTOPT_DEVICES, devices_cancel, NULL, 0, 197 (void *)0 }, 198 { MNTOPT_NODEVICES, nodevices_cancel, NULL, 0, 199 (void *)0 }, 200 { MNTOPT_SETUID, setuid_cancel, NULL, 0, 201 (void *)0 }, 202 { MNTOPT_NOSETUID, nosetuid_cancel, NULL, 0, 203 (void *)0 }, 204 { MNTOPT_NBMAND, nbmand_cancel, NULL, 0, 205 (void *)0 }, 206 { MNTOPT_NONBMAND, nonbmand_cancel, NULL, 0, 207 (void *)0 }, 208 { MNTOPT_EXEC, exec_cancel, NULL, 0, 209 (void *)0 }, 210 { MNTOPT_NOEXEC, noexec_cancel, NULL, 0, 211 
(void *)0 }, 212 }; 213 214 const mntopts_t vfs_mntopts = { 215 sizeof (mntopts) / sizeof (mntopt_t), 216 (mntopt_t *)&mntopts[0] 217 }; 218 219 /* 220 * File system operation dispatch functions. 221 */ 222 223 int 224 fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 225 { 226 return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr); 227 } 228 229 int 230 fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr) 231 { 232 return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr); 233 } 234 235 int 236 fsop_root(vfs_t *vfsp, vnode_t **vpp) 237 { 238 refstr_t *mntpt; 239 int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp); 240 /* 241 * Make sure this root has a path. With lofs, it is possible to have 242 * a NULL mountpoint. 243 */ 244 if (ret == 0 && vfsp->vfs_mntpt != NULL && 245 (*vpp)->v_path == vn_vpath_empty) { 246 const char *path; 247 248 mntpt = vfs_getmntpoint(vfsp); 249 path = refstr_value(mntpt); 250 vn_setpath_str(*vpp, path, strlen(path)); 251 refstr_rele(mntpt); 252 } 253 254 return (ret); 255 } 256 257 int 258 fsop_statfs(vfs_t *vfsp, statvfs64_t *sp) 259 { 260 return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp); 261 } 262 263 int 264 fsop_sync(vfs_t *vfsp, short flag, cred_t *cr) 265 { 266 return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr); 267 } 268 269 int 270 fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 271 { 272 /* 273 * In order to handle system attribute fids in a manner 274 * transparent to the underlying fs, we embed the fid for 275 * the sysattr parent object in the sysattr fid and tack on 276 * some extra bytes that only the sysattr layer knows about. 277 * 278 * This guarantees that sysattr fids are larger than other fids 279 * for this vfs. If the vfs supports the sysattr view interface 280 * (as indicated by VFSFT_SYSATTR_VIEWS), we cannot have a size 281 * collision with XATTR_FIDSZ. 
282 */ 283 if (vfs_has_feature(vfsp, VFSFT_SYSATTR_VIEWS) && 284 fidp->fid_len == XATTR_FIDSZ) 285 return (xattr_dir_vget(vfsp, vpp, fidp)); 286 287 return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp); 288 } 289 290 int 291 fsop_mountroot(vfs_t *vfsp, enum whymountroot reason) 292 { 293 return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason); 294 } 295 296 void 297 fsop_freefs(vfs_t *vfsp) 298 { 299 (*(vfsp)->vfs_op->vfs_freevfs)(vfsp); 300 } 301 302 int 303 fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate) 304 { 305 return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate)); 306 } 307 308 int 309 fsop_sync_by_kind(int fstype, short flag, cred_t *cr) 310 { 311 ASSERT((fstype >= 0) && (fstype < nfstype)); 312 313 if (ALLOCATED_VFSSW(&vfssw[fstype]) && VFS_INSTALLED(&vfssw[fstype])) 314 return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr); 315 else 316 return (ENOTSUP); 317 } 318 319 int 320 fsop_syncfs(vfs_t *vfsp, uint64_t flags, cred_t *cr) 321 { 322 return (*(vfsp)->vfs_op->vfs_syncfs)(vfsp, flags, cr); 323 } 324 325 /* 326 * File system initialization. vfs_setfsops() must be called from a file 327 * system's init routine. 
328 */ 329 330 static int 331 fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual, 332 int *unused_ops) 333 { 334 static const fs_operation_trans_def_t vfs_ops_table[] = { 335 { VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount), 336 fs_nosys, fs_nosys }, 337 338 { VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount), 339 fs_nosys, fs_nosys }, 340 341 { VFSNAME_ROOT, offsetof(vfsops_t, vfs_root), 342 fs_nosys, fs_nosys }, 343 344 { VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs), 345 fs_nosys, fs_nosys }, 346 347 { VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync), 348 (fs_generic_func_p) fs_sync, 349 (fs_generic_func_p) fs_sync }, /* No errors allowed */ 350 351 { VFSNAME_VGET, offsetof(vfsops_t, vfs_vget), 352 fs_nosys, fs_nosys }, 353 354 { VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot), 355 fs_nosys, fs_nosys }, 356 357 { VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs), 358 (fs_generic_func_p)(uintptr_t)fs_freevfs, 359 /* Shouldn't fail */ 360 (fs_generic_func_p)(uintptr_t)fs_freevfs }, 361 362 { VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate), 363 (fs_generic_func_p)fs_nosys, (fs_generic_func_p)fs_nosys }, 364 365 /* 366 * While it is tempting to say that a file system which does not 367 * implement a VFSNAME_SYNC likely doesn't need a VFSNAME_SYNCFS 368 * by default, implementing that policy is challenging with the 369 * way the fs_build_vector logic works and we'd rather a file 370 * system say that it doesn't support this by default rather 371 * than incorrectly claim to sync something that either doesn't 372 * make sense to sync (ala sockfs) or mislead when it didn't 373 * happen. 
374 */ 375 { VFSNAME_SYNCFS, offsetof(vfsops_t, vfs_syncfs), 376 (fs_generic_func_p)fs_nosys_syncfs, 377 (fs_generic_func_p)fs_nosys_syncfs }, 378 379 { NULL, 0, NULL, NULL } 380 }; 381 382 return (fs_build_vector(actual, unused_ops, vfs_ops_table, template)); 383 } 384 385 void 386 zfs_boot_init(void) 387 { 388 if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0) 389 spa_boot_init(); 390 } 391 392 int 393 vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual) 394 { 395 int error; 396 int unused_ops; 397 398 /* 399 * Verify that fstype refers to a valid fs. Note that 400 * 0 is valid since it's used to set "stray" ops. 401 */ 402 if ((fstype < 0) || (fstype >= nfstype)) 403 return (EINVAL); 404 405 if (!ALLOCATED_VFSSW(&vfssw[fstype])) 406 return (EINVAL); 407 408 /* Set up the operations vector. */ 409 410 error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops); 411 412 if (error != 0) 413 return (error); 414 415 vfssw[fstype].vsw_flag |= VSW_INSTALLED; 416 417 if (actual != NULL) 418 *actual = &vfssw[fstype].vsw_vfsops; 419 420 #if DEBUG 421 if (unused_ops != 0) 422 cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied " 423 "but not used", vfssw[fstype].vsw_name, unused_ops); 424 #endif 425 426 return (0); 427 } 428 429 int 430 vfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual) 431 { 432 int error; 433 int unused_ops; 434 435 *actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP); 436 437 error = fs_copyfsops(template, *actual, &unused_ops); 438 if (error != 0) { 439 kmem_free(*actual, sizeof (vfsops_t)); 440 *actual = NULL; 441 return (error); 442 } 443 444 return (0); 445 } 446 447 /* 448 * Free a vfsops structure created as a result of vfs_makefsops(). 449 * NOTE: For a vfsops structure initialized by vfs_setfsops(), use 450 * vfs_freevfsops_by_type(). 
451 */ 452 void 453 vfs_freevfsops(vfsops_t *vfsops) 454 { 455 kmem_free(vfsops, sizeof (vfsops_t)); 456 } 457 458 /* 459 * Since the vfsops structure is part of the vfssw table and wasn't 460 * really allocated, we're not really freeing anything. We keep 461 * the name for consistency with vfs_freevfsops(). We do, however, 462 * need to take care of a little bookkeeping. 463 * NOTE: For a vfsops structure created by vfs_setfsops(), use 464 * vfs_freevfsops_by_type(). 465 */ 466 int 467 vfs_freevfsops_by_type(int fstype) 468 { 469 470 /* Verify that fstype refers to a loaded fs (and not fsid 0). */ 471 if ((fstype <= 0) || (fstype >= nfstype)) 472 return (EINVAL); 473 474 WLOCK_VFSSW(); 475 if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) { 476 WUNLOCK_VFSSW(); 477 return (EINVAL); 478 } 479 480 vfssw[fstype].vsw_flag &= ~VSW_INSTALLED; 481 WUNLOCK_VFSSW(); 482 483 return (0); 484 } 485 486 /* Support routines used to reference vfs_op */ 487 488 /* Set the operations vector for a vfs */ 489 void 490 vfs_setops(vfs_t *vfsp, vfsops_t *vfsops) 491 { 492 vfsops_t *op; 493 494 ASSERT(vfsp != NULL); 495 ASSERT(vfsops != NULL); 496 497 op = vfsp->vfs_op; 498 membar_consumer(); 499 if (vfsp->vfs_femhead == NULL && 500 atomic_cas_ptr(&vfsp->vfs_op, op, vfsops) == op) { 501 return; 502 } 503 fsem_setvfsops(vfsp, vfsops); 504 } 505 506 /* Retrieve the operations vector for a vfs */ 507 vfsops_t * 508 vfs_getops(vfs_t *vfsp) 509 { 510 vfsops_t *op; 511 512 ASSERT(vfsp != NULL); 513 514 op = vfsp->vfs_op; 515 membar_consumer(); 516 if (vfsp->vfs_femhead == NULL && op == vfsp->vfs_op) { 517 return (op); 518 } else { 519 return (fsem_getvfsops(vfsp)); 520 } 521 } 522 523 /* 524 * Returns non-zero (1) if the vfsops matches that of the vfs. 525 * Returns zero (0) if not. 
526 */ 527 int 528 vfs_matchops(vfs_t *vfsp, vfsops_t *vfsops) 529 { 530 return (vfs_getops(vfsp) == vfsops); 531 } 532 533 /* 534 * Returns non-zero (1) if the file system has installed a non-default, 535 * non-error vfs_sync routine. Returns zero (0) otherwise. 536 */ 537 int 538 vfs_can_sync(vfs_t *vfsp) 539 { 540 /* vfs_sync() routine is not the default/error function */ 541 return (vfs_getops(vfsp)->vfs_sync != fs_sync); 542 } 543 544 /* 545 * Initialize a vfs structure. 546 */ 547 void 548 vfs_init(vfs_t *vfsp, vfsops_t *op, void *data) 549 { 550 /* Other initialization has been moved to vfs_alloc() */ 551 vfsp->vfs_count = 0; 552 vfsp->vfs_next = vfsp; 553 vfsp->vfs_prev = vfsp; 554 vfsp->vfs_zone_next = vfsp; 555 vfsp->vfs_zone_prev = vfsp; 556 vfsp->vfs_lofi_id = 0; 557 sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL); 558 vfsimpl_setup(vfsp); 559 vfsp->vfs_data = (data); 560 vfs_setops((vfsp), (op)); 561 } 562 563 /* 564 * Allocate and initialize the vfs implementation private data 565 * structure, vfs_impl_t. 566 */ 567 void 568 vfsimpl_setup(vfs_t *vfsp) 569 { 570 int i; 571 572 if (vfsp->vfs_implp != NULL) { 573 return; 574 } 575 576 vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP); 577 /* Note that these are #define'd in vfs.h */ 578 vfsp->vfs_vskap = NULL; 579 vfsp->vfs_fstypevsp = NULL; 580 581 /* Set size of counted array, then zero the array */ 582 vfsp->vfs_featureset[0] = VFS_FEATURE_MAXSZ - 1; 583 for (i = 1; i < VFS_FEATURE_MAXSZ; i++) { 584 vfsp->vfs_featureset[i] = 0; 585 } 586 } 587 588 /* 589 * Release the vfs_impl_t structure, if it exists. Some unbundled 590 * filesystems may not use the newer version of vfs and thus 591 * would not contain this implementation private data structure. 
592 */ 593 void 594 vfsimpl_teardown(vfs_t *vfsp) 595 { 596 vfs_impl_t *vip = vfsp->vfs_implp; 597 598 if (vip == NULL) 599 return; 600 601 kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t)); 602 vfsp->vfs_implp = NULL; 603 } 604 605 /* 606 * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs, 607 * fstatvfs, and sysfs moved to common/syscall. 608 */ 609 610 /* 611 * Update every mounted file system. We call the vfs_sync operation of 612 * each file system type, passing it a NULL vfsp to indicate that all 613 * mounted file systems of that type should be updated. 614 */ 615 void 616 vfs_sync(int flag) 617 { 618 struct vfssw *vswp; 619 RLOCK_VFSSW(); 620 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 621 if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) { 622 vfs_refvfssw(vswp); 623 RUNLOCK_VFSSW(); 624 (void) (*vswp->vsw_vfsops.vfs_sync)(NULL, flag, 625 CRED()); 626 vfs_unrefvfssw(vswp); 627 RLOCK_VFSSW(); 628 } 629 } 630 RUNLOCK_VFSSW(); 631 } 632 633 void 634 sync(void) 635 { 636 vfs_sync(0); 637 } 638 639 /* 640 * External routines. 641 */ 642 643 krwlock_t vfssw_lock; /* lock accesses to vfssw */ 644 645 /* 646 * Lock for accessing the vfs linked list. Initialized in vfs_mountroot(), 647 * but otherwise should be accessed only via vfs_list_lock() and 648 * vfs_list_unlock(). Also used to protect the timestamp for mods to the list. 649 */ 650 static krwlock_t vfslist; 651 652 /* 653 * Mount devfs on /devices. 
This is done right after root is mounted 654 * to provide device access support for the system 655 */ 656 static void 657 vfs_mountdevices(void) 658 { 659 struct vfssw *vsw; 660 struct vnode *mvp; 661 struct mounta mounta = { /* fake mounta for devfs_mount() */ 662 NULL, 663 NULL, 664 MS_SYSSPACE, 665 NULL, 666 NULL, 667 0, 668 NULL, 669 0 670 }; 671 672 /* 673 * _init devfs module to fill in the vfssw 674 */ 675 if (modload("fs", "devfs") == -1) 676 panic("Cannot _init devfs module"); 677 678 /* 679 * Hold vfs 680 */ 681 RLOCK_VFSSW(); 682 vsw = vfs_getvfsswbyname("devfs"); 683 VFS_INIT(&devices, &vsw->vsw_vfsops, NULL); 684 VFS_HOLD(&devices); 685 686 /* 687 * Locate mount point 688 */ 689 if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) 690 panic("Cannot find /devices"); 691 692 /* 693 * Perform the mount of /devices 694 */ 695 if (VFS_MOUNT(&devices, mvp, &mounta, CRED())) 696 panic("Cannot mount /devices"); 697 698 RUNLOCK_VFSSW(); 699 700 /* 701 * Set appropriate members and add to vfs list for mnttab display 702 */ 703 vfs_setresource(&devices, "/devices", 0); 704 vfs_setmntpoint(&devices, "/devices", 0); 705 706 /* 707 * Hold the root of /devices so it won't go away 708 */ 709 if (VFS_ROOT(&devices, &devicesdir)) 710 panic("vfs_mountdevices: not devices root"); 711 712 if (vfs_lock(&devices) != 0) { 713 VN_RELE(devicesdir); 714 cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices"); 715 return; 716 } 717 718 if (vn_vfswlock(mvp) != 0) { 719 vfs_unlock(&devices); 720 VN_RELE(devicesdir); 721 cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices"); 722 return; 723 } 724 725 vfs_add(mvp, &devices, 0); 726 vn_vfsunlock(mvp); 727 vfs_unlock(&devices); 728 VN_RELE(devicesdir); 729 } 730 731 /* 732 * mount the first instance of /dev to root and remain mounted 733 */ 734 static void 735 vfs_mountdev1(void) 736 { 737 struct vfssw *vsw; 738 struct vnode *mvp; 739 struct mounta mounta = { /* fake mounta for sdev_mount() */ 740 NULL, 741 NULL, 742 
MS_SYSSPACE | MS_OVERLAY, 743 NULL, 744 NULL, 745 0, 746 NULL, 747 0 748 }; 749 750 /* 751 * _init dev module to fill in the vfssw 752 */ 753 if (modload("fs", "dev") == -1) 754 cmn_err(CE_PANIC, "Cannot _init dev module\n"); 755 756 /* 757 * Hold vfs 758 */ 759 RLOCK_VFSSW(); 760 vsw = vfs_getvfsswbyname("dev"); 761 VFS_INIT(&dev, &vsw->vsw_vfsops, NULL); 762 VFS_HOLD(&dev); 763 764 /* 765 * Locate mount point 766 */ 767 if (lookupname("/dev", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) 768 cmn_err(CE_PANIC, "Cannot find /dev\n"); 769 770 /* 771 * Perform the mount of /dev 772 */ 773 if (VFS_MOUNT(&dev, mvp, &mounta, CRED())) 774 cmn_err(CE_PANIC, "Cannot mount /dev 1\n"); 775 776 RUNLOCK_VFSSW(); 777 778 /* 779 * Set appropriate members and add to vfs list for mnttab display 780 */ 781 vfs_setresource(&dev, "/dev", 0); 782 vfs_setmntpoint(&dev, "/dev", 0); 783 784 /* 785 * Hold the root of /dev so it won't go away 786 */ 787 if (VFS_ROOT(&dev, &devdir)) 788 cmn_err(CE_PANIC, "vfs_mountdev1: not dev root"); 789 790 if (vfs_lock(&dev) != 0) { 791 VN_RELE(devdir); 792 cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /dev"); 793 return; 794 } 795 796 if (vn_vfswlock(mvp) != 0) { 797 vfs_unlock(&dev); 798 VN_RELE(devdir); 799 cmn_err(CE_NOTE, "Cannot acquire vfswlock of /dev"); 800 return; 801 } 802 803 vfs_add(mvp, &dev, 0); 804 vn_vfsunlock(mvp); 805 vfs_unlock(&dev); 806 VN_RELE(devdir); 807 } 808 809 /* 810 * Mount required filesystem. This is done right after root is mounted. 
 */
/*
 * vfs_mountfs() arguments:
 *	module	file system type name, passed to domount() as mounta.fstype
 *	spec	resource to mount, passed as mounta.spec
 *	path	mount point; looked up as a kernel-space path
 *
 * Failures are not fatal: a warning is logged and the function returns.
 */
static void
vfs_mountfs(char *module, char *spec, char *path)
{
	struct vnode *mvp;
	struct mounta mounta;
	vfs_t *vfsp;

	bzero(&mounta, sizeof (mounta));
	mounta.flags = MS_SYSSPACE | MS_DATA;
	mounta.fstype = module;
	mounta.spec = spec;
	mounta.dir = path;
	if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) {
		cmn_err(CE_WARN, "Cannot find %s", path);
		return;
	}
	if (domount(NULL, &mounta, mvp, CRED(), &vfsp))
		cmn_err(CE_WARN, "Cannot mount %s", path);
	else
		VFS_RELE(vfsp);	/* drop the vfs reference domount() returned */
	VN_RELE(mvp);		/* drop our lookupname() hold in either case */
}

/*
 * vfs_mountroot is called by main() to mount the root filesystem.
 *
 * Besides mounting root it initializes the vfssw and vfs list locks,
 * allocates the vfs hash array, points every existing process at the new
 * root directory, and mounts the standard set of kernel file systems.
 */
void
vfs_mountroot(void)
{
	struct vnode	*rvp = NULL;
	char		*path;
	size_t		plen;
	struct vfssw	*vswp;
	proc_t		*p;

	rw_init(&vfssw_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&vfslist, NULL, RW_DEFAULT, NULL);

	/*
	 * Alloc the vfs hash bucket array and locks
	 */
	rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP);

	/*
	 * Call machine-dependent routine "rootconf" to choose a root
	 * file system type.
	 */
	if (rootconf())
		panic("vfs_mountroot: cannot mount root");
	/*
	 * Get vnode for '/'.  Set up rootdir and u.u_cdir to point to it,
	 * and clear u.u_rdir.  These are used by lookuppn() so that it
	 * knows where to start from ('/' or '.').
	 */
	vfs_setmntpoint(rootvfs, "/", 0);
	if (VFS_ROOT(rootvfs, &rootdir))
		panic("vfs_mountroot: no root vnode");

	/*
	 * At this point, the process tree consists of p0 and possibly some
	 * direct children of p0.  (i.e. there are no grandchildren)
	 *
	 * Walk through them all, setting their current directory.
	 * Each process takes its own hold on rootdir.
	 */
	mutex_enter(&pidlock);
	for (p = practive; p != NULL; p = p->p_next) {
		ASSERT(p == &p0 || p->p_parent == &p0);

		PTOU(p)->u_cdir = rootdir;
		VN_HOLD(PTOU(p)->u_cdir);
		PTOU(p)->u_rdir = NULL;
	}
	mutex_exit(&pidlock);

	/*
	 * Setup the global zone's rootvp, now that it exists.
	 */
	global_zone->zone_rootvp = rootdir;
	VN_HOLD(global_zone->zone_rootvp);

	/*
	 * Notify the module code that it can begin using the
	 * root filesystem instead of the boot program's services.
	 */
	modrootloaded = 1;

	/*
	 * Special handling for a ZFS root file system.
	 */
	zfs_boot_init();

	/*
	 * Set up mnttab information for root
	 */
	vfs_setresource(rootvfs, rootfs.bo_name, 0);

	/*
	 * Notify cluster software that the root filesystem is available.
	 */
	clboot_mountroot();

	/* Now that we're all done with the root FS, set up its vopstats */
	if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) {
		/* Set flag for statistics collection */
		if (vswp->vsw_flag & VSW_STATS) {
			initialize_vopstats(&rootvfs->vfs_vopstats);
			rootvfs->vfs_flag |= VFS_STATS;
			rootvfs->vfs_fstypevsp =
			    get_fstype_vopstats(rootvfs, vswp);
			rootvfs->vfs_vskap = get_vskstat_anchor(rootvfs);
		}
		/* Release the reference the lookup above returned. */
		vfs_unrefvfssw(vswp);
	}

	/*
	 * Mount /devices, /dev instance 1, /system/contract, /proc,
	 * /etc/mnttab, /etc/svc/volatile, /system/object and /system/boot,
	 * plus /etc/dfs/sharetab when running in the global zone.
	 */
	vfs_mountdevices();
	vfs_mountdev1();

	vfs_mountfs("ctfs", "ctfs", CTFS_ROOT);
	vfs_mountfs("proc", "/proc", "/proc");
	vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab");
	vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile");
	vfs_mountfs("objfs", "objfs", OBJFS_ROOT);
	vfs_mountfs("bootfs", "bootfs", "/system/boot");

	if (getzoneid() == GLOBAL_ZONEID) {
		vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab");
	}

	if (strcmp(rootfs.bo_fstype, "zfs") != 0) {
		/*
		 * Look up the root device via devfs so that a dv_node is
		 * created for it. The vnode is never VN_RELE()ed.
		 * We allocate more than MAXPATHLEN so that the
		 * buffer passed to i_ddi_prompath_to_devfspath() is
		 * exactly MAXPATHLEN (the function expects a buffer
		 * of that length).
		 */
		plen = strlen("/devices");
		path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP);
		(void) strcpy(path, "/devices");

		if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen)
		    != DDI_SUCCESS ||
		    lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) {

			/* NUL terminate in case "path" has garbage */
			path[plen + MAXPATHLEN - 1] = '\0';
#ifdef DEBUG
			cmn_err(CE_WARN, "!Cannot lookup root device: %s",
			    path);
#endif
		}
		kmem_free(path, plen + MAXPATHLEN);
	}

	vfs_mnttabvp_setup();
}

/*
 * Check to see if our "block device" is actually a file.  If so,
 * automatically add a lofi device, and keep track of this fact.
 *
 * Returns 0 without doing anything when the file system type is unknown or
 * lacks VSW_CANLOFI, when the spec path cannot be fetched or looked up, or
 * when the spec is not a regular file.  Returns EINVAL for a regular-file
 * spec combined with MS_REMOUNT/MS_GLOBAL or with the suid, setuid or
 * devices options.  Otherwise returns the result of opening /dev/lofictl
 * and issuing LOFI_MAP_FILE; on success the id from the ioctl is stored in
 * vfsp->vfs_lofi_id.
 */
static int
lofi_add(const char *fsname, struct vfs *vfsp,
    mntopts_t *mntopts, struct mounta *uap)
{
	int fromspace = (uap->flags & MS_SYSSPACE) ?
	    UIO_SYSSPACE : UIO_USERSPACE;
	struct lofi_ioctl *li = NULL;
	struct vnode *vp = NULL;
	struct pathname	pn = { NULL };
	ldi_ident_t ldi_id;
	ldi_handle_t ldi_hdl;
	vfssw_t *vfssw;		/* NB: shadows the global vfssw[] table */
	int id;
	int err = 0;

	if ((vfssw = vfs_getvfssw(fsname)) == NULL)
		return (0);

	if (!(vfssw->vsw_flag & VSW_CANLOFI)) {
		vfs_unrefvfssw(vfssw);
		return (0);
	}

	/* Only the flag was needed; drop the vfssw reference right away. */
	vfs_unrefvfssw(vfssw);
	vfssw = NULL;

	if (pn_get(uap->spec, fromspace, &pn) != 0)
		return (0);

	/* err is still 0 here, so a failed lookup is not reported. */
	if (lookupname(uap->spec, fromspace, FOLLOW, NULL, &vp) != 0)
		goto out;

	if (vp->v_type != VREG)
		goto out;

	/* OK, this is a lofi mount. */

	if ((uap->flags & (MS_REMOUNT|MS_GLOBAL)) ||
	    vfs_optionisset_nolock(mntopts, MNTOPT_SUID, NULL) ||
	    vfs_optionisset_nolock(mntopts, MNTOPT_SETUID, NULL) ||
	    vfs_optionisset_nolock(mntopts, MNTOPT_DEVICES, NULL)) {
		err = EINVAL;
		goto out;
	}

	ldi_id = ldi_ident_from_anon();
	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
	(void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN);

	err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE, kcred,
	    &ldi_hdl, ldi_id);

	if (err)
		goto out2;

	err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
	    FREAD | FWRITE | FKIOCTL, kcred, &id);

	(void) ldi_close(ldi_hdl, FREAD | FWRITE, kcred);

	if (!err)
		vfsp->vfs_lofi_id = id;

out2:
	/* Reached only after ldi_id was obtained. */
	ldi_ident_release(ldi_id);
out:
	if (li != NULL)
		kmem_free(li, sizeof (*li));
	if (vp != NULL)
		VN_RELE(vp);
	pn_free(&pn);
	return (err);
}

/*
 * Undo lofi_add(): if the vfs has a lofi id recorded, ask /dev/lofictl to
 * unmap that minor (with li_cleanup set) and clear vfs_lofi_id when the
 * ioctl succeeds.  Errors are not reported to the caller; on failure the
 * recorded id is left in place.
 */
static void
lofi_remove(struct vfs *vfsp)
{
	struct lofi_ioctl *li;
	ldi_ident_t ldi_id;
	ldi_handle_t ldi_hdl;
	int err;

	if (vfsp->vfs_lofi_id == 0)
		return;

	ldi_id = ldi_ident_from_anon();

	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
	li->li_id = vfsp->vfs_lofi_id;
	li->li_cleanup = B_TRUE;

	err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE, kcred,
	    &ldi_hdl, ldi_id);

	if (err)
		goto out;

	err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li,
	    FREAD | FWRITE | FKIOCTL, kcred, NULL);

	(void) ldi_close(ldi_hdl, FREAD | FWRITE, kcred);

	if (!err)
		vfsp->vfs_lofi_id = 0;

out:
	ldi_ident_release(ldi_id);
	kmem_free(li, sizeof (*li));
}

/*
 * Common mount code.  Called from the system call entry point, from autofs,
 * nfsv4 trigger mounts, and from pxfs.
 *
 * Takes the effective file system type, mount arguments, the mount point
 * vnode, flags specifying whether the mount is a remount and whether it
 * should be entered into the vfs list, and credentials.  Fills in its vfspp
 * parameter with the mounted file system instance's vfs.
 *
 * Note that the effective file system type is specified as a string.  It may
 * be null, in which case it's determined from the mount arguments, and may
 * differ from the type specified in the mount arguments; this is a hook to
 * allow interposition when instantiating file system instances.
 *
 * The caller is responsible for releasing its own hold on the mount point
 * vp (this routine does its own hold when necessary).
 * Also note that for remounts, the mount point vp should be the vnode for
 * the root of the file system rather than the vnode that the file system
 * is mounted on top of.
1107 */ 1108 int 1109 domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, 1110 struct vfs **vfspp) 1111 { 1112 struct vfssw *vswp; 1113 vfsops_t *vfsops; 1114 struct vfs *vfsp; 1115 struct vnode *bvp; 1116 dev_t bdev = 0; 1117 mntopts_t mnt_mntopts; 1118 int error = 0; 1119 int copyout_error = 0; 1120 int ovflags = 0; 1121 char *opts = uap->optptr; 1122 char *inargs = opts; 1123 int optlen = uap->optlen; 1124 int remount; 1125 int rdonly; 1126 int nbmand = 0; 1127 int delmip = 0; 1128 int addmip = 0; 1129 int splice = ((uap->flags & MS_NOSPLICE) == 0); 1130 int fromspace = (uap->flags & MS_SYSSPACE) ? 1131 UIO_SYSSPACE : UIO_USERSPACE; 1132 char *resource = NULL, *mountpt = NULL; 1133 refstr_t *oldresource, *oldmntpt; 1134 struct pathname pn, rpn; 1135 vsk_anchor_t *vskap; 1136 char fstname[FSTYPSZ]; 1137 zone_t *zone; 1138 1139 /* 1140 * The v_flag value for the mount point vp is permanently set 1141 * to VVFSLOCK so that no one bypasses the vn_vfs*locks routine 1142 * for mount point locking. 1143 */ 1144 mutex_enter(&vp->v_lock); 1145 vp->v_flag |= VVFSLOCK; 1146 mutex_exit(&vp->v_lock); 1147 1148 mnt_mntopts.mo_count = 0; 1149 /* 1150 * Find the ops vector to use to invoke the file system-specific mount 1151 * method. If the fsname argument is non-NULL, use it directly. 1152 * Otherwise, dig the file system type information out of the mount 1153 * arguments. 1154 * 1155 * A side effect is to hold the vfssw entry. 1156 * 1157 * Mount arguments can be specified in several ways, which are 1158 * distinguished by flag bit settings. The preferred way is to set 1159 * MS_OPTIONSTR, indicating an 8 argument mount with the file system 1160 * type supplied as a character string and the last two arguments 1161 * being a pointer to a character buffer and the size of the buffer. 1162 * On entry, the buffer holds a null terminated list of options; on 1163 * return, the string is the list of options the file system 1164 * recognized. 
If MS_DATA is set arguments five and six point to a 1165 * block of binary data which the file system interprets. 1166 * A further wrinkle is that some callers don't set MS_FSS and MS_DATA 1167 * consistently with these conventions. To handle them, we check to 1168 * see whether the pointer to the file system name has a numeric value 1169 * less than 256. If so, we treat it as an index. 1170 */ 1171 if (fsname != NULL) { 1172 if ((vswp = vfs_getvfssw(fsname)) == NULL) { 1173 return (EINVAL); 1174 } 1175 } else if (uap->flags & (MS_OPTIONSTR | MS_DATA | MS_FSS)) { 1176 size_t n; 1177 uint_t fstype; 1178 1179 fsname = fstname; 1180 1181 if ((fstype = (uintptr_t)uap->fstype) < 256) { 1182 RLOCK_VFSSW(); 1183 if (fstype == 0 || fstype >= nfstype || 1184 !ALLOCATED_VFSSW(&vfssw[fstype])) { 1185 RUNLOCK_VFSSW(); 1186 return (EINVAL); 1187 } 1188 (void) strcpy(fsname, vfssw[fstype].vsw_name); 1189 RUNLOCK_VFSSW(); 1190 if ((vswp = vfs_getvfssw(fsname)) == NULL) 1191 return (EINVAL); 1192 } else { 1193 /* 1194 * Handle either kernel or user address space. 
1195 */ 1196 if (uap->flags & MS_SYSSPACE) { 1197 error = copystr(uap->fstype, fsname, 1198 FSTYPSZ, &n); 1199 } else { 1200 error = copyinstr(uap->fstype, fsname, 1201 FSTYPSZ, &n); 1202 } 1203 if (error) { 1204 if (error == ENAMETOOLONG) 1205 return (EINVAL); 1206 return (error); 1207 } 1208 if ((vswp = vfs_getvfssw(fsname)) == NULL) 1209 return (EINVAL); 1210 } 1211 } else { 1212 if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL) 1213 return (EINVAL); 1214 fsname = vswp->vsw_name; 1215 } 1216 if (!VFS_INSTALLED(vswp)) 1217 return (EINVAL); 1218 1219 if ((error = secpolicy_fs_allowed_mount(fsname)) != 0) { 1220 vfs_unrefvfssw(vswp); 1221 return (error); 1222 } 1223 1224 vfsops = &vswp->vsw_vfsops; 1225 1226 vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts); 1227 /* 1228 * Fetch mount options and parse them for generic vfs options 1229 */ 1230 if (uap->flags & MS_OPTIONSTR) { 1231 /* 1232 * Limit the buffer size 1233 */ 1234 if (optlen < 0 || optlen > MAX_MNTOPT_STR) { 1235 error = EINVAL; 1236 goto errout; 1237 } 1238 if ((uap->flags & MS_SYSSPACE) == 0) { 1239 inargs = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP); 1240 inargs[0] = '\0'; 1241 if (optlen) { 1242 error = copyinstr(opts, inargs, (size_t)optlen, 1243 NULL); 1244 if (error) { 1245 goto errout; 1246 } 1247 } 1248 } 1249 vfs_parsemntopts(&mnt_mntopts, inargs, 0); 1250 } 1251 /* 1252 * Flag bits override the options string. 1253 */ 1254 if (uap->flags & MS_REMOUNT) 1255 vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0); 1256 if (uap->flags & MS_RDONLY) 1257 vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0); 1258 if (uap->flags & MS_NOSUID) 1259 vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0); 1260 1261 /* 1262 * Check if this is a remount; must be set in the option string and 1263 * the file system must support a remount option. 
1264 */ 1265 if (remount = vfs_optionisset_nolock(&mnt_mntopts, 1266 MNTOPT_REMOUNT, NULL)) { 1267 if (!(vswp->vsw_flag & VSW_CANREMOUNT)) { 1268 error = ENOTSUP; 1269 goto errout; 1270 } 1271 uap->flags |= MS_REMOUNT; 1272 } 1273 1274 /* 1275 * uap->flags and vfs_optionisset() should agree. 1276 */ 1277 if (rdonly = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) { 1278 uap->flags |= MS_RDONLY; 1279 } 1280 if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) { 1281 uap->flags |= MS_NOSUID; 1282 } 1283 nbmand = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NBMAND, NULL); 1284 ASSERT(splice || !remount); 1285 /* 1286 * If we are splicing the fs into the namespace, 1287 * perform mount point checks. 1288 * 1289 * We want to resolve the path for the mount point to eliminate 1290 * '.' and ".." and symlinks in mount points; we can't do the 1291 * same for the resource string, since it would turn 1292 * "/dev/dsk/c0t0d0s0" into "/devices/pci@...". We need to do 1293 * this before grabbing vn_vfswlock(), because otherwise we 1294 * would deadlock with lookuppn(). 1295 */ 1296 if (splice) { 1297 ASSERT(vp->v_count > 0); 1298 1299 /* 1300 * Pick up mount point and device from appropriate space. 1301 */ 1302 if (pn_get(uap->spec, fromspace, &pn) == 0) { 1303 resource = kmem_alloc(pn.pn_pathlen + 1, 1304 KM_SLEEP); 1305 (void) strcpy(resource, pn.pn_path); 1306 pn_free(&pn); 1307 } 1308 /* 1309 * Do a lookupname prior to taking the 1310 * writelock. Mark this as completed if 1311 * successful for later cleanup and addition to 1312 * the mount in progress table. 
1313 */ 1314 if ((vswp->vsw_flag & VSW_MOUNTDEV) && 1315 (uap->flags & MS_GLOBAL) == 0 && 1316 lookupname(uap->spec, fromspace, 1317 FOLLOW, NULL, &bvp) == 0) { 1318 addmip = 1; 1319 } 1320 1321 if ((error = pn_get(uap->dir, fromspace, &pn)) == 0) { 1322 pathname_t *pnp; 1323 1324 if (*pn.pn_path != '/') { 1325 error = EINVAL; 1326 pn_free(&pn); 1327 goto errout; 1328 } 1329 pn_alloc(&rpn); 1330 /* 1331 * Kludge to prevent autofs from deadlocking with 1332 * itself when it calls domount(). 1333 * 1334 * If autofs is calling, it is because it is doing 1335 * (autofs) mounts in the process of an NFS mount. A 1336 * lookuppn() here would cause us to block waiting for 1337 * said NFS mount to complete, which can't since this 1338 * is the thread that was supposed to doing it. 1339 */ 1340 if (fromspace == UIO_USERSPACE) { 1341 if ((error = lookuppn(&pn, &rpn, FOLLOW, NULL, 1342 NULL)) == 0) { 1343 pnp = &rpn; 1344 } else { 1345 /* 1346 * The file disappeared or otherwise 1347 * became inaccessible since we opened 1348 * it; might as well fail the mount 1349 * since the mount point is no longer 1350 * accessible. 1351 */ 1352 pn_free(&rpn); 1353 pn_free(&pn); 1354 goto errout; 1355 } 1356 } else { 1357 pnp = &pn; 1358 } 1359 mountpt = kmem_alloc(pnp->pn_pathlen + 1, KM_SLEEP); 1360 (void) strcpy(mountpt, pnp->pn_path); 1361 1362 /* 1363 * If the addition of the zone's rootpath 1364 * would push us over a total path length 1365 * of MAXPATHLEN, we fail the mount with 1366 * ENAMETOOLONG, which is what we would have 1367 * gotten if we were trying to perform the same 1368 * mount in the global zone. 1369 * 1370 * strlen() doesn't count the trailing 1371 * '\0', but zone_rootpathlen counts both a 1372 * trailing '/' and the terminating '\0'. 
1373 */ 1374 if ((curproc->p_zone->zone_rootpathlen - 1 + 1375 strlen(mountpt)) > MAXPATHLEN || 1376 (resource != NULL && 1377 (curproc->p_zone->zone_rootpathlen - 1 + 1378 strlen(resource)) > MAXPATHLEN)) { 1379 error = ENAMETOOLONG; 1380 } 1381 1382 pn_free(&rpn); 1383 pn_free(&pn); 1384 } 1385 1386 if (error) 1387 goto errout; 1388 1389 /* 1390 * Prevent path name resolution from proceeding past 1391 * the mount point. 1392 */ 1393 if (vn_vfswlock(vp) != 0) { 1394 error = EBUSY; 1395 goto errout; 1396 } 1397 1398 /* 1399 * Verify that it's legitimate to establish a mount on 1400 * the prospective mount point. 1401 */ 1402 if (vn_mountedvfs(vp) != NULL) { 1403 /* 1404 * The mount point lock was obtained after some 1405 * other thread raced through and established a mount. 1406 */ 1407 vn_vfsunlock(vp); 1408 error = EBUSY; 1409 goto errout; 1410 } 1411 if (vp->v_flag & VNOMOUNT) { 1412 vn_vfsunlock(vp); 1413 error = EINVAL; 1414 goto errout; 1415 } 1416 } 1417 if ((uap->flags & (MS_DATA | MS_OPTIONSTR)) == 0) { 1418 uap->dataptr = NULL; 1419 uap->datalen = 0; 1420 } 1421 1422 /* 1423 * If this is a remount, we don't want to create a new VFS. 1424 * Instead, we pass the existing one with a remount flag. 1425 */ 1426 if (remount) { 1427 /* 1428 * Confirm that the mount point is the root vnode of the 1429 * file system that is being remounted. 1430 * This can happen if the user specifies a different 1431 * mount point directory pathname in the (re)mount command. 1432 * 1433 * Code below can only be reached if splice is true, so it's 1434 * safe to do vn_vfsunlock() here. 1435 */ 1436 if ((vp->v_flag & VROOT) == 0) { 1437 vn_vfsunlock(vp); 1438 error = ENOENT; 1439 goto errout; 1440 } 1441 /* 1442 * Disallow making file systems read-only unless file system 1443 * explicitly allows it in its vfssw. Ignore other flags. 
1444 */ 1445 if (rdonly && vn_is_readonly(vp) == 0 && 1446 (vswp->vsw_flag & VSW_CANRWRO) == 0) { 1447 vn_vfsunlock(vp); 1448 error = EINVAL; 1449 goto errout; 1450 } 1451 /* 1452 * Disallow changing the NBMAND disposition of the file 1453 * system on remounts. 1454 */ 1455 if ((nbmand && ((vp->v_vfsp->vfs_flag & VFS_NBMAND) == 0)) || 1456 (!nbmand && (vp->v_vfsp->vfs_flag & VFS_NBMAND))) { 1457 vn_vfsunlock(vp); 1458 error = EINVAL; 1459 goto errout; 1460 } 1461 vfsp = vp->v_vfsp; 1462 ovflags = vfsp->vfs_flag; 1463 vfsp->vfs_flag |= VFS_REMOUNT; 1464 vfsp->vfs_flag &= ~VFS_RDONLY; 1465 } else { 1466 vfsp = vfs_alloc(KM_SLEEP); 1467 VFS_INIT(vfsp, vfsops, NULL); 1468 } 1469 1470 VFS_HOLD(vfsp); 1471 1472 if ((error = lofi_add(fsname, vfsp, &mnt_mntopts, uap)) != 0) { 1473 if (!remount) { 1474 if (splice) 1475 vn_vfsunlock(vp); 1476 vfs_free(vfsp); 1477 } else { 1478 vn_vfsunlock(vp); 1479 VFS_RELE(vfsp); 1480 } 1481 goto errout; 1482 } 1483 1484 /* 1485 * PRIV_SYS_MOUNT doesn't mean you can become root. 1486 */ 1487 if (vfsp->vfs_lofi_id != 0) { 1488 uap->flags |= MS_NOSUID; 1489 vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0); 1490 } 1491 1492 /* 1493 * The vfs_reflock is not used anymore the code below explicitly 1494 * holds it preventing others accesing it directly. 1495 */ 1496 if ((sema_tryp(&vfsp->vfs_reflock) == 0) && 1497 !(vfsp->vfs_flag & VFS_REMOUNT)) 1498 cmn_err(CE_WARN, 1499 "mount type %s couldn't get vfs_reflock", vswp->vsw_name); 1500 1501 /* 1502 * Lock the vfs. If this is a remount we want to avoid spurious umount 1503 * failures that happen as a side-effect of fsflush() and other mount 1504 * and unmount operations that might be going on simultaneously and 1505 * may have locked the vfs currently. To not return EBUSY immediately 1506 * here we use vfs_lock_wait() instead vfs_lock() for the remount case. 
1507 */ 1508 if (!remount) { 1509 if (error = vfs_lock(vfsp)) { 1510 lofi_remove(vfsp); 1511 1512 if (splice) 1513 vn_vfsunlock(vp); 1514 vfs_free(vfsp); 1515 goto errout; 1516 } 1517 } else { 1518 vfs_lock_wait(vfsp); 1519 } 1520 1521 /* 1522 * Add device to mount in progress table, global mounts require special 1523 * handling. It is possible that we have already done the lookupname 1524 * on a spliced, non-global fs. If so, we don't want to do it again 1525 * since we cannot do a lookupname after taking the 1526 * wlock above. This case is for a non-spliced, non-global filesystem. 1527 */ 1528 if (!addmip) { 1529 if ((vswp->vsw_flag & VSW_MOUNTDEV) && 1530 (uap->flags & MS_GLOBAL) == 0 && 1531 lookupname(uap->spec, fromspace, FOLLOW, NULL, &bvp) == 0) { 1532 addmip = 1; 1533 } 1534 } 1535 1536 if (addmip) { 1537 vnode_t *lvp = NULL; 1538 1539 error = vfs_get_lofi(vfsp, &lvp); 1540 if (error > 0) { 1541 lofi_remove(vfsp); 1542 1543 if (splice) 1544 vn_vfsunlock(vp); 1545 vfs_unlock(vfsp); 1546 1547 if (remount) { 1548 VFS_RELE(vfsp); 1549 } else { 1550 vfs_free(vfsp); 1551 } 1552 1553 goto errout; 1554 } else if (error == -1) { 1555 bdev = bvp->v_rdev; 1556 VN_RELE(bvp); 1557 } else { 1558 bdev = lvp->v_rdev; 1559 VN_RELE(lvp); 1560 VN_RELE(bvp); 1561 } 1562 1563 vfs_addmip(bdev, vfsp); 1564 addmip = 0; 1565 delmip = 1; 1566 } 1567 /* 1568 * Invalidate cached entry for the mount point. 1569 */ 1570 if (splice) 1571 dnlc_purge_vp(vp); 1572 1573 /* 1574 * If have an option string but the filesystem doesn't supply a 1575 * prototype options table, create a table with the global 1576 * options and sufficient room to accept all the options in the 1577 * string. Then parse the passed in option string 1578 * accepting all the options in the string. This gives us an 1579 * option table with all the proper cancel properties for the 1580 * global options. 1581 * 1582 * Filesystems that supply a prototype options table are handled 1583 * earlier in this function. 
1584 */ 1585 if (uap->flags & MS_OPTIONSTR) { 1586 if (!(vswp->vsw_flag & VSW_HASPROTO)) { 1587 mntopts_t tmp_mntopts; 1588 1589 tmp_mntopts.mo_count = 0; 1590 vfs_createopttbl_extend(&tmp_mntopts, inargs, 1591 &mnt_mntopts); 1592 vfs_parsemntopts(&tmp_mntopts, inargs, 1); 1593 vfs_swapopttbl_nolock(&mnt_mntopts, &tmp_mntopts); 1594 vfs_freeopttbl(&tmp_mntopts); 1595 } 1596 } 1597 1598 /* 1599 * Serialize with zone state transitions. 1600 * See vfs_list_add; zone mounted into is: 1601 * zone_find_by_path(refstr_value(vfsp->vfs_mntpt)) 1602 * not the zone doing the mount (curproc->p_zone), but if we're already 1603 * inside a NGZ, then we know what zone we are. 1604 */ 1605 if (INGLOBALZONE(curproc)) { 1606 zone = zone_find_by_path(mountpt); 1607 ASSERT(zone != NULL); 1608 } else { 1609 zone = curproc->p_zone; 1610 /* 1611 * zone_find_by_path does a hold, so do one here too so that 1612 * we can do a zone_rele after mount_completed. 1613 */ 1614 zone_hold(zone); 1615 } 1616 mount_in_progress(zone); 1617 /* 1618 * Instantiate (or reinstantiate) the file system. If appropriate, 1619 * splice it into the file system name space. 1620 * 1621 * We want VFS_MOUNT() to be able to override the vfs_resource 1622 * string if necessary (ie, mntfs), and also for a remount to 1623 * change the same (necessary when remounting '/' during boot). 1624 * So we set up vfs_mntpt and vfs_resource to what we think they 1625 * should be, then hand off control to VFS_MOUNT() which can 1626 * override this. 1627 * 1628 * For safety's sake, when changing vfs_resource or vfs_mntpt of 1629 * a vfs which is on the vfs list (i.e. during a remount), we must 1630 * never set those fields to NULL. Several bits of code make 1631 * assumptions that the fields are always valid. 
1632 */ 1633 vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts); 1634 if (remount) { 1635 if ((oldresource = vfsp->vfs_resource) != NULL) 1636 refstr_hold(oldresource); 1637 if ((oldmntpt = vfsp->vfs_mntpt) != NULL) 1638 refstr_hold(oldmntpt); 1639 } 1640 vfs_setresource(vfsp, resource, 0); 1641 vfs_setmntpoint(vfsp, mountpt, 0); 1642 1643 /* 1644 * going to mount on this vnode, so notify. 1645 */ 1646 vnevent_mountedover(vp, NULL); 1647 error = VFS_MOUNT(vfsp, vp, uap, credp); 1648 1649 if (uap->flags & MS_RDONLY) 1650 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 1651 if (uap->flags & MS_NOSUID) 1652 vfs_setmntopt(vfsp, MNTOPT_NOSUID, NULL, 0); 1653 if (uap->flags & MS_GLOBAL) 1654 vfs_setmntopt(vfsp, MNTOPT_GLOBAL, NULL, 0); 1655 1656 if (error) { 1657 lofi_remove(vfsp); 1658 1659 if (remount) { 1660 /* put back pre-remount options */ 1661 vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts); 1662 vfs_setmntpoint(vfsp, refstr_value(oldmntpt), 1663 VFSSP_VERBATIM); 1664 if (oldmntpt) 1665 refstr_rele(oldmntpt); 1666 vfs_setresource(vfsp, refstr_value(oldresource), 1667 VFSSP_VERBATIM); 1668 if (oldresource) 1669 refstr_rele(oldresource); 1670 vfsp->vfs_flag = ovflags; 1671 vfs_unlock(vfsp); 1672 VFS_RELE(vfsp); 1673 } else { 1674 vfs_unlock(vfsp); 1675 vfs_freemnttab(vfsp); 1676 vfs_free(vfsp); 1677 } 1678 } else { 1679 /* 1680 * Set the mount time to now 1681 */ 1682 vfsp->vfs_mtime = ddi_get_time(); 1683 if (remount) { 1684 vfsp->vfs_flag &= ~VFS_REMOUNT; 1685 if (oldresource) 1686 refstr_rele(oldresource); 1687 if (oldmntpt) 1688 refstr_rele(oldmntpt); 1689 } else if (splice) { 1690 /* 1691 * Link vfsp into the name space at the mount 1692 * point. Vfs_add() is responsible for 1693 * holding the mount point which will be 1694 * released when vfs_remove() is called. 1695 */ 1696 vfs_add(vp, vfsp, uap->flags); 1697 } else { 1698 /* 1699 * Hold the reference to file system which is 1700 * not linked into the name space. 
1701 */ 1702 vfsp->vfs_zone = NULL; 1703 VFS_HOLD(vfsp); 1704 vfsp->vfs_vnodecovered = NULL; 1705 } 1706 /* 1707 * Set flags for global options encountered 1708 */ 1709 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 1710 vfsp->vfs_flag |= VFS_RDONLY; 1711 else 1712 vfsp->vfs_flag &= ~VFS_RDONLY; 1713 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 1714 vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES); 1715 } else { 1716 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 1717 vfsp->vfs_flag |= VFS_NODEVICES; 1718 else 1719 vfsp->vfs_flag &= ~VFS_NODEVICES; 1720 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 1721 vfsp->vfs_flag |= VFS_NOSETUID; 1722 else 1723 vfsp->vfs_flag &= ~VFS_NOSETUID; 1724 } 1725 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 1726 vfsp->vfs_flag |= VFS_NBMAND; 1727 else 1728 vfsp->vfs_flag &= ~VFS_NBMAND; 1729 1730 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 1731 vfsp->vfs_flag |= VFS_XATTR; 1732 else 1733 vfsp->vfs_flag &= ~VFS_XATTR; 1734 1735 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 1736 vfsp->vfs_flag |= VFS_NOEXEC; 1737 else 1738 vfsp->vfs_flag &= ~VFS_NOEXEC; 1739 1740 /* 1741 * Now construct the output option string of options 1742 * we recognized. 1743 */ 1744 if (uap->flags & MS_OPTIONSTR) { 1745 vfs_list_read_lock(); 1746 copyout_error = vfs_buildoptionstr( 1747 &vfsp->vfs_mntopts, inargs, optlen); 1748 vfs_list_unlock(); 1749 if (copyout_error == 0 && 1750 (uap->flags & MS_SYSSPACE) == 0) { 1751 copyout_error = copyoutstr(inargs, opts, 1752 optlen, NULL); 1753 } 1754 } 1755 1756 /* 1757 * If this isn't a remount, set up the vopstats before 1758 * anyone can touch this. We only allow spliced file 1759 * systems (file systems which are in the namespace) to 1760 * have the VFS_STATS flag set. 1761 * NOTE: PxFS mounts the underlying file system with 1762 * MS_NOSPLICE set and copies those vfs_flags to its private 1763 * vfs structure. 
As a result, PxFS should never have 1764 * the VFS_STATS flag or else we might access the vfs 1765 * statistics-related fields prior to them being 1766 * properly initialized. 1767 */ 1768 if (!remount && (vswp->vsw_flag & VSW_STATS) && splice) { 1769 initialize_vopstats(&vfsp->vfs_vopstats); 1770 /* 1771 * We need to set vfs_vskap to NULL because there's 1772 * a chance it won't be set below. This is checked 1773 * in teardown_vopstats() so we can't have garbage. 1774 */ 1775 vfsp->vfs_vskap = NULL; 1776 vfsp->vfs_flag |= VFS_STATS; 1777 vfsp->vfs_fstypevsp = get_fstype_vopstats(vfsp, vswp); 1778 } 1779 1780 if (vswp->vsw_flag & VSW_XID) 1781 vfsp->vfs_flag |= VFS_XID; 1782 1783 vfs_unlock(vfsp); 1784 } 1785 mount_completed(zone); 1786 zone_rele(zone); 1787 if (splice) 1788 vn_vfsunlock(vp); 1789 1790 if ((error == 0) && (copyout_error == 0)) { 1791 if (!remount) { 1792 /* 1793 * Don't call get_vskstat_anchor() while holding 1794 * locks since it allocates memory and calls 1795 * VFS_STATVFS(). For NFS, the latter can generate 1796 * an over-the-wire call. 1797 */ 1798 vskap = get_vskstat_anchor(vfsp); 1799 /* Only take the lock if we have something to do */ 1800 if (vskap != NULL) { 1801 vfs_lock_wait(vfsp); 1802 if (vfsp->vfs_flag & VFS_STATS) { 1803 vfsp->vfs_vskap = vskap; 1804 } 1805 vfs_unlock(vfsp); 1806 } 1807 } 1808 /* Return vfsp to caller. */ 1809 *vfspp = vfsp; 1810 } 1811 errout: 1812 vfs_freeopttbl(&mnt_mntopts); 1813 if (resource != NULL) 1814 kmem_free(resource, strlen(resource) + 1); 1815 if (mountpt != NULL) 1816 kmem_free(mountpt, strlen(mountpt) + 1); 1817 /* 1818 * It is possible we errored prior to adding to mount in progress 1819 * table. Must free vnode we acquired with successful lookupname. 
1820 */ 1821 if (addmip) 1822 VN_RELE(bvp); 1823 if (delmip) 1824 vfs_delmip(vfsp); 1825 ASSERT(vswp != NULL); 1826 vfs_unrefvfssw(vswp); 1827 if (inargs != opts) 1828 kmem_free(inargs, MAX_MNTOPT_STR); 1829 if (copyout_error) { 1830 lofi_remove(vfsp); 1831 VFS_RELE(vfsp); 1832 error = copyout_error; 1833 } 1834 return (error); 1835 } 1836 1837 static void 1838 vfs_setpath( 1839 struct vfs *vfsp, /* vfs being updated */ 1840 refstr_t **refp, /* Ref-count string to contain the new path */ 1841 const char *newpath, /* Path to add to refp (above) */ 1842 uint32_t flag) /* flag */ 1843 { 1844 size_t len; 1845 refstr_t *ref; 1846 zone_t *zone = curproc->p_zone; 1847 char *sp; 1848 int have_list_lock = 0; 1849 1850 ASSERT(!VFS_ON_LIST(vfsp) || vfs_lock_held(vfsp)); 1851 1852 /* 1853 * New path must be less than MAXPATHLEN because mntfs 1854 * will only display up to MAXPATHLEN bytes. This is currently 1855 * safe, because domount() uses pn_get(), and other callers 1856 * similarly cap the size to fewer than MAXPATHLEN bytes. 1857 */ 1858 1859 ASSERT(strlen(newpath) < MAXPATHLEN); 1860 1861 /* mntfs requires consistency while vfs list lock is held */ 1862 1863 if (VFS_ON_LIST(vfsp)) { 1864 have_list_lock = 1; 1865 vfs_list_lock(); 1866 } 1867 1868 if (*refp != NULL) 1869 refstr_rele(*refp); 1870 1871 /* 1872 * If we are in a non-global zone then we prefix the supplied path, 1873 * newpath, with the zone's root path, with two exceptions. The first 1874 * is where we have been explicitly directed to avoid doing so; this 1875 * will be the case following a failed remount, where the path supplied 1876 * will be a saved version which must now be restored. The second 1877 * exception is where newpath is not a pathname but a descriptive name, 1878 * e.g. "procfs". 
1879 */ 1880 if (zone == global_zone || (flag & VFSSP_VERBATIM) || *newpath != '/') { 1881 ref = refstr_alloc(newpath); 1882 goto out; 1883 } 1884 1885 /* 1886 * Truncate the trailing '/' in the zoneroot, and merge 1887 * in the zone's rootpath with the "newpath" (resource 1888 * or mountpoint) passed in. 1889 * 1890 * The size of the required buffer is thus the size of 1891 * the buffer required for the passed-in newpath 1892 * (strlen(newpath) + 1), plus the size of the buffer 1893 * required to hold zone_rootpath (zone_rootpathlen) 1894 * minus one for one of the now-superfluous NUL 1895 * terminations, minus one for the trailing '/'. 1896 * 1897 * That gives us: 1898 * 1899 * (strlen(newpath) + 1) + zone_rootpathlen - 1 - 1 1900 * 1901 * Which is what we have below. 1902 */ 1903 1904 len = strlen(newpath) + zone->zone_rootpathlen - 1; 1905 sp = kmem_alloc(len, KM_SLEEP); 1906 1907 /* 1908 * Copy everything including the trailing slash, which 1909 * we then overwrite with the NUL character. 1910 */ 1911 1912 (void) strcpy(sp, zone->zone_rootpath); 1913 sp[zone->zone_rootpathlen - 2] = '\0'; 1914 (void) strcat(sp, newpath); 1915 1916 ref = refstr_alloc(sp); 1917 kmem_free(sp, len); 1918 out: 1919 *refp = ref; 1920 1921 if (have_list_lock) { 1922 vfs_mnttab_modtimeupd(); 1923 vfs_list_unlock(); 1924 } 1925 } 1926 1927 /* 1928 * Record a mounted resource name in a vfs structure. 1929 * If vfsp is already mounted, caller must hold the vfs lock. 1930 */ 1931 void 1932 vfs_setresource(struct vfs *vfsp, const char *resource, uint32_t flag) 1933 { 1934 if (resource == NULL || resource[0] == '\0') 1935 resource = VFS_NORESOURCE; 1936 vfs_setpath(vfsp, &vfsp->vfs_resource, resource, flag); 1937 } 1938 1939 /* 1940 * Record a mount point name in a vfs structure. 1941 * If vfsp is already mounted, caller must hold the vfs lock. 
1942 */ 1943 void 1944 vfs_setmntpoint(struct vfs *vfsp, const char *mntpt, uint32_t flag) 1945 { 1946 if (mntpt == NULL || mntpt[0] == '\0') 1947 mntpt = VFS_NOMNTPT; 1948 vfs_setpath(vfsp, &vfsp->vfs_mntpt, mntpt, flag); 1949 } 1950 1951 /* Returns the vfs_resource. Caller must call refstr_rele() when finished. */ 1952 1953 refstr_t * 1954 vfs_getresource(const struct vfs *vfsp) 1955 { 1956 refstr_t *resource; 1957 1958 vfs_list_read_lock(); 1959 resource = vfsp->vfs_resource; 1960 refstr_hold(resource); 1961 vfs_list_unlock(); 1962 1963 return (resource); 1964 } 1965 1966 /* Returns the vfs_mntpt. Caller must call refstr_rele() when finished. */ 1967 1968 refstr_t * 1969 vfs_getmntpoint(const struct vfs *vfsp) 1970 { 1971 refstr_t *mntpt; 1972 1973 vfs_list_read_lock(); 1974 mntpt = vfsp->vfs_mntpt; 1975 refstr_hold(mntpt); 1976 vfs_list_unlock(); 1977 1978 return (mntpt); 1979 } 1980 1981 /* 1982 * Create an empty options table with enough empty slots to hold all 1983 * The options in the options string passed as an argument. 1984 * Potentially prepend another options table. 1985 * 1986 * Note: caller is responsible for locking the vfs list, if needed, 1987 * to protect mops. 1988 */ 1989 static void 1990 vfs_createopttbl_extend(mntopts_t *mops, const char *opts, 1991 const mntopts_t *mtmpl) 1992 { 1993 const char *s = opts; 1994 uint_t count; 1995 1996 if (opts == NULL || *opts == '\0') { 1997 count = 0; 1998 } else { 1999 count = 1; 2000 2001 /* 2002 * Count number of options in the string 2003 */ 2004 for (s = strchr(s, ','); s != NULL; s = strchr(s, ',')) { 2005 count++; 2006 s++; 2007 } 2008 } 2009 vfs_copyopttbl_extend(mtmpl, mops, count); 2010 } 2011 2012 /* 2013 * Create an empty options table with enough empty slots to hold all 2014 * The options in the options string passed as an argument. 2015 * 2016 * This function is *not* for general use by filesystems. 
2017 * 2018 * Note: caller is responsible for locking the vfs list, if needed, 2019 * to protect mops. 2020 */ 2021 void 2022 vfs_createopttbl(mntopts_t *mops, const char *opts) 2023 { 2024 vfs_createopttbl_extend(mops, opts, NULL); 2025 } 2026 2027 2028 /* 2029 * Swap two mount options tables 2030 */ 2031 static void 2032 vfs_swapopttbl_nolock(mntopts_t *optbl1, mntopts_t *optbl2) 2033 { 2034 uint_t tmpcnt; 2035 mntopt_t *tmplist; 2036 2037 tmpcnt = optbl2->mo_count; 2038 tmplist = optbl2->mo_list; 2039 optbl2->mo_count = optbl1->mo_count; 2040 optbl2->mo_list = optbl1->mo_list; 2041 optbl1->mo_count = tmpcnt; 2042 optbl1->mo_list = tmplist; 2043 } 2044 2045 static void 2046 vfs_swapopttbl(mntopts_t *optbl1, mntopts_t *optbl2) 2047 { 2048 vfs_list_lock(); 2049 vfs_swapopttbl_nolock(optbl1, optbl2); 2050 vfs_mnttab_modtimeupd(); 2051 vfs_list_unlock(); 2052 } 2053 2054 static char ** 2055 vfs_copycancelopt_extend(char **const moc, int extend) 2056 { 2057 int i = 0; 2058 int j; 2059 char **result; 2060 2061 if (moc != NULL) { 2062 for (; moc[i] != NULL; i++) 2063 /* count number of options to cancel */; 2064 } 2065 2066 if (i + extend == 0) 2067 return (NULL); 2068 2069 result = kmem_alloc((i + extend + 1) * sizeof (char *), KM_SLEEP); 2070 2071 for (j = 0; j < i; j++) { 2072 result[j] = kmem_alloc(strlen(moc[j]) + 1, KM_SLEEP); 2073 (void) strcpy(result[j], moc[j]); 2074 } 2075 for (; j <= i + extend; j++) 2076 result[j] = NULL; 2077 2078 return (result); 2079 } 2080 2081 static void 2082 vfs_copyopt(const mntopt_t *s, mntopt_t *d) 2083 { 2084 char *sp, *dp; 2085 2086 d->mo_flags = s->mo_flags; 2087 d->mo_data = s->mo_data; 2088 sp = s->mo_name; 2089 if (sp != NULL) { 2090 dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP); 2091 (void) strcpy(dp, sp); 2092 d->mo_name = dp; 2093 } else { 2094 d->mo_name = NULL; /* should never happen */ 2095 } 2096 2097 d->mo_cancel = vfs_copycancelopt_extend(s->mo_cancel, 0); 2098 2099 sp = s->mo_arg; 2100 if (sp != NULL) { 2101 dp = 
kmem_alloc(strlen(sp) + 1, KM_SLEEP); 2102 (void) strcpy(dp, sp); 2103 d->mo_arg = dp; 2104 } else { 2105 d->mo_arg = NULL; 2106 } 2107 } 2108 2109 /* 2110 * Copy a mount options table, possibly allocating some spare 2111 * slots at the end. It is permissible to copy_extend the NULL table. 2112 */ 2113 static void 2114 vfs_copyopttbl_extend(const mntopts_t *smo, mntopts_t *dmo, int extra) 2115 { 2116 uint_t i, count; 2117 mntopt_t *motbl; 2118 2119 /* 2120 * Clear out any existing stuff in the options table being initialized 2121 */ 2122 vfs_freeopttbl(dmo); 2123 count = (smo == NULL) ? 0 : smo->mo_count; 2124 if ((count + extra) == 0) /* nothing to do */ 2125 return; 2126 dmo->mo_count = count + extra; 2127 motbl = kmem_zalloc((count + extra) * sizeof (mntopt_t), KM_SLEEP); 2128 dmo->mo_list = motbl; 2129 for (i = 0; i < count; i++) { 2130 vfs_copyopt(&smo->mo_list[i], &motbl[i]); 2131 } 2132 for (i = count; i < count + extra; i++) { 2133 motbl[i].mo_flags = MO_EMPTY; 2134 } 2135 } 2136 2137 /* 2138 * Copy a mount options table. 2139 * 2140 * This function is *not* for general use by filesystems. 2141 * 2142 * Note: caller is responsible for locking the vfs list, if needed, 2143 * to protect smo and dmo. 2144 */ 2145 void 2146 vfs_copyopttbl(const mntopts_t *smo, mntopts_t *dmo) 2147 { 2148 vfs_copyopttbl_extend(smo, dmo, 0); 2149 } 2150 2151 static char ** 2152 vfs_mergecancelopts(const mntopt_t *mop1, const mntopt_t *mop2) 2153 { 2154 int c1 = 0; 2155 int c2 = 0; 2156 char **result; 2157 char **sp1, **sp2, **dp; 2158 2159 /* 2160 * First we count both lists of cancel options. 2161 * If either is NULL or has no elements, we return a copy of 2162 * the other. 
2163 */ 2164 if (mop1->mo_cancel != NULL) { 2165 for (; mop1->mo_cancel[c1] != NULL; c1++) 2166 /* count cancel options in mop1 */; 2167 } 2168 2169 if (c1 == 0) 2170 return (vfs_copycancelopt_extend(mop2->mo_cancel, 0)); 2171 2172 if (mop2->mo_cancel != NULL) { 2173 for (; mop2->mo_cancel[c2] != NULL; c2++) 2174 /* count cancel options in mop2 */; 2175 } 2176 2177 result = vfs_copycancelopt_extend(mop1->mo_cancel, c2); 2178 2179 if (c2 == 0) 2180 return (result); 2181 2182 /* 2183 * When we get here, we've got two sets of cancel options; 2184 * we need to merge the two sets. We know that the result 2185 * array has "c1+c2+1" entries and in the end we might shrink 2186 * it. 2187 * Result now has a copy of the c1 entries from mop1; we'll 2188 * now lookup all the entries of mop2 in mop1 and copy it if 2189 * it is unique. 2190 * This operation is O(n^2) but it's only called once per 2191 * filesystem per duplicate option. This is a situation 2192 * which doesn't arise with the filesystems in ON and 2193 * n is generally 1. 2194 */ 2195 2196 dp = &result[c1]; 2197 for (sp2 = mop2->mo_cancel; *sp2 != NULL; sp2++) { 2198 for (sp1 = mop1->mo_cancel; *sp1 != NULL; sp1++) { 2199 if (strcmp(*sp1, *sp2) == 0) 2200 break; 2201 } 2202 if (*sp1 == NULL) { 2203 /* 2204 * Option *sp2 not found in mop1, so copy it. 2205 * The calls to vfs_copycancelopt_extend() 2206 * guarantee that there's enough room. 2207 */ 2208 *dp = kmem_alloc(strlen(*sp2) + 1, KM_SLEEP); 2209 (void) strcpy(*dp++, *sp2); 2210 } 2211 } 2212 if (dp != &result[c1+c2]) { 2213 size_t bytes = (dp - result + 1) * sizeof (char *); 2214 char **nres = kmem_alloc(bytes, KM_SLEEP); 2215 2216 bcopy(result, nres, bytes); 2217 kmem_free(result, (c1 + c2 + 1) * sizeof (char *)); 2218 result = nres; 2219 } 2220 return (result); 2221 } 2222 2223 /* 2224 * Merge two mount option tables (outer and inner) into one. This is very 2225 * similar to "merging" global variables and automatic variables in C. 
2226 * 2227 * This isn't (and doesn't have to be) fast. 2228 * 2229 * This function is *not* for general use by filesystems. 2230 * 2231 * Note: caller is responsible for locking the vfs list, if needed, 2232 * to protect omo, imo & dmo. 2233 */ 2234 void 2235 vfs_mergeopttbl(const mntopts_t *omo, const mntopts_t *imo, mntopts_t *dmo) 2236 { 2237 uint_t i, count; 2238 mntopt_t *mop, *motbl; 2239 uint_t freeidx; 2240 2241 /* 2242 * First determine how much space we need to allocate. 2243 */ 2244 count = omo->mo_count; 2245 for (i = 0; i < imo->mo_count; i++) { 2246 if (imo->mo_list[i].mo_flags & MO_EMPTY) 2247 continue; 2248 if (vfs_hasopt(omo, imo->mo_list[i].mo_name) == NULL) 2249 count++; 2250 } 2251 ASSERT(count >= omo->mo_count && 2252 count <= omo->mo_count + imo->mo_count); 2253 motbl = kmem_alloc(count * sizeof (mntopt_t), KM_SLEEP); 2254 for (i = 0; i < omo->mo_count; i++) 2255 vfs_copyopt(&omo->mo_list[i], &motbl[i]); 2256 freeidx = omo->mo_count; 2257 for (i = 0; i < imo->mo_count; i++) { 2258 if (imo->mo_list[i].mo_flags & MO_EMPTY) 2259 continue; 2260 if ((mop = vfs_hasopt(omo, imo->mo_list[i].mo_name)) != NULL) { 2261 char **newcanp; 2262 uint_t index = mop - omo->mo_list; 2263 2264 newcanp = vfs_mergecancelopts(mop, &motbl[index]); 2265 2266 vfs_freeopt(&motbl[index]); 2267 vfs_copyopt(&imo->mo_list[i], &motbl[index]); 2268 2269 vfs_freecancelopt(motbl[index].mo_cancel); 2270 motbl[index].mo_cancel = newcanp; 2271 } else { 2272 /* 2273 * If it's a new option, just copy it over to the first 2274 * free location. 2275 */ 2276 vfs_copyopt(&imo->mo_list[i], &motbl[freeidx++]); 2277 } 2278 } 2279 dmo->mo_count = count; 2280 dmo->mo_list = motbl; 2281 } 2282 2283 /* 2284 * Functions to set and clear mount options in a mount options table. 2285 */ 2286 2287 /* 2288 * Clear a mount option, if it exists. 2289 * 2290 * The update_mnttab arg indicates whether mops is part of a vfs that is on 2291 * the vfs list. 
2292 */ 2293 static void 2294 vfs_clearmntopt_nolock(mntopts_t *mops, const char *opt, int update_mnttab) 2295 { 2296 struct mntopt *mop; 2297 uint_t i, count; 2298 2299 ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist)); 2300 2301 count = mops->mo_count; 2302 for (i = 0; i < count; i++) { 2303 mop = &mops->mo_list[i]; 2304 2305 if (mop->mo_flags & MO_EMPTY) 2306 continue; 2307 if (strcmp(opt, mop->mo_name)) 2308 continue; 2309 mop->mo_flags &= ~MO_SET; 2310 if (mop->mo_arg != NULL) { 2311 kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 2312 } 2313 mop->mo_arg = NULL; 2314 if (update_mnttab) 2315 vfs_mnttab_modtimeupd(); 2316 break; 2317 } 2318 } 2319 2320 void 2321 vfs_clearmntopt(struct vfs *vfsp, const char *opt) 2322 { 2323 int gotlock = 0; 2324 2325 if (VFS_ON_LIST(vfsp)) { 2326 gotlock = 1; 2327 vfs_list_lock(); 2328 } 2329 vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, opt, gotlock); 2330 if (gotlock) 2331 vfs_list_unlock(); 2332 } 2333 2334 2335 /* 2336 * Set a mount option on. If it's not found in the table, it's silently 2337 * ignored. If the option has MO_IGNORE set, it is still set unless the 2338 * VFS_NOFORCEOPT bit is set in the flags. Also, VFS_DISPLAY/VFS_NODISPLAY flag 2339 * bits can be used to toggle the MO_NODISPLAY bit for the option. 2340 * If the VFS_CREATEOPT flag bit is set then the first option slot with 2341 * MO_EMPTY set is created as the option passed in. 2342 * 2343 * The update_mnttab arg indicates whether mops is part of a vfs that is on 2344 * the vfs list. 
2345 */ 2346 static void 2347 vfs_setmntopt_nolock(mntopts_t *mops, const char *opt, 2348 const char *arg, int flags, int update_mnttab) 2349 { 2350 mntopt_t *mop; 2351 uint_t i, count; 2352 char *sp; 2353 2354 ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist)); 2355 2356 if (flags & VFS_CREATEOPT) { 2357 if (vfs_hasopt(mops, opt) != NULL) { 2358 flags &= ~VFS_CREATEOPT; 2359 } 2360 } 2361 count = mops->mo_count; 2362 for (i = 0; i < count; i++) { 2363 mop = &mops->mo_list[i]; 2364 2365 if (mop->mo_flags & MO_EMPTY) { 2366 if ((flags & VFS_CREATEOPT) == 0) 2367 continue; 2368 sp = kmem_alloc(strlen(opt) + 1, KM_SLEEP); 2369 (void) strcpy(sp, opt); 2370 mop->mo_name = sp; 2371 if (arg != NULL) 2372 mop->mo_flags = MO_HASVALUE; 2373 else 2374 mop->mo_flags = 0; 2375 } else if (strcmp(opt, mop->mo_name)) { 2376 continue; 2377 } 2378 if ((mop->mo_flags & MO_IGNORE) && (flags & VFS_NOFORCEOPT)) 2379 break; 2380 if (arg != NULL && (mop->mo_flags & MO_HASVALUE) != 0) { 2381 sp = kmem_alloc(strlen(arg) + 1, KM_SLEEP); 2382 (void) strcpy(sp, arg); 2383 } else { 2384 sp = NULL; 2385 } 2386 if (mop->mo_arg != NULL) 2387 kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 2388 mop->mo_arg = sp; 2389 if (flags & VFS_DISPLAY) 2390 mop->mo_flags &= ~MO_NODISPLAY; 2391 if (flags & VFS_NODISPLAY) 2392 mop->mo_flags |= MO_NODISPLAY; 2393 mop->mo_flags |= MO_SET; 2394 if (mop->mo_cancel != NULL) { 2395 char **cp; 2396 2397 for (cp = mop->mo_cancel; *cp != NULL; cp++) 2398 vfs_clearmntopt_nolock(mops, *cp, 0); 2399 } 2400 if (update_mnttab) 2401 vfs_mnttab_modtimeupd(); 2402 break; 2403 } 2404 } 2405 2406 void 2407 vfs_setmntopt(struct vfs *vfsp, const char *opt, const char *arg, int flags) 2408 { 2409 int gotlock = 0; 2410 2411 if (VFS_ON_LIST(vfsp)) { 2412 gotlock = 1; 2413 vfs_list_lock(); 2414 } 2415 vfs_setmntopt_nolock(&vfsp->vfs_mntopts, opt, arg, flags, gotlock); 2416 if (gotlock) 2417 vfs_list_unlock(); 2418 } 2419 2420 2421 /* 2422 * Add a "tag" option to a mounted file system's 
options list. 2423 * 2424 * Note: caller is responsible for locking the vfs list, if needed, 2425 * to protect mops. 2426 */ 2427 static mntopt_t * 2428 vfs_addtag(mntopts_t *mops, const char *tag) 2429 { 2430 uint_t count; 2431 mntopt_t *mop, *motbl; 2432 2433 count = mops->mo_count + 1; 2434 motbl = kmem_zalloc(count * sizeof (mntopt_t), KM_SLEEP); 2435 if (mops->mo_count) { 2436 size_t len = (count - 1) * sizeof (mntopt_t); 2437 2438 bcopy(mops->mo_list, motbl, len); 2439 kmem_free(mops->mo_list, len); 2440 } 2441 mops->mo_count = count; 2442 mops->mo_list = motbl; 2443 mop = &motbl[count - 1]; 2444 mop->mo_flags = MO_TAG; 2445 mop->mo_name = kmem_alloc(strlen(tag) + 1, KM_SLEEP); 2446 (void) strcpy(mop->mo_name, tag); 2447 return (mop); 2448 } 2449 2450 /* 2451 * Allow users to set arbitrary "tags" in a vfs's mount options. 2452 * Broader use within the kernel is discouraged. 2453 */ 2454 int 2455 vfs_settag(uint_t major, uint_t minor, const char *mntpt, const char *tag, 2456 cred_t *cr) 2457 { 2458 vfs_t *vfsp; 2459 mntopts_t *mops; 2460 mntopt_t *mop; 2461 int found = 0; 2462 dev_t dev = makedevice(major, minor); 2463 int err = 0; 2464 char *buf = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP); 2465 2466 /* 2467 * Find the desired mounted file system 2468 */ 2469 vfs_list_lock(); 2470 vfsp = rootvfs; 2471 do { 2472 if (vfsp->vfs_dev == dev && 2473 strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) { 2474 found = 1; 2475 break; 2476 } 2477 vfsp = vfsp->vfs_next; 2478 } while (vfsp != rootvfs); 2479 2480 if (!found) { 2481 err = EINVAL; 2482 goto out; 2483 } 2484 err = secpolicy_fs_config(cr, vfsp); 2485 if (err != 0) 2486 goto out; 2487 2488 mops = &vfsp->vfs_mntopts; 2489 /* 2490 * Add tag if it doesn't already exist 2491 */ 2492 if ((mop = vfs_hasopt(mops, tag)) == NULL) { 2493 int len; 2494 2495 (void) vfs_buildoptionstr(mops, buf, MAX_MNTOPT_STR); 2496 len = strlen(buf); 2497 if (len + strlen(tag) + 2 > MAX_MNTOPT_STR) { 2498 err = ENAMETOOLONG; 2499 goto out; 2500 
} 2501 mop = vfs_addtag(mops, tag); 2502 } 2503 if ((mop->mo_flags & MO_TAG) == 0) { 2504 err = EINVAL; 2505 goto out; 2506 } 2507 vfs_setmntopt_nolock(mops, tag, NULL, 0, 1); 2508 out: 2509 vfs_list_unlock(); 2510 kmem_free(buf, MAX_MNTOPT_STR); 2511 return (err); 2512 } 2513 2514 /* 2515 * Allow users to remove arbitrary "tags" in a vfs's mount options. 2516 * Broader use within the kernel is discouraged. 2517 */ 2518 int 2519 vfs_clrtag(uint_t major, uint_t minor, const char *mntpt, const char *tag, 2520 cred_t *cr) 2521 { 2522 vfs_t *vfsp; 2523 mntopt_t *mop; 2524 int found = 0; 2525 dev_t dev = makedevice(major, minor); 2526 int err = 0; 2527 2528 /* 2529 * Find the desired mounted file system 2530 */ 2531 vfs_list_lock(); 2532 vfsp = rootvfs; 2533 do { 2534 if (vfsp->vfs_dev == dev && 2535 strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) { 2536 found = 1; 2537 break; 2538 } 2539 vfsp = vfsp->vfs_next; 2540 } while (vfsp != rootvfs); 2541 2542 if (!found) { 2543 err = EINVAL; 2544 goto out; 2545 } 2546 err = secpolicy_fs_config(cr, vfsp); 2547 if (err != 0) 2548 goto out; 2549 2550 if ((mop = vfs_hasopt(&vfsp->vfs_mntopts, tag)) == NULL) { 2551 err = EINVAL; 2552 goto out; 2553 } 2554 if ((mop->mo_flags & MO_TAG) == 0) { 2555 err = EINVAL; 2556 goto out; 2557 } 2558 vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, tag, 1); 2559 out: 2560 vfs_list_unlock(); 2561 return (err); 2562 } 2563 2564 /* 2565 * Function to parse an option string and fill in a mount options table. 2566 * Unknown options are silently ignored. The input option string is modified 2567 * by replacing separators with nulls. If the create flag is set, options 2568 * not found in the table are just added on the fly. The table must have 2569 * an option slot marked MO_EMPTY to add an option on the fly. 2570 * 2571 * This function is *not* for general use by filesystems. 2572 * 2573 * Note: caller is responsible for locking the vfs list, if needed, 2574 * to protect mops.. 
2575 */ 2576 void 2577 vfs_parsemntopts(mntopts_t *mops, char *osp, int create) 2578 { 2579 char *s = osp, *p, *nextop, *valp, *cp, *ep; 2580 int setflg = VFS_NOFORCEOPT; 2581 2582 if (osp == NULL) 2583 return; 2584 while (*s != '\0') { 2585 p = strchr(s, ','); /* find next option */ 2586 if (p == NULL) { 2587 cp = NULL; 2588 p = s + strlen(s); 2589 } else { 2590 cp = p; /* save location of comma */ 2591 *p++ = '\0'; /* mark end and point to next option */ 2592 } 2593 nextop = p; 2594 p = strchr(s, '='); /* look for value */ 2595 if (p == NULL) { 2596 valp = NULL; /* no value supplied */ 2597 } else { 2598 ep = p; /* save location of equals */ 2599 *p++ = '\0'; /* end option and point to value */ 2600 valp = p; 2601 } 2602 /* 2603 * set option into options table 2604 */ 2605 if (create) 2606 setflg |= VFS_CREATEOPT; 2607 vfs_setmntopt_nolock(mops, s, valp, setflg, 0); 2608 if (cp != NULL) 2609 *cp = ','; /* restore the comma */ 2610 if (valp != NULL) 2611 *ep = '='; /* restore the equals */ 2612 s = nextop; 2613 } 2614 } 2615 2616 /* 2617 * Function to inquire if an option exists in a mount options table. 2618 * Returns a pointer to the option if it exists, else NULL. 2619 * 2620 * This function is *not* for general use by filesystems. 2621 * 2622 * Note: caller is responsible for locking the vfs list, if needed, 2623 * to protect mops. 2624 */ 2625 struct mntopt * 2626 vfs_hasopt(const mntopts_t *mops, const char *opt) 2627 { 2628 struct mntopt *mop; 2629 uint_t i, count; 2630 2631 count = mops->mo_count; 2632 for (i = 0; i < count; i++) { 2633 mop = &mops->mo_list[i]; 2634 2635 if (mop->mo_flags & MO_EMPTY) 2636 continue; 2637 if (strcmp(opt, mop->mo_name) == 0) 2638 return (mop); 2639 } 2640 return (NULL); 2641 } 2642 2643 /* 2644 * Function to inquire if an option is set in a mount options table. 2645 * Returns non-zero if set and fills in the arg pointer with a pointer to 2646 * the argument string or NULL if there is no argument string. 
2647 */ 2648 static int 2649 vfs_optionisset_nolock(const mntopts_t *mops, const char *opt, char **argp) 2650 { 2651 struct mntopt *mop; 2652 uint_t i, count; 2653 2654 count = mops->mo_count; 2655 for (i = 0; i < count; i++) { 2656 mop = &mops->mo_list[i]; 2657 2658 if (mop->mo_flags & MO_EMPTY) 2659 continue; 2660 if (strcmp(opt, mop->mo_name)) 2661 continue; 2662 if ((mop->mo_flags & MO_SET) == 0) 2663 return (0); 2664 if (argp != NULL && (mop->mo_flags & MO_HASVALUE) != 0) 2665 *argp = mop->mo_arg; 2666 return (1); 2667 } 2668 return (0); 2669 } 2670 2671 2672 int 2673 vfs_optionisset(const struct vfs *vfsp, const char *opt, char **argp) 2674 { 2675 int ret; 2676 2677 vfs_list_read_lock(); 2678 ret = vfs_optionisset_nolock(&vfsp->vfs_mntopts, opt, argp); 2679 vfs_list_unlock(); 2680 return (ret); 2681 } 2682 2683 2684 /* 2685 * Construct a comma separated string of the options set in the given 2686 * mount table, return the string in the given buffer. Return non-zero if 2687 * the buffer would overflow. 2688 * 2689 * This function is *not* for general use by filesystems. 2690 * 2691 * Note: caller is responsible for locking the vfs list, if needed, 2692 * to protect mp. 
2693 */ 2694 int 2695 vfs_buildoptionstr(const mntopts_t *mp, char *buf, int len) 2696 { 2697 char *cp; 2698 uint_t i; 2699 2700 buf[0] = '\0'; 2701 cp = buf; 2702 for (i = 0; i < mp->mo_count; i++) { 2703 struct mntopt *mop; 2704 2705 mop = &mp->mo_list[i]; 2706 if (mop->mo_flags & MO_SET) { 2707 int optlen, comma = 0; 2708 2709 if (buf[0] != '\0') 2710 comma = 1; 2711 optlen = strlen(mop->mo_name); 2712 if (strlen(buf) + comma + optlen + 1 > len) 2713 goto err; 2714 if (comma) 2715 *cp++ = ','; 2716 (void) strcpy(cp, mop->mo_name); 2717 cp += optlen; 2718 /* 2719 * Append option value if there is one 2720 */ 2721 if (mop->mo_arg != NULL) { 2722 int arglen; 2723 2724 arglen = strlen(mop->mo_arg); 2725 if (strlen(buf) + arglen + 2 > len) 2726 goto err; 2727 *cp++ = '='; 2728 (void) strcpy(cp, mop->mo_arg); 2729 cp += arglen; 2730 } 2731 } 2732 } 2733 return (0); 2734 err: 2735 return (EOVERFLOW); 2736 } 2737 2738 static void 2739 vfs_freecancelopt(char **moc) 2740 { 2741 if (moc != NULL) { 2742 int ccnt = 0; 2743 char **cp; 2744 2745 for (cp = moc; *cp != NULL; cp++) { 2746 kmem_free(*cp, strlen(*cp) + 1); 2747 ccnt++; 2748 } 2749 kmem_free(moc, (ccnt + 1) * sizeof (char *)); 2750 } 2751 } 2752 2753 static void 2754 vfs_freeopt(mntopt_t *mop) 2755 { 2756 if (mop->mo_name != NULL) 2757 kmem_free(mop->mo_name, strlen(mop->mo_name) + 1); 2758 2759 vfs_freecancelopt(mop->mo_cancel); 2760 2761 if (mop->mo_arg != NULL) 2762 kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 2763 } 2764 2765 /* 2766 * Free a mount options table 2767 * 2768 * This function is *not* for general use by filesystems. 2769 * 2770 * Note: caller is responsible for locking the vfs list, if needed, 2771 * to protect mp. 
2772 */ 2773 void 2774 vfs_freeopttbl(mntopts_t *mp) 2775 { 2776 uint_t i, count; 2777 2778 count = mp->mo_count; 2779 for (i = 0; i < count; i++) { 2780 vfs_freeopt(&mp->mo_list[i]); 2781 } 2782 if (count) { 2783 kmem_free(mp->mo_list, sizeof (mntopt_t) * count); 2784 mp->mo_count = 0; 2785 mp->mo_list = NULL; 2786 } 2787 } 2788 2789 2790 /* ARGSUSED */ 2791 static int 2792 vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, 2793 caller_context_t *ct) 2794 { 2795 return (0); 2796 } 2797 2798 /* ARGSUSED */ 2799 static int 2800 vfs_mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, 2801 caller_context_t *ct) 2802 { 2803 return (0); 2804 } 2805 2806 /* 2807 * The dummy vnode is currently used only by file events notification 2808 * module which is just interested in the timestamps. 2809 */ 2810 /* ARGSUSED */ 2811 static int 2812 vfs_mntdummygetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2813 caller_context_t *ct) 2814 { 2815 bzero(vap, sizeof (vattr_t)); 2816 vap->va_type = VREG; 2817 vap->va_nlink = 1; 2818 vap->va_ctime = vfs_mnttab_ctime; 2819 /* 2820 * it is ok to just copy mtime as the time will be monotonically 2821 * increasing. 
2822 */ 2823 vap->va_mtime = vfs_mnttab_mtime; 2824 vap->va_atime = vap->va_mtime; 2825 return (0); 2826 } 2827 2828 static void 2829 vfs_mnttabvp_setup(void) 2830 { 2831 vnode_t *tvp; 2832 vnodeops_t *vfs_mntdummyvnops; 2833 const fs_operation_def_t mnt_dummyvnodeops_template[] = { 2834 VOPNAME_READ, { .vop_read = vfs_mntdummyread }, 2835 VOPNAME_WRITE, { .vop_write = vfs_mntdummywrite }, 2836 VOPNAME_GETATTR, { .vop_getattr = vfs_mntdummygetattr }, 2837 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 2838 NULL, NULL 2839 }; 2840 2841 if (vn_make_ops("mnttab", mnt_dummyvnodeops_template, 2842 &vfs_mntdummyvnops) != 0) { 2843 cmn_err(CE_WARN, "vfs_mnttabvp_setup: vn_make_ops failed"); 2844 /* Shouldn't happen, but not bad enough to panic */ 2845 return; 2846 } 2847 2848 /* 2849 * A global dummy vnode is allocated to represent mntfs files. 2850 * The mntfs file (/etc/mnttab) can be monitored for file events 2851 * and receive an event when mnttab changes. Dummy VOP calls 2852 * will be made on this vnode. The file events notification module 2853 * intercepts this vnode and delivers relevant events. 2854 */ 2855 tvp = vn_alloc(KM_SLEEP); 2856 tvp->v_flag = VNOMOUNT|VNOMAP|VNOSWAP|VNOCACHE; 2857 vn_setops(tvp, vfs_mntdummyvnops); 2858 tvp->v_type = VREG; 2859 /* 2860 * The mnt dummy ops do not reference v_data. 2861 * No other module intercepting this vnode should either. 2862 * Just set it to point to itself. 
2863 */ 2864 tvp->v_data = (caddr_t)tvp; 2865 tvp->v_vfsp = rootvfs; 2866 vfs_mntdummyvp = tvp; 2867 } 2868 2869 /* 2870 * performs fake read/write ops 2871 */ 2872 static void 2873 vfs_mnttab_rwop(int rw) 2874 { 2875 struct uio uio; 2876 struct iovec iov; 2877 char buf[1]; 2878 2879 if (vfs_mntdummyvp == NULL) 2880 return; 2881 2882 bzero(&uio, sizeof (uio)); 2883 bzero(&iov, sizeof (iov)); 2884 iov.iov_base = buf; 2885 iov.iov_len = 0; 2886 uio.uio_iov = &iov; 2887 uio.uio_iovcnt = 1; 2888 uio.uio_loffset = 0; 2889 uio.uio_segflg = UIO_SYSSPACE; 2890 uio.uio_resid = 0; 2891 if (rw) { 2892 (void) VOP_WRITE(vfs_mntdummyvp, &uio, 0, kcred, NULL); 2893 } else { 2894 (void) VOP_READ(vfs_mntdummyvp, &uio, 0, kcred, NULL); 2895 } 2896 } 2897 2898 /* 2899 * Generate a write operation. 2900 */ 2901 void 2902 vfs_mnttab_writeop(void) 2903 { 2904 vfs_mnttab_rwop(1); 2905 } 2906 2907 /* 2908 * Generate a read operation. 2909 */ 2910 void 2911 vfs_mnttab_readop(void) 2912 { 2913 vfs_mnttab_rwop(0); 2914 } 2915 2916 /* 2917 * Free any mnttab information recorded in the vfs struct. 2918 * The vfs must not be on the vfs list. 
2919 */ 2920 static void 2921 vfs_freemnttab(struct vfs *vfsp) 2922 { 2923 ASSERT(!VFS_ON_LIST(vfsp)); 2924 2925 /* 2926 * Free device and mount point information 2927 */ 2928 if (vfsp->vfs_mntpt != NULL) { 2929 refstr_rele(vfsp->vfs_mntpt); 2930 vfsp->vfs_mntpt = NULL; 2931 } 2932 if (vfsp->vfs_resource != NULL) { 2933 refstr_rele(vfsp->vfs_resource); 2934 vfsp->vfs_resource = NULL; 2935 } 2936 /* 2937 * Now free mount options information 2938 */ 2939 vfs_freeopttbl(&vfsp->vfs_mntopts); 2940 } 2941 2942 /* 2943 * Return the last mnttab modification time 2944 */ 2945 void 2946 vfs_mnttab_modtime(timespec_t *ts) 2947 { 2948 ASSERT(RW_LOCK_HELD(&vfslist)); 2949 *ts = vfs_mnttab_mtime; 2950 } 2951 2952 /* 2953 * See if mnttab is changed 2954 */ 2955 void 2956 vfs_mnttab_poll(timespec_t *old, struct pollhead **phpp) 2957 { 2958 int changed; 2959 2960 *phpp = (struct pollhead *)NULL; 2961 2962 /* 2963 * Note: don't grab vfs list lock before accessing vfs_mnttab_mtime. 2964 * Can lead to deadlock against vfs_mnttab_modtimeupd(). It is safe 2965 * to not grab the vfs list lock because tv_sec is monotonically 2966 * increasing. 2967 */ 2968 2969 changed = (old->tv_nsec != vfs_mnttab_mtime.tv_nsec) || 2970 (old->tv_sec != vfs_mnttab_mtime.tv_sec); 2971 if (!changed) { 2972 *phpp = &vfs_pollhd; 2973 } 2974 } 2975 2976 /* Provide a unique and monotonically-increasing timestamp. */ 2977 void 2978 vfs_mono_time(timespec_t *ts) 2979 { 2980 static volatile hrtime_t hrt; /* The saved time. */ 2981 hrtime_t newhrt, oldhrt; /* For effecting the CAS. */ 2982 timespec_t newts; 2983 2984 /* 2985 * Try gethrestime() first, but be prepared to fabricate a sensible 2986 * answer at the first sign of any trouble. 
2987 */ 2988 gethrestime(&newts); 2989 newhrt = ts2hrt(&newts); 2990 for (;;) { 2991 oldhrt = hrt; 2992 if (newhrt <= hrt) 2993 newhrt = hrt + 1; 2994 if (atomic_cas_64((uint64_t *)&hrt, oldhrt, newhrt) == oldhrt) 2995 break; 2996 } 2997 hrt2ts(newhrt, ts); 2998 } 2999 3000 /* 3001 * Update the mnttab modification time and wake up any waiters for 3002 * mnttab changes 3003 */ 3004 void 3005 vfs_mnttab_modtimeupd() 3006 { 3007 hrtime_t oldhrt, newhrt; 3008 3009 ASSERT(RW_WRITE_HELD(&vfslist)); 3010 oldhrt = ts2hrt(&vfs_mnttab_mtime); 3011 gethrestime(&vfs_mnttab_mtime); 3012 newhrt = ts2hrt(&vfs_mnttab_mtime); 3013 if (oldhrt == (hrtime_t)0) 3014 vfs_mnttab_ctime = vfs_mnttab_mtime; 3015 /* 3016 * Attempt to provide unique mtime (like uniqtime but not). 3017 */ 3018 if (newhrt == oldhrt) { 3019 newhrt++; 3020 hrt2ts(newhrt, &vfs_mnttab_mtime); 3021 } 3022 pollwakeup(&vfs_pollhd, (short)POLLRDBAND); 3023 vfs_mnttab_writeop(); 3024 } 3025 3026 int 3027 dounmount(struct vfs *vfsp, int flag, cred_t *cr) 3028 { 3029 vnode_t *coveredvp; 3030 int error; 3031 extern void teardown_vopstats(vfs_t *); 3032 3033 /* 3034 * Get covered vnode. This will be NULL if the vfs is not linked 3035 * into the file system name space (i.e., domount() with MNT_NOSPICE). 3036 */ 3037 coveredvp = vfsp->vfs_vnodecovered; 3038 ASSERT(coveredvp == NULL || vn_vfswlock_held(coveredvp)); 3039 3040 /* 3041 * Purge all dnlc entries for this vfs. 3042 */ 3043 (void) dnlc_purge_vfsp(vfsp, 0); 3044 3045 /* For forcible umount, skip VFS_SYNC() since it may hang */ 3046 if ((flag & MS_FORCE) == 0) 3047 (void) VFS_SYNC(vfsp, 0, cr); 3048 3049 /* 3050 * Lock the vfs to maintain fs status quo during unmount. This 3051 * has to be done after the sync because ufs_update tries to acquire 3052 * the vfs_reflock. 
3053 */ 3054 vfs_lock_wait(vfsp); 3055 3056 if (error = VFS_UNMOUNT(vfsp, flag, cr)) { 3057 vfs_unlock(vfsp); 3058 if (coveredvp != NULL) 3059 vn_vfsunlock(coveredvp); 3060 } else if (coveredvp != NULL) { 3061 teardown_vopstats(vfsp); 3062 /* 3063 * vfs_remove() will do a VN_RELE(vfsp->vfs_vnodecovered) 3064 * when it frees vfsp so we do a VN_HOLD() so we can 3065 * continue to use coveredvp afterwards. 3066 */ 3067 VN_HOLD(coveredvp); 3068 vfs_remove(vfsp); 3069 vn_vfsunlock(coveredvp); 3070 VN_RELE(coveredvp); 3071 } else { 3072 teardown_vopstats(vfsp); 3073 /* 3074 * Release the reference to vfs that is not linked 3075 * into the name space. 3076 */ 3077 vfs_unlock(vfsp); 3078 VFS_RELE(vfsp); 3079 } 3080 return (error); 3081 } 3082 3083 3084 /* 3085 * Vfs_unmountall() is called by uadmin() to unmount all 3086 * mounted file systems (except the root file system) during shutdown. 3087 * It follows the existing locking protocol when traversing the vfs list 3088 * to sync and unmount vfses. Even though there should be no 3089 * other thread running while the system is shutting down, it is prudent 3090 * to still follow the locking protocol. 3091 */ 3092 void 3093 vfs_unmountall(void) 3094 { 3095 struct vfs *vfsp; 3096 struct vfs *prev_vfsp = NULL; 3097 int error; 3098 3099 /* 3100 * Toss all dnlc entries now so that the per-vfs sync 3101 * and unmount operations don't have to slog through 3102 * a bunch of uninteresting vnodes over and over again. 
3103 */ 3104 dnlc_purge(); 3105 3106 vfs_list_lock(); 3107 for (vfsp = rootvfs->vfs_prev; vfsp != rootvfs; vfsp = prev_vfsp) { 3108 prev_vfsp = vfsp->vfs_prev; 3109 3110 if (vfs_lock(vfsp) != 0) 3111 continue; 3112 error = vn_vfswlock(vfsp->vfs_vnodecovered); 3113 vfs_unlock(vfsp); 3114 if (error) 3115 continue; 3116 3117 vfs_list_unlock(); 3118 3119 (void) VFS_SYNC(vfsp, SYNC_CLOSE, CRED()); 3120 (void) dounmount(vfsp, 0, CRED()); 3121 3122 /* 3123 * Since we dropped the vfslist lock above we must 3124 * verify that next_vfsp still exists, else start over. 3125 */ 3126 vfs_list_lock(); 3127 for (vfsp = rootvfs->vfs_prev; 3128 vfsp != rootvfs; vfsp = vfsp->vfs_prev) 3129 if (vfsp == prev_vfsp) 3130 break; 3131 if (vfsp == rootvfs && prev_vfsp != rootvfs) 3132 prev_vfsp = rootvfs->vfs_prev; 3133 } 3134 vfs_list_unlock(); 3135 } 3136 3137 /* 3138 * Called to add an entry to the end of the vfs mount in progress list 3139 */ 3140 void 3141 vfs_addmip(dev_t dev, struct vfs *vfsp) 3142 { 3143 struct ipmnt *mipp; 3144 3145 mipp = (struct ipmnt *)kmem_alloc(sizeof (struct ipmnt), KM_SLEEP); 3146 mipp->mip_next = NULL; 3147 mipp->mip_dev = dev; 3148 mipp->mip_vfsp = vfsp; 3149 mutex_enter(&vfs_miplist_mutex); 3150 if (vfs_miplist_end != NULL) 3151 vfs_miplist_end->mip_next = mipp; 3152 else 3153 vfs_miplist = mipp; 3154 vfs_miplist_end = mipp; 3155 mutex_exit(&vfs_miplist_mutex); 3156 } 3157 3158 /* 3159 * Called to remove an entry from the mount in progress list 3160 * Either because the mount completed or it failed. 
3161 */ 3162 void 3163 vfs_delmip(struct vfs *vfsp) 3164 { 3165 struct ipmnt *mipp, *mipprev; 3166 3167 mutex_enter(&vfs_miplist_mutex); 3168 mipprev = NULL; 3169 for (mipp = vfs_miplist; 3170 mipp && mipp->mip_vfsp != vfsp; mipp = mipp->mip_next) { 3171 mipprev = mipp; 3172 } 3173 if (mipp == NULL) 3174 return; /* shouldn't happen */ 3175 if (mipp == vfs_miplist_end) 3176 vfs_miplist_end = mipprev; 3177 if (mipprev == NULL) 3178 vfs_miplist = mipp->mip_next; 3179 else 3180 mipprev->mip_next = mipp->mip_next; 3181 mutex_exit(&vfs_miplist_mutex); 3182 kmem_free(mipp, sizeof (struct ipmnt)); 3183 } 3184 3185 /* 3186 * vfs_add is called by a specific filesystem's mount routine to add 3187 * the new vfs into the vfs list/hash and to cover the mounted-on vnode. 3188 * The vfs should already have been locked by the caller. 3189 * 3190 * coveredvp is NULL if this is the root. 3191 */ 3192 void 3193 vfs_add(vnode_t *coveredvp, struct vfs *vfsp, int mflag) 3194 { 3195 int newflag; 3196 3197 ASSERT(vfs_lock_held(vfsp)); 3198 VFS_HOLD(vfsp); 3199 newflag = vfsp->vfs_flag; 3200 if (mflag & MS_RDONLY) 3201 newflag |= VFS_RDONLY; 3202 else 3203 newflag &= ~VFS_RDONLY; 3204 if (mflag & MS_NOSUID) 3205 newflag |= (VFS_NOSETUID|VFS_NODEVICES); 3206 else 3207 newflag &= ~(VFS_NOSETUID|VFS_NODEVICES); 3208 if (mflag & MS_NOMNTTAB) 3209 newflag |= VFS_NOMNTTAB; 3210 else 3211 newflag &= ~VFS_NOMNTTAB; 3212 3213 if (coveredvp != NULL) { 3214 ASSERT(vn_vfswlock_held(coveredvp)); 3215 coveredvp->v_vfsmountedhere = vfsp; 3216 VN_HOLD(coveredvp); 3217 } 3218 vfsp->vfs_vnodecovered = coveredvp; 3219 vfsp->vfs_flag = newflag; 3220 3221 vfs_list_add(vfsp); 3222 } 3223 3224 /* 3225 * Remove a vfs from the vfs list, null out the pointer from the 3226 * covered vnode to the vfs (v_vfsmountedhere), and null out the pointer 3227 * from the vfs to the covered vnode (vfs_vnodecovered). Release the 3228 * reference to the vfs and to the covered vnode. 
3229 * 3230 * Called from dounmount after it's confirmed with the file system 3231 * that the unmount is legal. 3232 */ 3233 void 3234 vfs_remove(struct vfs *vfsp) 3235 { 3236 vnode_t *vp; 3237 3238 ASSERT(vfs_lock_held(vfsp)); 3239 3240 /* 3241 * Can't unmount root. Should never happen because fs will 3242 * be busy. 3243 */ 3244 if (vfsp == rootvfs) 3245 panic("vfs_remove: unmounting root"); 3246 3247 vfs_list_remove(vfsp); 3248 3249 /* 3250 * Unhook from the file system name space. 3251 */ 3252 vp = vfsp->vfs_vnodecovered; 3253 ASSERT(vn_vfswlock_held(vp)); 3254 vp->v_vfsmountedhere = NULL; 3255 vfsp->vfs_vnodecovered = NULL; 3256 VN_RELE(vp); 3257 3258 /* 3259 * Release lock and wakeup anybody waiting. 3260 */ 3261 vfs_unlock(vfsp); 3262 VFS_RELE(vfsp); 3263 } 3264 3265 /* 3266 * Lock a filesystem to prevent access to it while mounting, 3267 * unmounting and syncing. Return EBUSY immediately if lock 3268 * can't be acquired. 3269 */ 3270 int 3271 vfs_lock(vfs_t *vfsp) 3272 { 3273 vn_vfslocks_entry_t *vpvfsentry; 3274 3275 vpvfsentry = vn_vfslocks_getlock(vfsp); 3276 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER)) 3277 return (0); 3278 3279 vn_vfslocks_rele(vpvfsentry); 3280 return (EBUSY); 3281 } 3282 3283 int 3284 vfs_rlock(vfs_t *vfsp) 3285 { 3286 vn_vfslocks_entry_t *vpvfsentry; 3287 3288 vpvfsentry = vn_vfslocks_getlock(vfsp); 3289 3290 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER)) 3291 return (0); 3292 3293 vn_vfslocks_rele(vpvfsentry); 3294 return (EBUSY); 3295 } 3296 3297 void 3298 vfs_lock_wait(vfs_t *vfsp) 3299 { 3300 vn_vfslocks_entry_t *vpvfsentry; 3301 3302 vpvfsentry = vn_vfslocks_getlock(vfsp); 3303 rwst_enter(&vpvfsentry->ve_lock, RW_WRITER); 3304 } 3305 3306 void 3307 vfs_rlock_wait(vfs_t *vfsp) 3308 { 3309 vn_vfslocks_entry_t *vpvfsentry; 3310 3311 vpvfsentry = vn_vfslocks_getlock(vfsp); 3312 rwst_enter(&vpvfsentry->ve_lock, RW_READER); 3313 } 3314 3315 /* 3316 * Unlock a locked filesystem. 
3317 */ 3318 void 3319 vfs_unlock(vfs_t *vfsp) 3320 { 3321 vn_vfslocks_entry_t *vpvfsentry; 3322 3323 /* 3324 * vfs_unlock will mimic sema_v behaviour to fix 4748018. 3325 * And these changes should remain for the patch changes as it is. 3326 */ 3327 if (panicstr) 3328 return; 3329 3330 /* 3331 * ve_refcount needs to be dropped twice here. 3332 * 1. To release refernce after a call to vfs_locks_getlock() 3333 * 2. To release the reference from the locking routines like 3334 * vfs_rlock_wait/vfs_wlock_wait/vfs_wlock etc,. 3335 */ 3336 3337 vpvfsentry = vn_vfslocks_getlock(vfsp); 3338 vn_vfslocks_rele(vpvfsentry); 3339 3340 rwst_exit(&vpvfsentry->ve_lock); 3341 vn_vfslocks_rele(vpvfsentry); 3342 } 3343 3344 /* 3345 * Utility routine that allows a filesystem to construct its 3346 * fsid in "the usual way" - by munging some underlying dev_t and 3347 * the filesystem type number into the 64-bit fsid. Note that 3348 * this implicitly relies on dev_t persistence to make filesystem 3349 * id's persistent. 3350 * 3351 * There's nothing to prevent an individual fs from constructing its 3352 * fsid in a different way, and indeed they should. 3353 * 3354 * Since we want fsids to be 32-bit quantities (so that they can be 3355 * exported identically by either 32-bit or 64-bit APIs, as well as 3356 * the fact that fsid's are "known" to NFS), we compress the device 3357 * number given down to 32-bits, and panic if that isn't possible. 3358 */ 3359 void 3360 vfs_make_fsid(fsid_t *fsi, dev_t dev, int val) 3361 { 3362 if (!cmpldev((dev32_t *)&fsi->val[0], dev)) 3363 panic("device number too big for fsid!"); 3364 fsi->val[1] = val; 3365 } 3366 3367 int 3368 vfs_lock_held(vfs_t *vfsp) 3369 { 3370 int held; 3371 vn_vfslocks_entry_t *vpvfsentry; 3372 3373 /* 3374 * vfs_lock_held will mimic sema_held behaviour 3375 * if panicstr is set. And these changes should remain 3376 * for the patch changes as it is. 
3377 */ 3378 if (panicstr) 3379 return (1); 3380 3381 vpvfsentry = vn_vfslocks_getlock(vfsp); 3382 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER); 3383 3384 vn_vfslocks_rele(vpvfsentry); 3385 return (held); 3386 } 3387 3388 struct _kthread * 3389 vfs_lock_owner(vfs_t *vfsp) 3390 { 3391 struct _kthread *owner; 3392 vn_vfslocks_entry_t *vpvfsentry; 3393 3394 /* 3395 * vfs_wlock_held will mimic sema_held behaviour 3396 * if panicstr is set. And these changes should remain 3397 * for the patch changes as it is. 3398 */ 3399 if (panicstr) 3400 return (NULL); 3401 3402 vpvfsentry = vn_vfslocks_getlock(vfsp); 3403 owner = rwst_owner(&vpvfsentry->ve_lock); 3404 3405 vn_vfslocks_rele(vpvfsentry); 3406 return (owner); 3407 } 3408 3409 /* 3410 * vfs list locking. 3411 * 3412 * Rather than manipulate the vfslist lock directly, we abstract into lock 3413 * and unlock routines to allow the locking implementation to be changed for 3414 * clustering. 3415 * 3416 * Whenever the vfs list is modified through its hash links, the overall list 3417 * lock must be obtained before locking the relevant hash bucket. But to see 3418 * whether a given vfs is on the list, it suffices to obtain the lock for the 3419 * hash bucket without getting the overall list lock. (See getvfs() below.) 3420 */ 3421 3422 void 3423 vfs_list_lock() 3424 { 3425 rw_enter(&vfslist, RW_WRITER); 3426 } 3427 3428 void 3429 vfs_list_read_lock() 3430 { 3431 rw_enter(&vfslist, RW_READER); 3432 } 3433 3434 void 3435 vfs_list_unlock() 3436 { 3437 rw_exit(&vfslist); 3438 } 3439 3440 /* 3441 * Low level worker routines for adding entries to and removing entries from 3442 * the vfs list. 
3443 */ 3444 3445 static void 3446 vfs_hash_add(struct vfs *vfsp, int insert_at_head) 3447 { 3448 int vhno; 3449 struct vfs **hp; 3450 dev_t dev; 3451 3452 ASSERT(RW_WRITE_HELD(&vfslist)); 3453 3454 dev = expldev(vfsp->vfs_fsid.val[0]); 3455 vhno = VFSHASH(getmajor(dev), getminor(dev)); 3456 3457 mutex_enter(&rvfs_list[vhno].rvfs_lock); 3458 3459 /* 3460 * Link into the hash table, inserting it at the end, so that LOFS 3461 * with the same fsid as UFS (or other) file systems will not hide the 3462 * UFS. 3463 */ 3464 if (insert_at_head) { 3465 vfsp->vfs_hash = rvfs_list[vhno].rvfs_head; 3466 rvfs_list[vhno].rvfs_head = vfsp; 3467 } else { 3468 for (hp = &rvfs_list[vhno].rvfs_head; *hp != NULL; 3469 hp = &(*hp)->vfs_hash) 3470 continue; 3471 /* 3472 * hp now contains the address of the pointer to update 3473 * to effect the insertion. 3474 */ 3475 vfsp->vfs_hash = NULL; 3476 *hp = vfsp; 3477 } 3478 3479 rvfs_list[vhno].rvfs_len++; 3480 mutex_exit(&rvfs_list[vhno].rvfs_lock); 3481 } 3482 3483 3484 static void 3485 vfs_hash_remove(struct vfs *vfsp) 3486 { 3487 int vhno; 3488 struct vfs *tvfsp; 3489 dev_t dev; 3490 3491 ASSERT(RW_WRITE_HELD(&vfslist)); 3492 3493 dev = expldev(vfsp->vfs_fsid.val[0]); 3494 vhno = VFSHASH(getmajor(dev), getminor(dev)); 3495 3496 mutex_enter(&rvfs_list[vhno].rvfs_lock); 3497 3498 /* 3499 * Remove from hash. 
3500 */ 3501 if (rvfs_list[vhno].rvfs_head == vfsp) { 3502 rvfs_list[vhno].rvfs_head = vfsp->vfs_hash; 3503 rvfs_list[vhno].rvfs_len--; 3504 goto foundit; 3505 } 3506 for (tvfsp = rvfs_list[vhno].rvfs_head; tvfsp != NULL; 3507 tvfsp = tvfsp->vfs_hash) { 3508 if (tvfsp->vfs_hash == vfsp) { 3509 tvfsp->vfs_hash = vfsp->vfs_hash; 3510 rvfs_list[vhno].rvfs_len--; 3511 goto foundit; 3512 } 3513 } 3514 cmn_err(CE_WARN, "vfs_list_remove: vfs not found in hash"); 3515 3516 foundit: 3517 3518 mutex_exit(&rvfs_list[vhno].rvfs_lock); 3519 } 3520 3521 3522 void 3523 vfs_list_add(struct vfs *vfsp) 3524 { 3525 zone_t *zone; 3526 3527 /* 3528 * Typically, the vfs_t will have been created on behalf of the file 3529 * system in vfs_init, where it will have been provided with a 3530 * vfs_impl_t. This, however, might be lacking if the vfs_t was created 3531 * by an unbundled file system. We therefore check for such an example 3532 * before stamping the vfs_t with its creation time for the benefit of 3533 * mntfs. 3534 */ 3535 if (vfsp->vfs_implp == NULL) 3536 vfsimpl_setup(vfsp); 3537 vfs_mono_time(&vfsp->vfs_hrctime); 3538 3539 /* 3540 * The zone that owns the mount is the one that performed the mount. 3541 * Note that this isn't necessarily the same as the zone mounted into. 3542 * The corresponding zone_rele_ref() will be done when the vfs_t 3543 * is being free'd. 3544 */ 3545 vfsp->vfs_zone = curproc->p_zone; 3546 zone_init_ref(&vfsp->vfs_implp->vi_zone_ref); 3547 zone_hold_ref(vfsp->vfs_zone, &vfsp->vfs_implp->vi_zone_ref, 3548 ZONE_REF_VFS); 3549 3550 /* 3551 * Find the zone mounted into, and put this mount on its vfs list. 3552 */ 3553 zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 3554 ASSERT(zone != NULL); 3555 /* 3556 * Special casing for the root vfs. This structure is allocated 3557 * statically and hooked onto rootvfs at link time. 
During the 3558 * vfs_mountroot call at system startup time, the root file system's 3559 * VFS_MOUNTROOT routine will call vfs_add with this root vfs struct 3560 * as argument. The code below must detect and handle this special 3561 * case. The only apparent justification for this special casing is 3562 * to ensure that the root file system appears at the head of the 3563 * list. 3564 * 3565 * XXX: I'm assuming that it's ok to do normal list locking when 3566 * adding the entry for the root file system (this used to be 3567 * done with no locks held). 3568 */ 3569 vfs_list_lock(); 3570 /* 3571 * Link into the vfs list proper. 3572 */ 3573 if (vfsp == &root) { 3574 /* 3575 * Assert: This vfs is already on the list as its first entry. 3576 * Thus, there's nothing to do. 3577 */ 3578 ASSERT(rootvfs == vfsp); 3579 /* 3580 * Add it to the head of the global zone's vfslist. 3581 */ 3582 ASSERT(zone == global_zone); 3583 ASSERT(zone->zone_vfslist == NULL); 3584 zone->zone_vfslist = vfsp; 3585 } else { 3586 /* 3587 * Link to end of list using vfs_prev (as rootvfs is now a 3588 * doubly linked circular list) so list is in mount order for 3589 * mnttab use. 3590 */ 3591 rootvfs->vfs_prev->vfs_next = vfsp; 3592 vfsp->vfs_prev = rootvfs->vfs_prev; 3593 rootvfs->vfs_prev = vfsp; 3594 vfsp->vfs_next = rootvfs; 3595 3596 /* 3597 * Do it again for the zone-private list (which may be NULL). 3598 */ 3599 if (zone->zone_vfslist == NULL) { 3600 ASSERT(zone != global_zone); 3601 zone->zone_vfslist = vfsp; 3602 } else { 3603 zone->zone_vfslist->vfs_zone_prev->vfs_zone_next = vfsp; 3604 vfsp->vfs_zone_prev = zone->zone_vfslist->vfs_zone_prev; 3605 zone->zone_vfslist->vfs_zone_prev = vfsp; 3606 vfsp->vfs_zone_next = zone->zone_vfslist; 3607 } 3608 } 3609 3610 /* 3611 * Link into the hash table, inserting it at the end, so that LOFS 3612 * with the same fsid as UFS (or other) file systems will not hide 3613 * the UFS. 
3614 */ 3615 vfs_hash_add(vfsp, 0); 3616 3617 /* 3618 * update the mnttab modification time 3619 */ 3620 vfs_mnttab_modtimeupd(); 3621 vfs_list_unlock(); 3622 zone_rele(zone); 3623 } 3624 3625 void 3626 vfs_list_remove(struct vfs *vfsp) 3627 { 3628 zone_t *zone; 3629 3630 zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 3631 ASSERT(zone != NULL); 3632 /* 3633 * Callers are responsible for preventing attempts to unmount the 3634 * root. 3635 */ 3636 ASSERT(vfsp != rootvfs); 3637 3638 vfs_list_lock(); 3639 3640 /* 3641 * Remove from hash. 3642 */ 3643 vfs_hash_remove(vfsp); 3644 3645 /* 3646 * Remove from vfs list. 3647 */ 3648 vfsp->vfs_prev->vfs_next = vfsp->vfs_next; 3649 vfsp->vfs_next->vfs_prev = vfsp->vfs_prev; 3650 vfsp->vfs_next = vfsp->vfs_prev = NULL; 3651 3652 /* 3653 * Remove from zone-specific vfs list. 3654 */ 3655 if (zone->zone_vfslist == vfsp) 3656 zone->zone_vfslist = vfsp->vfs_zone_next; 3657 3658 if (vfsp->vfs_zone_next == vfsp) { 3659 ASSERT(vfsp->vfs_zone_prev == vfsp); 3660 ASSERT(zone->zone_vfslist == vfsp); 3661 zone->zone_vfslist = NULL; 3662 } 3663 3664 vfsp->vfs_zone_prev->vfs_zone_next = vfsp->vfs_zone_next; 3665 vfsp->vfs_zone_next->vfs_zone_prev = vfsp->vfs_zone_prev; 3666 vfsp->vfs_zone_next = vfsp->vfs_zone_prev = NULL; 3667 3668 /* 3669 * update the mnttab modification time 3670 */ 3671 vfs_mnttab_modtimeupd(); 3672 vfs_list_unlock(); 3673 zone_rele(zone); 3674 } 3675 3676 struct vfs * 3677 getvfs(fsid_t *fsid) 3678 { 3679 struct vfs *vfsp; 3680 int val0 = fsid->val[0]; 3681 int val1 = fsid->val[1]; 3682 dev_t dev = expldev(val0); 3683 int vhno = VFSHASH(getmajor(dev), getminor(dev)); 3684 kmutex_t *hmp = &rvfs_list[vhno].rvfs_lock; 3685 3686 mutex_enter(hmp); 3687 for (vfsp = rvfs_list[vhno].rvfs_head; vfsp; vfsp = vfsp->vfs_hash) { 3688 if (vfsp->vfs_fsid.val[0] == val0 && 3689 vfsp->vfs_fsid.val[1] == val1) { 3690 VFS_HOLD(vfsp); 3691 mutex_exit(hmp); 3692 return (vfsp); 3693 } 3694 } 3695 mutex_exit(hmp); 3696 return 
(NULL); 3697 } 3698 3699 /* 3700 * Search the vfs mount in progress list for a specified device/vfs entry. 3701 * Returns 0 if the first entry in the list that the device matches has the 3702 * given vfs pointer as well. If the device matches but a different vfs 3703 * pointer is encountered in the list before the given vfs pointer then 3704 * a 1 is returned. 3705 */ 3706 3707 int 3708 vfs_devmounting(dev_t dev, struct vfs *vfsp) 3709 { 3710 int retval = 0; 3711 struct ipmnt *mipp; 3712 3713 mutex_enter(&vfs_miplist_mutex); 3714 for (mipp = vfs_miplist; mipp != NULL; mipp = mipp->mip_next) { 3715 if (mipp->mip_dev == dev) { 3716 if (mipp->mip_vfsp != vfsp) 3717 retval = 1; 3718 break; 3719 } 3720 } 3721 mutex_exit(&vfs_miplist_mutex); 3722 return (retval); 3723 } 3724 3725 /* 3726 * Search the vfs list for a specified device. Returns 1, if entry is found 3727 * or 0 if no suitable entry is found. 3728 */ 3729 3730 int 3731 vfs_devismounted(dev_t dev) 3732 { 3733 struct vfs *vfsp; 3734 int found; 3735 3736 vfs_list_read_lock(); 3737 vfsp = rootvfs; 3738 found = 0; 3739 do { 3740 if (vfsp->vfs_dev == dev) { 3741 found = 1; 3742 break; 3743 } 3744 vfsp = vfsp->vfs_next; 3745 } while (vfsp != rootvfs); 3746 3747 vfs_list_unlock(); 3748 return (found); 3749 } 3750 3751 /* 3752 * Search the vfs list for a specified device. Returns a pointer to it 3753 * or NULL if no suitable entry is found. The caller of this routine 3754 * is responsible for releasing the returned vfs pointer. 3755 */ 3756 struct vfs * 3757 vfs_dev2vfsp(dev_t dev) 3758 { 3759 struct vfs *vfsp; 3760 int found; 3761 3762 vfs_list_read_lock(); 3763 vfsp = rootvfs; 3764 found = 0; 3765 do { 3766 /* 3767 * The following could be made more efficient by making 3768 * the entire loop use vfs_zone_next if the call is from 3769 * a zone. The only callers, however, ustat(2) and 3770 * umount2(2), don't seem to justify the added 3771 * complexity at present. 
3772 */ 3773 if (vfsp->vfs_dev == dev && 3774 ZONE_PATH_VISIBLE(refstr_value(vfsp->vfs_mntpt), 3775 curproc->p_zone)) { 3776 VFS_HOLD(vfsp); 3777 found = 1; 3778 break; 3779 } 3780 vfsp = vfsp->vfs_next; 3781 } while (vfsp != rootvfs); 3782 vfs_list_unlock(); 3783 return (found ? vfsp: NULL); 3784 } 3785 3786 /* 3787 * Search the vfs list for a specified mntpoint. Returns a pointer to it 3788 * or NULL if no suitable entry is found. The caller of this routine 3789 * is responsible for releasing the returned vfs pointer. 3790 * 3791 * Note that if multiple mntpoints match, the last one matching is 3792 * returned in an attempt to return the "top" mount when overlay 3793 * mounts are covering the same mount point. This is accomplished by starting 3794 * at the end of the list and working our way backwards, stopping at the first 3795 * matching mount. 3796 */ 3797 struct vfs * 3798 vfs_mntpoint2vfsp(const char *mp) 3799 { 3800 struct vfs *vfsp; 3801 struct vfs *retvfsp = NULL; 3802 zone_t *zone = curproc->p_zone; 3803 struct vfs *list; 3804 3805 vfs_list_read_lock(); 3806 if (getzoneid() == GLOBAL_ZONEID) { 3807 /* 3808 * The global zone may see filesystems in any zone. 3809 */ 3810 vfsp = rootvfs->vfs_prev; 3811 do { 3812 if (strcmp(refstr_value(vfsp->vfs_mntpt), mp) == 0) { 3813 retvfsp = vfsp; 3814 break; 3815 } 3816 vfsp = vfsp->vfs_prev; 3817 } while (vfsp != rootvfs->vfs_prev); 3818 } else if ((list = zone->zone_vfslist) != NULL) { 3819 const char *mntpt; 3820 3821 vfsp = list->vfs_zone_prev; 3822 do { 3823 mntpt = refstr_value(vfsp->vfs_mntpt); 3824 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); 3825 if (strcmp(mntpt, mp) == 0) { 3826 retvfsp = vfsp; 3827 break; 3828 } 3829 vfsp = vfsp->vfs_zone_prev; 3830 } while (vfsp != list->vfs_zone_prev); 3831 } 3832 if (retvfsp) 3833 VFS_HOLD(retvfsp); 3834 vfs_list_unlock(); 3835 return (retvfsp); 3836 } 3837 3838 /* 3839 * Search the vfs list for a specified vfsops. 3840 * if vfs entry is found then return 1, else 0. 
3841 */ 3842 int 3843 vfs_opsinuse(vfsops_t *ops) 3844 { 3845 struct vfs *vfsp; 3846 int found; 3847 3848 vfs_list_read_lock(); 3849 vfsp = rootvfs; 3850 found = 0; 3851 do { 3852 if (vfs_getops(vfsp) == ops) { 3853 found = 1; 3854 break; 3855 } 3856 vfsp = vfsp->vfs_next; 3857 } while (vfsp != rootvfs); 3858 vfs_list_unlock(); 3859 return (found); 3860 } 3861 3862 /* 3863 * Allocate an entry in vfssw for a file system type 3864 */ 3865 struct vfssw * 3866 allocate_vfssw(const char *type) 3867 { 3868 struct vfssw *vswp; 3869 3870 if (type[0] == '\0' || strlen(type) + 1 > _ST_FSTYPSZ) { 3871 /* 3872 * The vfssw table uses the empty string to identify an 3873 * available entry; we cannot add any type which has 3874 * a leading NUL. The string length is limited to 3875 * the size of the st_fstype array in struct stat. 3876 */ 3877 return (NULL); 3878 } 3879 3880 ASSERT(VFSSW_WRITE_LOCKED()); 3881 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) 3882 if (!ALLOCATED_VFSSW(vswp)) { 3883 vswp->vsw_name = kmem_alloc(strlen(type) + 1, KM_SLEEP); 3884 (void) strcpy(vswp->vsw_name, type); 3885 ASSERT(vswp->vsw_count == 0); 3886 vswp->vsw_count = 1; 3887 mutex_init(&vswp->vsw_lock, NULL, MUTEX_DEFAULT, NULL); 3888 return (vswp); 3889 } 3890 return (NULL); 3891 } 3892 3893 /* 3894 * Impose additional layer of translation between vfstype names 3895 * and module names in the filesystem. 3896 */ 3897 static const char * 3898 vfs_to_modname(const char *vfstype) 3899 { 3900 if (strcmp(vfstype, "proc") == 0) { 3901 vfstype = "procfs"; 3902 } else if (strcmp(vfstype, "fd") == 0) { 3903 vfstype = "fdfs"; 3904 } else if (strncmp(vfstype, "nfs", 3) == 0) { 3905 vfstype = "nfs"; 3906 } 3907 3908 return (vfstype); 3909 } 3910 3911 /* 3912 * Find a vfssw entry given a file system type name. 3913 * Try to autoload the filesystem if it's not found. 3914 * If it's installed, return the vfssw locked to prevent unloading. 
 */
struct vfssw *
vfs_getvfssw(const char *type)
{
	struct vfssw *vswp;
	const char *modname;

	RLOCK_VFSSW();
	/* A successful lookup takes a reference on the returned entry. */
	vswp = vfs_getvfsswbyname(type);
	modname = vfs_to_modname(type);

	if (rootdir == NULL) {
		/*
		 * If we haven't yet loaded the root file system, then our
		 * _init won't be called until later. Allocate vfssw entry,
		 * because mod_installfs won't be called.
		 */
		if (vswp == NULL) {
			/*
			 * Trade the read lock for the write lock, then look
			 * the name up again before allocating, since the
			 * table was unlocked in between.
			 */
			RUNLOCK_VFSSW();
			WLOCK_VFSSW();
			if ((vswp = vfs_getvfsswbyname(type)) == NULL) {
				if ((vswp = allocate_vfssw(type)) == NULL) {
					WUNLOCK_VFSSW();
					return (NULL);
				}
			}
			WUNLOCK_VFSSW();
			RLOCK_VFSSW();
		}
		/* Load the module without installing it; result is ignored. */
		if (!VFS_INSTALLED(vswp)) {
			RUNLOCK_VFSSW();
			(void) modloadonly("fs", modname);
		} else
			RUNLOCK_VFSSW();
		return (vswp);
	}

	/*
	 * Try to load the filesystem.  Before calling modload(), we drop
	 * our lock on the VFS switch table, and pick it up after the
	 * module is loaded.  However, there is a potential race:  the
	 * module could be unloaded after the call to modload() completes
	 * but before we pick up the lock and drive on.  Therefore,
	 * we keep reloading the module until we've loaded the module
	 * _and_ we have the lock on the VFS switch table.
	 */
	while (vswp == NULL || !VFS_INSTALLED(vswp)) {
		RUNLOCK_VFSSW();
		/* Returns with the switch table unlocked on load failure. */
		if (modload("fs", modname) == -1)
			return (NULL);
		RLOCK_VFSSW();
		if (vswp == NULL)
			if ((vswp = vfs_getvfsswbyname(type)) == NULL)
				break;
	}
	RUNLOCK_VFSSW();

	return (vswp);
}

/*
 * Find a vfssw entry given a file system type name.
3977 */ 3978 struct vfssw * 3979 vfs_getvfsswbyname(const char *type) 3980 { 3981 struct vfssw *vswp; 3982 3983 ASSERT(VFSSW_LOCKED()); 3984 if (type == NULL || *type == '\0') 3985 return (NULL); 3986 3987 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 3988 if (strcmp(type, vswp->vsw_name) == 0) { 3989 vfs_refvfssw(vswp); 3990 return (vswp); 3991 } 3992 } 3993 3994 return (NULL); 3995 } 3996 3997 /* 3998 * Find a vfssw entry given a set of vfsops. 3999 */ 4000 struct vfssw * 4001 vfs_getvfsswbyvfsops(vfsops_t *vfsops) 4002 { 4003 struct vfssw *vswp; 4004 4005 RLOCK_VFSSW(); 4006 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 4007 if (ALLOCATED_VFSSW(vswp) && &vswp->vsw_vfsops == vfsops) { 4008 vfs_refvfssw(vswp); 4009 RUNLOCK_VFSSW(); 4010 return (vswp); 4011 } 4012 } 4013 RUNLOCK_VFSSW(); 4014 4015 return (NULL); 4016 } 4017 4018 /* 4019 * Reference a vfssw entry. 4020 */ 4021 void 4022 vfs_refvfssw(struct vfssw *vswp) 4023 { 4024 4025 mutex_enter(&vswp->vsw_lock); 4026 vswp->vsw_count++; 4027 mutex_exit(&vswp->vsw_lock); 4028 } 4029 4030 /* 4031 * Unreference a vfssw entry. 4032 */ 4033 void 4034 vfs_unrefvfssw(struct vfssw *vswp) 4035 { 4036 4037 mutex_enter(&vswp->vsw_lock); 4038 vswp->vsw_count--; 4039 mutex_exit(&vswp->vsw_lock); 4040 } 4041 4042 static int sync_retries = 20; /* number of retries when not making progress */ 4043 static int sync_triesleft; /* portion of sync_retries remaining */ 4044 4045 static pgcnt_t old_pgcnt, new_pgcnt; 4046 static int new_bufcnt, old_bufcnt; 4047 4048 /* 4049 * Sync all of the mounted filesystems, and then wait for the actual i/o to 4050 * complete. We wait by counting the number of dirty pages and buffers, 4051 * pushing them out using bio_busy() and page_busy(), and then counting again. 4052 * This routine is used during the uadmin A_SHUTDOWN code. 
It should only 4053 * be used after some higher-level mechanism has quiesced the system so that 4054 * new writes are not being initiated while we are waiting for completion. 4055 * 4056 * To ensure finite running time, our algorithm uses sync_triesleft (a progress 4057 * counter used by the vfs_syncall() loop below). It is declared above so 4058 * it can be found easily in the debugger. 4059 * 4060 * The sync_triesleft counter is updated by vfs_syncall() itself. If we make 4061 * sync_retries consecutive calls to bio_busy() and page_busy() without 4062 * decreasing either the number of dirty buffers or dirty pages below the 4063 * lowest count we have seen so far, we give up and return from vfs_syncall(). 4064 * 4065 * Each loop iteration ends with a call to delay() one second to allow time for 4066 * i/o completion and to permit the user time to read our progress messages. 4067 */ 4068 void 4069 vfs_syncall(void) 4070 { 4071 if (rootdir == NULL && !modrootloaded) 4072 return; /* no filesystems have been loaded yet */ 4073 4074 printf("syncing file systems..."); 4075 sync(); 4076 4077 sync_triesleft = sync_retries; 4078 4079 old_bufcnt = new_bufcnt = INT_MAX; 4080 old_pgcnt = new_pgcnt = ULONG_MAX; 4081 4082 while (sync_triesleft > 0) { 4083 old_bufcnt = MIN(old_bufcnt, new_bufcnt); 4084 old_pgcnt = MIN(old_pgcnt, new_pgcnt); 4085 4086 new_bufcnt = bio_busy(B_TRUE); 4087 new_pgcnt = page_busy(B_TRUE); 4088 4089 if (new_bufcnt == 0 && new_pgcnt == 0) 4090 break; 4091 4092 if (new_bufcnt < old_bufcnt || new_pgcnt < old_pgcnt) 4093 sync_triesleft = sync_retries; 4094 else 4095 sync_triesleft--; 4096 4097 if (new_bufcnt) 4098 printf(" [%d]", new_bufcnt); 4099 if (new_pgcnt) 4100 printf(" %lu", new_pgcnt); 4101 4102 delay(hz); 4103 } 4104 4105 if (new_bufcnt != 0 || new_pgcnt != 0) 4106 printf(" done (not all i/o completed)\n"); 4107 else 4108 printf(" done\n"); 4109 4110 delay(hz); 4111 } 4112 4113 /* 4114 * Map VFS flags to statvfs flags. 
These shouldn't really be separate 4115 * flags at all. 4116 */ 4117 uint_t 4118 vf_to_stf(uint_t vf) 4119 { 4120 uint_t stf = 0; 4121 4122 if (vf & VFS_RDONLY) 4123 stf |= ST_RDONLY; 4124 if (vf & VFS_NOSETUID) 4125 stf |= ST_NOSUID; 4126 if (vf & VFS_NOTRUNC) 4127 stf |= ST_NOTRUNC; 4128 4129 return (stf); 4130 } 4131 4132 /* 4133 * Entries for (illegal) fstype 0. 4134 */ 4135 /* ARGSUSED */ 4136 int 4137 vfsstray_sync(struct vfs *vfsp, short arg, struct cred *cr) 4138 { 4139 cmn_err(CE_PANIC, "stray vfs operation"); 4140 return (0); 4141 } 4142 4143 /* 4144 * Entries for (illegal) fstype 0. 4145 */ 4146 int 4147 vfsstray(void) 4148 { 4149 cmn_err(CE_PANIC, "stray vfs operation"); 4150 return (0); 4151 } 4152 4153 /* 4154 * Support for dealing with forced UFS unmount and its interaction with 4155 * LOFS. Could be used by any filesystem. 4156 * See bug 1203132. 4157 */ 4158 int 4159 vfs_EIO(void) 4160 { 4161 return (EIO); 4162 } 4163 4164 /* 4165 * We've gotta define the op for sync separately, since the compiler gets 4166 * confused if we mix and match ANSI and normal style prototypes when 4167 * a "short" argument is present and spits out a warning. 
4168 */ 4169 /*ARGSUSED*/ 4170 int 4171 vfs_EIO_sync(struct vfs *vfsp, short arg, struct cred *cr) 4172 { 4173 return (EIO); 4174 } 4175 4176 vfs_t EIO_vfs; 4177 vfsops_t *EIO_vfsops; 4178 4179 /* 4180 * Called from startup() to initialize all loaded vfs's 4181 */ 4182 void 4183 vfsinit(void) 4184 { 4185 struct vfssw *vswp; 4186 int error; 4187 extern int vopstats_enabled; 4188 extern void vopstats_startup(); 4189 4190 static const fs_operation_def_t EIO_vfsops_template[] = { 4191 VFSNAME_MOUNT, { .error = vfs_EIO }, 4192 VFSNAME_UNMOUNT, { .error = vfs_EIO }, 4193 VFSNAME_ROOT, { .error = vfs_EIO }, 4194 VFSNAME_STATVFS, { .error = vfs_EIO }, 4195 VFSNAME_SYNC, { .vfs_sync = vfs_EIO_sync }, 4196 VFSNAME_VGET, { .error = vfs_EIO }, 4197 VFSNAME_MOUNTROOT, { .error = vfs_EIO }, 4198 VFSNAME_FREEVFS, { .error = vfs_EIO }, 4199 VFSNAME_VNSTATE, { .error = vfs_EIO }, 4200 VFSNAME_SYNCFS, { .error = vfs_EIO }, 4201 NULL, NULL 4202 }; 4203 4204 static const fs_operation_def_t stray_vfsops_template[] = { 4205 VFSNAME_MOUNT, { .error = vfsstray }, 4206 VFSNAME_UNMOUNT, { .error = vfsstray }, 4207 VFSNAME_ROOT, { .error = vfsstray }, 4208 VFSNAME_STATVFS, { .error = vfsstray }, 4209 VFSNAME_SYNC, { .vfs_sync = vfsstray_sync }, 4210 VFSNAME_VGET, { .error = vfsstray }, 4211 VFSNAME_MOUNTROOT, { .error = vfsstray }, 4212 VFSNAME_FREEVFS, { .error = vfsstray }, 4213 VFSNAME_VNSTATE, { .error = vfsstray }, 4214 VFSNAME_SYNCFS, { .error = vfsstray }, 4215 NULL, NULL 4216 }; 4217 4218 /* Create vfs cache */ 4219 vfs_cache = kmem_cache_create("vfs_cache", sizeof (struct vfs), 4220 sizeof (uintptr_t), NULL, NULL, NULL, NULL, NULL, 0); 4221 4222 /* Initialize the vnode cache (file systems may use it during init). */ 4223 vn_create_cache(); 4224 4225 /* Setup event monitor framework */ 4226 fem_init(); 4227 4228 /* Initialize the dummy stray file system type. */ 4229 error = vfs_setfsops(0, stray_vfsops_template, NULL); 4230 4231 /* Initialize the dummy EIO file system. 
*/ 4232 error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops); 4233 if (error != 0) { 4234 cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template"); 4235 /* Shouldn't happen, but not bad enough to panic */ 4236 } 4237 4238 VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL); 4239 4240 /* 4241 * Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup 4242 * on this vfs can immediately notice it's invalid. 4243 */ 4244 EIO_vfs.vfs_flag |= VFS_UNMOUNTED; 4245 4246 /* 4247 * Call the init routines of non-loadable filesystems only. 4248 * Filesystems which are loaded as separate modules will be 4249 * initialized by the module loading code instead. 4250 */ 4251 4252 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 4253 RLOCK_VFSSW(); 4254 if (vswp->vsw_init != NULL) 4255 (void) (*vswp->vsw_init)(vswp - vfssw, vswp->vsw_name); 4256 RUNLOCK_VFSSW(); 4257 } 4258 4259 vopstats_startup(); 4260 4261 if (vopstats_enabled) { 4262 /* EIO_vfs can collect stats, but we don't retrieve them */ 4263 initialize_vopstats(&EIO_vfs.vfs_vopstats); 4264 EIO_vfs.vfs_fstypevsp = NULL; 4265 EIO_vfs.vfs_vskap = NULL; 4266 EIO_vfs.vfs_flag |= VFS_STATS; 4267 } 4268 4269 xattr_init(); 4270 4271 reparse_point_init(); 4272 } 4273 4274 vfs_t * 4275 vfs_alloc(int kmflag) 4276 { 4277 vfs_t *vfsp; 4278 4279 vfsp = kmem_cache_alloc(vfs_cache, kmflag); 4280 4281 /* 4282 * Do the simplest initialization here. 4283 * Everything else gets done in vfs_init() 4284 */ 4285 bzero(vfsp, sizeof (vfs_t)); 4286 return (vfsp); 4287 } 4288 4289 void 4290 vfs_free(vfs_t *vfsp) 4291 { 4292 /* 4293 * One would be tempted to assert that "vfsp->vfs_count == 0". 4294 * The problem is that this gets called out of domount() with 4295 * a partially initialized vfs and a vfs_count of 1. This is 4296 * also called from vfs_rele() with a vfs_count of 0. We can't 4297 * call VFS_RELE() from domount() if VFS_MOUNT() hasn't successfully 4298 * returned. 
This is because VFS_MOUNT() fully initializes the 4299 * vfs structure and its associated data. VFS_RELE() will call 4300 * VFS_FREEVFS() which may panic the system if the data structures 4301 * aren't fully initialized from a successful VFS_MOUNT()). 4302 */ 4303 4304 /* If FEM was in use, make sure everything gets cleaned up */ 4305 if (vfsp->vfs_femhead) { 4306 ASSERT(vfsp->vfs_femhead->femh_list == NULL); 4307 mutex_destroy(&vfsp->vfs_femhead->femh_lock); 4308 kmem_free(vfsp->vfs_femhead, sizeof (*(vfsp->vfs_femhead))); 4309 vfsp->vfs_femhead = NULL; 4310 } 4311 4312 if (vfsp->vfs_implp) 4313 vfsimpl_teardown(vfsp); 4314 sema_destroy(&vfsp->vfs_reflock); 4315 kmem_cache_free(vfs_cache, vfsp); 4316 } 4317 4318 /* 4319 * Increments the vfs reference count by one atomically. 4320 */ 4321 void 4322 vfs_hold(vfs_t *vfsp) 4323 { 4324 atomic_inc_32(&vfsp->vfs_count); 4325 ASSERT(vfsp->vfs_count != 0); 4326 } 4327 4328 /* 4329 * Decrements the vfs reference count by one atomically. When 4330 * vfs reference count becomes zero, it calls the file system 4331 * specific vfs_freevfs() to free up the resources. 4332 */ 4333 void 4334 vfs_rele(vfs_t *vfsp) 4335 { 4336 ASSERT(vfsp->vfs_count != 0); 4337 if (atomic_dec_32_nv(&vfsp->vfs_count) == 0) { 4338 VFS_FREEVFS(vfsp); 4339 lofi_remove(vfsp); 4340 if (vfsp->vfs_zone) 4341 zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref, 4342 ZONE_REF_VFS); 4343 vfs_freemnttab(vfsp); 4344 vfs_free(vfsp); 4345 } 4346 } 4347 4348 /* 4349 * Generic operations vector support. 4350 * 4351 * This is used to build operations vectors for both the vfs and vnode. 4352 * It's normally called only when a file system is loaded. 4353 * 4354 * There are many possible algorithms for this, including the following: 4355 * 4356 * (1) scan the list of known operations; for each, see if the file system 4357 * includes an entry for it, and fill it in as appropriate. 4358 * 4359 * (2) set up defaults for all known operations. 
scan the list of ops 4360 * supplied by the file system; for each which is both supplied and 4361 * known, fill it in. 4362 * 4363 * (3) sort the lists of known ops & supplied ops; scan the list, filling 4364 * in entries as we go. 4365 * 4366 * we choose (1) for simplicity, and because performance isn't critical here. 4367 * note that (2) could be sped up using a precomputed hash table on known ops. 4368 * (3) could be faster than either, but only if the lists were very large or 4369 * supplied in sorted order. 4370 * 4371 */ 4372 4373 int 4374 fs_build_vector(void *vector, int *unused_ops, 4375 const fs_operation_trans_def_t *translation, 4376 const fs_operation_def_t *operations) 4377 { 4378 int i, num_trans, num_ops, used; 4379 4380 /* 4381 * Count the number of translations and the number of supplied 4382 * operations. 4383 */ 4384 4385 { 4386 const fs_operation_trans_def_t *p; 4387 4388 for (num_trans = 0, p = translation; 4389 p->name != NULL; 4390 num_trans++, p++) 4391 ; 4392 } 4393 4394 { 4395 const fs_operation_def_t *p; 4396 4397 for (num_ops = 0, p = operations; 4398 p->name != NULL; 4399 num_ops++, p++) 4400 ; 4401 } 4402 4403 /* Walk through each operation known to our caller. There will be */ 4404 /* one entry in the supplied "translation table" for each. */ 4405 4406 used = 0; 4407 4408 for (i = 0; i < num_trans; i++) { 4409 int j, found; 4410 char *curname; 4411 fs_generic_func_p result; 4412 fs_generic_func_p *location; 4413 4414 curname = translation[i].name; 4415 4416 /* Look for a matching operation in the list supplied by the */ 4417 /* file system. */ 4418 4419 found = 0; 4420 4421 for (j = 0; j < num_ops; j++) { 4422 if (strcmp(operations[j].name, curname) == 0) { 4423 used++; 4424 found = 1; 4425 break; 4426 } 4427 } 4428 4429 /* 4430 * If the file system is using a "placeholder" for default 4431 * or error functions, grab the appropriate function out of 4432 * the translation table. 
If the file system didn't supply 4433 * this operation at all, use the default function. 4434 */ 4435 4436 if (found) { 4437 result = operations[j].func.fs_generic; 4438 if (result == fs_default) { 4439 result = translation[i].defaultFunc; 4440 } else if (result == fs_error) { 4441 result = translation[i].errorFunc; 4442 } else if (result == NULL) { 4443 /* Null values are PROHIBITED */ 4444 return (EINVAL); 4445 } 4446 } else { 4447 result = translation[i].defaultFunc; 4448 } 4449 4450 /* Now store the function into the operations vector. */ 4451 4452 location = (fs_generic_func_p *) 4453 (((char *)vector) + translation[i].offset); 4454 4455 *location = result; 4456 } 4457 4458 *unused_ops = num_ops - used; 4459 4460 return (0); 4461 } 4462 4463 /* Placeholder functions, should never be called. */ 4464 4465 int 4466 fs_error(void) 4467 { 4468 cmn_err(CE_PANIC, "fs_error called"); 4469 return (0); 4470 } 4471 4472 int 4473 fs_default(void) 4474 { 4475 cmn_err(CE_PANIC, "fs_default called"); 4476 return (0); 4477 } 4478 4479 #ifdef __sparc 4480 4481 /* 4482 * Part of the implementation of booting off a mirrored root 4483 * involves a change of dev_t for the root device. To 4484 * accomplish this, first remove the existing hash table 4485 * entry for the root device, convert to the new dev_t, 4486 * then re-insert in the hash table at the head of the list. 
4487 */ 4488 void 4489 vfs_root_redev(vfs_t *vfsp, dev_t ndev, int fstype) 4490 { 4491 vfs_list_lock(); 4492 4493 vfs_hash_remove(vfsp); 4494 4495 vfsp->vfs_dev = ndev; 4496 vfs_make_fsid(&vfsp->vfs_fsid, ndev, fstype); 4497 4498 vfs_hash_add(vfsp, 1); 4499 4500 vfs_list_unlock(); 4501 } 4502 4503 #else /* x86 NEWBOOT */ 4504 4505 #if defined(__x86) 4506 extern int hvmboot_rootconf(); 4507 #endif /* __x86 */ 4508 4509 extern ib_boot_prop_t *iscsiboot_prop; 4510 4511 int 4512 rootconf() 4513 { 4514 int error; 4515 struct vfssw *vsw; 4516 extern void pm_init(); 4517 char *fstyp, *fsmod; 4518 int ret = -1; 4519 4520 getrootfs(&fstyp, &fsmod); 4521 4522 #if defined(__x86) 4523 /* 4524 * hvmboot_rootconf() is defined in the hvm_bootstrap misc module, 4525 * which lives in /platform/i86hvm, and hence is only available when 4526 * booted in an x86 hvm environment. If the hvm_bootstrap misc module 4527 * is not available then the modstub for this function will return 0. 4528 * If the hvm_bootstrap misc module is available it will be loaded 4529 * and hvmboot_rootconf() will be invoked. 
4530 */ 4531 if (error = hvmboot_rootconf()) 4532 return (error); 4533 #endif /* __x86 */ 4534 4535 if (error = clboot_rootconf()) 4536 return (error); 4537 4538 if (modload("fs", fsmod) == -1) 4539 panic("Cannot _init %s module", fsmod); 4540 4541 RLOCK_VFSSW(); 4542 vsw = vfs_getvfsswbyname(fstyp); 4543 RUNLOCK_VFSSW(); 4544 if (vsw == NULL) { 4545 cmn_err(CE_CONT, "Cannot find %s filesystem\n", fstyp); 4546 return (ENXIO); 4547 } 4548 VFS_INIT(rootvfs, &vsw->vsw_vfsops, 0); 4549 VFS_HOLD(rootvfs); 4550 4551 /* always mount readonly first */ 4552 rootvfs->vfs_flag |= VFS_RDONLY; 4553 4554 pm_init(); 4555 4556 if (netboot && iscsiboot_prop) { 4557 cmn_err(CE_WARN, "NFS boot and iSCSI boot" 4558 " shouldn't happen in the same time"); 4559 return (EINVAL); 4560 } 4561 4562 if (netboot || iscsiboot_prop) { 4563 ret = strplumb(); 4564 if (ret != 0) { 4565 cmn_err(CE_WARN, "Cannot plumb network device %d", ret); 4566 return (EFAULT); 4567 } 4568 } 4569 4570 if ((ret == 0) && iscsiboot_prop) { 4571 ret = modload("drv", "iscsi"); 4572 /* -1 indicates fail */ 4573 if (ret == -1) { 4574 cmn_err(CE_WARN, "Failed to load iscsi module"); 4575 iscsi_boot_prop_free(); 4576 return (EINVAL); 4577 } else { 4578 if (!i_ddi_attach_pseudo_node("iscsi")) { 4579 cmn_err(CE_WARN, 4580 "Failed to attach iscsi driver"); 4581 iscsi_boot_prop_free(); 4582 return (ENODEV); 4583 } 4584 } 4585 } 4586 4587 error = VFS_MOUNTROOT(rootvfs, ROOT_INIT); 4588 vfs_unrefvfssw(vsw); 4589 rootdev = rootvfs->vfs_dev; 4590 4591 if (error) 4592 cmn_err(CE_CONT, "Cannot mount root on %s fstype %s\n", 4593 rootfs.bo_name, fstyp); 4594 else 4595 cmn_err(CE_CONT, "?root on %s fstype %s\n", 4596 rootfs.bo_name, fstyp); 4597 return (error); 4598 } 4599 4600 /* 4601 * XXX this is called by nfs only and should probably be removed 4602 * If booted with ASKNAME, prompt on the console for a filesystem 4603 * name and return it. 
4604 */ 4605 void 4606 getfsname(char *askfor, char *name, size_t namelen) 4607 { 4608 if (boothowto & RB_ASKNAME) { 4609 printf("%s name: ", askfor); 4610 console_gets(name, namelen); 4611 } 4612 } 4613 4614 /* 4615 * Init the root filesystem type (rootfs.bo_fstype) from the "fstype" 4616 * property. 4617 * 4618 * Filesystem types starting with the prefix "nfs" are diskless clients; 4619 * init the root filename name (rootfs.bo_name), too. 4620 * 4621 * If we are booting via NFS we currently have these options: 4622 * nfs - dynamically choose NFS V2, V3, or V4 (default) 4623 * nfs2 - force NFS V2 4624 * nfs3 - force NFS V3 4625 * nfs4 - force NFS V4 4626 * Because we need to maintain backward compatibility with the naming 4627 * convention that the NFS V2 filesystem name is "nfs" (see vfs_conf.c) 4628 * we need to map "nfs" => "nfsdyn" and "nfs2" => "nfs". The dynamic 4629 * nfs module will map the type back to either "nfs", "nfs3", or "nfs4". 4630 * This is only for root filesystems, all other uses will expect 4631 * that "nfs" == NFS V2. 4632 */ 4633 static void 4634 getrootfs(char **fstypp, char **fsmodp) 4635 { 4636 char *propstr = NULL; 4637 4638 /* 4639 * Check fstype property; for diskless it should be one of "nfs", 4640 * "nfs2", "nfs3" or "nfs4". 4641 */ 4642 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 4643 DDI_PROP_DONTPASS, "fstype", &propstr) 4644 == DDI_SUCCESS) { 4645 (void) strncpy(rootfs.bo_fstype, propstr, BO_MAXFSNAME); 4646 ddi_prop_free(propstr); 4647 4648 /* 4649 * if the boot property 'fstype' is not set, but 'zfs-bootfs' is set, 4650 * assume the type of this root filesystem is 'zfs'. 
4651 */ 4652 } else if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 4653 DDI_PROP_DONTPASS, "zfs-bootfs", &propstr) 4654 == DDI_SUCCESS) { 4655 (void) strncpy(rootfs.bo_fstype, "zfs", BO_MAXFSNAME); 4656 ddi_prop_free(propstr); 4657 } 4658 4659 if (strncmp(rootfs.bo_fstype, "nfs", 3) != 0) { 4660 *fstypp = *fsmodp = rootfs.bo_fstype; 4661 return; 4662 } 4663 4664 ++netboot; 4665 4666 if (strcmp(rootfs.bo_fstype, "nfs2") == 0) 4667 (void) strcpy(rootfs.bo_fstype, "nfs"); 4668 else if (strcmp(rootfs.bo_fstype, "nfs") == 0) 4669 (void) strcpy(rootfs.bo_fstype, "nfsdyn"); 4670 4671 /* 4672 * check if path to network interface is specified in bootpath 4673 * or by a hypervisor domain configuration file. 4674 * XXPV - enable strlumb_get_netdev_path() 4675 */ 4676 if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), DDI_PROP_DONTPASS, 4677 "xpv-nfsroot")) { 4678 (void) strcpy(rootfs.bo_name, "/xpvd/xnf@0"); 4679 } else if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 4680 DDI_PROP_DONTPASS, "bootpath", &propstr) 4681 == DDI_SUCCESS) { 4682 (void) strncpy(rootfs.bo_name, propstr, BO_MAXOBJNAME); 4683 ddi_prop_free(propstr); 4684 } else { 4685 rootfs.bo_name[0] = '\0'; 4686 } 4687 *fstypp = rootfs.bo_fstype; 4688 *fsmodp = "nfs"; 4689 } 4690 #endif 4691 4692 /* 4693 * VFS feature routines 4694 */ 4695 4696 #define VFTINDEX(feature) (((feature) >> 32) & 0xFFFFFFFF) 4697 #define VFTBITS(feature) ((feature) & 0xFFFFFFFFLL) 4698 4699 /* Register a feature in the vfs */ 4700 void 4701 vfs_set_feature(vfs_t *vfsp, vfs_feature_t feature) 4702 { 4703 /* Note that vfs_featureset[] is found in *vfsp->vfs_implp */ 4704 if (vfsp->vfs_implp == NULL) 4705 return; 4706 4707 vfsp->vfs_featureset[VFTINDEX(feature)] |= VFTBITS(feature); 4708 } 4709 4710 void 4711 vfs_clear_feature(vfs_t *vfsp, vfs_feature_t feature) 4712 { 4713 /* Note that vfs_featureset[] is found in *vfsp->vfs_implp */ 4714 if (vfsp->vfs_implp == NULL) 4715 return; 4716 
vfsp->vfs_featureset[VFTINDEX(feature)] &= VFTBITS(~feature); 4717 } 4718 4719 /* 4720 * Query a vfs for a feature. 4721 * Returns 1 if feature is present, 0 if not 4722 */ 4723 int 4724 vfs_has_feature(vfs_t *vfsp, vfs_feature_t feature) 4725 { 4726 int ret = 0; 4727 4728 /* Note that vfs_featureset[] is found in *vfsp->vfs_implp */ 4729 if (vfsp->vfs_implp == NULL) 4730 return (ret); 4731 4732 if (vfsp->vfs_featureset[VFTINDEX(feature)] & VFTBITS(feature)) 4733 ret = 1; 4734 4735 return (ret); 4736 } 4737 4738 /* 4739 * Propagate feature set from one vfs to another 4740 */ 4741 void 4742 vfs_propagate_features(vfs_t *from, vfs_t *to) 4743 { 4744 int i; 4745 4746 if (to->vfs_implp == NULL || from->vfs_implp == NULL) 4747 return; 4748 4749 for (i = 1; i <= to->vfs_featureset[0]; i++) { 4750 to->vfs_featureset[i] = from->vfs_featureset[i]; 4751 } 4752 } 4753 4754 #define LOFINODE_PATH "/dev/lofi/%d" 4755 4756 /* 4757 * Return the vnode for the lofi node if there's a lofi mount in place. 4758 * Returns -1 when there's no lofi node, 0 on success, and > 0 on 4759 * failure. 4760 */ 4761 int 4762 vfs_get_lofi(vfs_t *vfsp, vnode_t **vpp) 4763 { 4764 char *path = NULL; 4765 int strsize; 4766 int err; 4767 4768 if (vfsp->vfs_lofi_id == 0) { 4769 *vpp = NULL; 4770 return (-1); 4771 } 4772 4773 strsize = snprintf(NULL, 0, LOFINODE_PATH, vfsp->vfs_lofi_id); 4774 path = kmem_alloc(strsize + 1, KM_SLEEP); 4775 (void) snprintf(path, strsize + 1, LOFINODE_PATH, vfsp->vfs_lofi_id); 4776 4777 /* 4778 * We may be inside a zone, so we need to use the /dev path, but 4779 * it's created asynchronously, so we wait here. 4780 */ 4781 for (;;) { 4782 err = lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, vpp); 4783 4784 if (err != ENOENT) 4785 break; 4786 4787 if ((err = delay_sig(hz / 8)) == EINTR) 4788 break; 4789 } 4790 4791 if (err) 4792 *vpp = NULL; 4793 4794 kmem_free(path, strsize + 1); 4795 return (err); 4796 } 4797