1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 
37 */ 38 39 40 #pragma ident "%Z%%M% %I% %E% SMI" 41 42 #include <sys/types.h> 43 #include <sys/t_lock.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/user.h> 47 #include <sys/fstyp.h> 48 #include <sys/kmem.h> 49 #include <sys/systm.h> 50 #include <sys/proc.h> 51 #include <sys/mount.h> 52 #include <sys/vfs.h> 53 #include <sys/fem.h> 54 #include <sys/mntent.h> 55 #include <sys/stat.h> 56 #include <sys/statvfs.h> 57 #include <sys/statfs.h> 58 #include <sys/cred.h> 59 #include <sys/vnode.h> 60 #include <sys/rwstlock.h> 61 #include <sys/dnlc.h> 62 #include <sys/file.h> 63 #include <sys/time.h> 64 #include <sys/atomic.h> 65 #include <sys/cmn_err.h> 66 #include <sys/buf.h> 67 #include <sys/swap.h> 68 #include <sys/debug.h> 69 #include <sys/vnode.h> 70 #include <sys/modctl.h> 71 #include <sys/ddi.h> 72 #include <sys/pathname.h> 73 #include <sys/bootconf.h> 74 #include <sys/dumphdr.h> 75 #include <sys/dc_ki.h> 76 #include <sys/poll.h> 77 #include <sys/sunddi.h> 78 #include <sys/sysmacros.h> 79 #include <sys/zone.h> 80 #include <sys/policy.h> 81 #include <sys/ctfs.h> 82 #include <sys/objfs.h> 83 #include <sys/console.h> 84 #include <sys/reboot.h> 85 86 #include <vm/page.h> 87 88 #include <fs/fs_subr.h> 89 90 /* Private interfaces to create vopstats-related data structures */ 91 extern void initialize_vopstats(vopstats_t *); 92 extern vopstats_t *get_fstype_vopstats(struct vfs *, struct vfssw *); 93 extern vsk_anchor_t *get_vskstat_anchor(struct vfs *); 94 95 static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int); 96 static void vfs_setmntopt_nolock(mntopts_t *, const char *, 97 const char *, int, int); 98 static int vfs_optionisset_nolock(const mntopts_t *, const char *, char **); 99 static void vfs_freemnttab(struct vfs *); 100 static void vfs_freeopt(mntopt_t *); 101 static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *); 102 static void vfs_swapopttbl(mntopts_t *, mntopts_t *); 103 static void vfs_copyopttbl_extend(const mntopts_t 
*, mntopts_t *, int); 104 static void vfs_createopttbl_extend(mntopts_t *, const char *, 105 const mntopts_t *); 106 static char **vfs_copycancelopt_extend(char **const, int); 107 static void vfs_freecancelopt(char **); 108 static char *getrootfs(void); 109 static int getmacpath(dev_info_t *, void *); 110 111 struct ipmnt { 112 struct ipmnt *mip_next; 113 dev_t mip_dev; 114 struct vfs *mip_vfsp; 115 }; 116 117 static kmutex_t vfs_miplist_mutex; 118 static struct ipmnt *vfs_miplist = NULL; 119 static struct ipmnt *vfs_miplist_end = NULL; 120 121 /* 122 * VFS global data. 123 */ 124 vnode_t *rootdir; /* pointer to root inode vnode. */ 125 vnode_t *devicesdir; /* pointer to inode of devices root */ 126 127 char *server_rootpath; /* root path for diskless clients */ 128 char *server_hostname; /* hostname of diskless server */ 129 130 static struct vfs root; 131 static struct vfs devices; 132 struct vfs *rootvfs = &root; /* pointer to root vfs; head of VFS list. */ 133 rvfs_t *rvfs_list; /* array of vfs ptrs for vfs hash list */ 134 int vfshsz = 512; /* # of heads/locks in vfs hash arrays */ 135 /* must be power of 2! */ 136 timespec_t vfs_mnttab_ctime; /* mnttab created time */ 137 timespec_t vfs_mnttab_mtime; /* mnttab last modified time */ 138 char *vfs_dummyfstype = "\0"; 139 struct pollhead vfs_pollhd; /* for mnttab pollers */ 140 141 /* 142 * Table for generic options recognized in the VFS layer and acted 143 * on at this level before parsing file system specific options. 144 * The nosuid option is stronger than any of the devices and setuid 145 * options, so those are canceled when nosuid is seen. 146 * 147 * All options which are added here need to be added to the 148 * list of standard options in usr/src/cmd/fs.d/fslib.c as well. 
149 */ 150 /* 151 * VFS Mount options table 152 */ 153 static char *ro_cancel[] = { MNTOPT_RW, NULL }; 154 static char *rw_cancel[] = { MNTOPT_RO, NULL }; 155 static char *suid_cancel[] = { MNTOPT_NOSUID, NULL }; 156 static char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES, 157 MNTOPT_NOSETUID, MNTOPT_SETUID, NULL }; 158 static char *devices_cancel[] = { MNTOPT_NODEVICES, NULL }; 159 static char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL }; 160 static char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL }; 161 static char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL }; 162 static char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL }; 163 static char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL }; 164 static char *exec_cancel[] = { MNTOPT_NOEXEC, NULL }; 165 static char *noexec_cancel[] = { MNTOPT_EXEC, NULL }; 166 167 static const mntopt_t mntopts[] = { 168 /* 169 * option name cancel options default arg flags 170 */ 171 { MNTOPT_REMOUNT, NULL, NULL, 172 MO_NODISPLAY, (void *)0 }, 173 { MNTOPT_RO, ro_cancel, NULL, 0, 174 (void *)0 }, 175 { MNTOPT_RW, rw_cancel, NULL, 0, 176 (void *)0 }, 177 { MNTOPT_SUID, suid_cancel, NULL, 0, 178 (void *)0 }, 179 { MNTOPT_NOSUID, nosuid_cancel, NULL, 0, 180 (void *)0 }, 181 { MNTOPT_DEVICES, devices_cancel, NULL, 0, 182 (void *)0 }, 183 { MNTOPT_NODEVICES, nodevices_cancel, NULL, 0, 184 (void *)0 }, 185 { MNTOPT_SETUID, setuid_cancel, NULL, 0, 186 (void *)0 }, 187 { MNTOPT_NOSETUID, nosetuid_cancel, NULL, 0, 188 (void *)0 }, 189 { MNTOPT_NBMAND, nbmand_cancel, NULL, 0, 190 (void *)0 }, 191 { MNTOPT_NONBMAND, nonbmand_cancel, NULL, 0, 192 (void *)0 }, 193 { MNTOPT_EXEC, exec_cancel, NULL, 0, 194 (void *)0 }, 195 { MNTOPT_NOEXEC, noexec_cancel, NULL, 0, 196 (void *)0 }, 197 }; 198 199 const mntopts_t vfs_mntopts = { 200 sizeof (mntopts) / sizeof (mntopt_t), 201 (mntopt_t *)&mntopts[0] 202 }; 203 204 /* 205 * File system operation dispatch functions. 
206 */ 207 208 int 209 fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 210 { 211 return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr); 212 } 213 214 int 215 fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr) 216 { 217 return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr); 218 } 219 220 int 221 fsop_root(vfs_t *vfsp, vnode_t **vpp) 222 { 223 refstr_t *mntpt; 224 int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp); 225 /* 226 * Make sure this root has a path. With lofs, it is possible to have 227 * a NULL mountpoint. 228 */ 229 if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) { 230 mntpt = vfs_getmntpoint(vfsp); 231 vn_setpath_str(*vpp, refstr_value(mntpt), 232 strlen(refstr_value(mntpt))); 233 refstr_rele(mntpt); 234 } 235 236 return (ret); 237 } 238 239 int 240 fsop_statfs(vfs_t *vfsp, statvfs64_t *sp) 241 { 242 return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp); 243 } 244 245 int 246 fsop_sync(vfs_t *vfsp, short flag, cred_t *cr) 247 { 248 return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr); 249 } 250 251 int 252 fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 253 { 254 return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp); 255 } 256 257 int 258 fsop_mountroot(vfs_t *vfsp, enum whymountroot reason) 259 { 260 return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason); 261 } 262 263 void 264 fsop_freefs(vfs_t *vfsp) 265 { 266 (*(vfsp)->vfs_op->vfs_freevfs)(vfsp); 267 } 268 269 int 270 fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate) 271 { 272 return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate)); 273 } 274 275 int 276 fsop_sync_by_kind(int fstype, short flag, cred_t *cr) 277 { 278 ASSERT((fstype >= 0) && (fstype < nfstype)); 279 280 if (ALLOCATED_VFSSW(&vfssw[fstype]) && VFS_INSTALLED(&vfssw[fstype])) 281 return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr); 282 else 283 return (ENOTSUP); 284 } 285 286 /* 287 * File system initialization. vfs_setfsops() must be called from a file 288 * system's init routine. 
289 */ 290 291 static int 292 fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual, 293 int *unused_ops) 294 { 295 static const fs_operation_trans_def_t vfs_ops_table[] = { 296 VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount), 297 fs_nosys, fs_nosys, 298 299 VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount), 300 fs_nosys, fs_nosys, 301 302 VFSNAME_ROOT, offsetof(vfsops_t, vfs_root), 303 fs_nosys, fs_nosys, 304 305 VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs), 306 fs_nosys, fs_nosys, 307 308 VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync), 309 (fs_generic_func_p) fs_sync, 310 (fs_generic_func_p) fs_sync, /* No errors allowed */ 311 312 VFSNAME_VGET, offsetof(vfsops_t, vfs_vget), 313 fs_nosys, fs_nosys, 314 315 VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot), 316 fs_nosys, fs_nosys, 317 318 VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs), 319 (fs_generic_func_p)fs_freevfs, 320 (fs_generic_func_p)fs_freevfs, /* Shouldn't fail */ 321 322 VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate), 323 (fs_generic_func_p)fs_nosys, 324 (fs_generic_func_p)fs_nosys, 325 326 NULL, 0, NULL, NULL 327 }; 328 329 return (fs_build_vector(actual, unused_ops, vfs_ops_table, template)); 330 } 331 332 int 333 vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual) 334 { 335 int error; 336 int unused_ops; 337 338 /* Verify that fstype refers to a loaded fs (and not fsid 0). */ 339 340 if ((fstype <= 0) || (fstype >= nfstype)) 341 return (EINVAL); 342 343 if (!ALLOCATED_VFSSW(&vfssw[fstype])) 344 return (EINVAL); 345 346 /* Set up the operations vector. 
*/ 347 348 error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops); 349 350 if (error != 0) 351 return (error); 352 353 vfssw[fstype].vsw_flag |= VSW_INSTALLED; 354 355 if (actual != NULL) 356 *actual = &vfssw[fstype].vsw_vfsops; 357 358 #if DEBUG 359 if (unused_ops != 0) 360 cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied " 361 "but not used", vfssw[fstype].vsw_name, unused_ops); 362 #endif 363 364 return (0); 365 } 366 367 int 368 vfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual) 369 { 370 int error; 371 int unused_ops; 372 373 *actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP); 374 375 error = fs_copyfsops(template, *actual, &unused_ops); 376 if (error != 0) { 377 kmem_free(*actual, sizeof (vfsops_t)); 378 *actual = NULL; 379 return (error); 380 } 381 382 return (0); 383 } 384 385 /* 386 * Free a vfsops structure created as a result of vfs_makefsops(). 387 * NOTE: For a vfsops structure initialized by vfs_setfsops(), use 388 * vfs_freevfsops_by_type(). 389 */ 390 void 391 vfs_freevfsops(vfsops_t *vfsops) 392 { 393 kmem_free(vfsops, sizeof (vfsops_t)); 394 } 395 396 /* 397 * Since the vfsops structure is part of the vfssw table and wasn't 398 * really allocated, we're not really freeing anything. We keep 399 * the name for consistency with vfs_freevfsops(). We do, however, 400 * need to take care of a little bookkeeping. 401 * NOTE: For a vfsops structure created by vfs_setfsops(), use 402 * vfs_freevfsops_by_type(). 403 */ 404 int 405 vfs_freevfsops_by_type(int fstype) 406 { 407 408 /* Verify that fstype refers to a loaded fs (and not fsid 0). 
*/ 409 if ((fstype <= 0) || (fstype >= nfstype)) 410 return (EINVAL); 411 412 WLOCK_VFSSW(); 413 if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) { 414 WUNLOCK_VFSSW(); 415 return (EINVAL); 416 } 417 418 vfssw[fstype].vsw_flag &= ~VSW_INSTALLED; 419 WUNLOCK_VFSSW(); 420 421 return (0); 422 } 423 424 /* Support routines used to reference vfs_op */ 425 426 /* Set the operations vector for a vfs */ 427 void 428 vfs_setops(vfs_t *vfsp, vfsops_t *vfsops) 429 { 430 vfsops_t *op; 431 432 ASSERT(vfsp != NULL); 433 ASSERT(vfsops != NULL); 434 435 op = vfsp->vfs_op; 436 membar_consumer(); 437 if ((vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) && 438 casptr(&vfsp->vfs_op, op, vfsops) == op) { 439 return; 440 } 441 fsem_setvfsops(vfsp, vfsops); 442 } 443 444 /* Retrieve the operations vector for a vfs */ 445 vfsops_t * 446 vfs_getops(vfs_t *vfsp) 447 { 448 vfsops_t *op; 449 450 ASSERT(vfsp != NULL); 451 452 op = vfsp->vfs_op; 453 membar_consumer(); 454 if ((vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) && 455 op == vfsp->vfs_op) { 456 return (op); 457 } else { 458 return (fsem_getvfsops(vfsp)); 459 } 460 } 461 462 /* 463 * Returns non-zero (1) if the vfsops matches that of the vfs. 464 * Returns zero (0) if not. 465 */ 466 int 467 vfs_matchops(vfs_t *vfsp, vfsops_t *vfsops) 468 { 469 return (vfs_getops(vfsp) == vfsops); 470 } 471 472 /* 473 * Returns non-zero (1) if the file system has installed a non-default, 474 * non-error vfs_sync routine. Returns zero (0) otherwise. 475 */ 476 int 477 vfs_can_sync(vfs_t *vfsp) 478 { 479 /* vfs_sync() routine is not the default/error function */ 480 return (vfs_getops(vfsp)->vfs_sync != fs_sync); 481 } 482 483 /* 484 * Initialize a vfs structure. 
485 */ 486 void 487 vfs_init(vfs_t *vfsp, vfsops_t *op, void *data) 488 { 489 vfsp->vfs_count = 0; 490 vfsp->vfs_next = vfsp; 491 vfsp->vfs_prev = vfsp; 492 vfsp->vfs_zone_next = vfsp; 493 vfsp->vfs_zone_prev = vfsp; 494 vfsp->vfs_flag = 0; 495 vfsp->vfs_data = (data); 496 vfsp->vfs_resource = NULL; 497 vfsp->vfs_mntpt = NULL; 498 vfsp->vfs_mntopts.mo_count = 0; 499 vfsp->vfs_mntopts.mo_list = NULL; 500 vfsp->vfs_implp = NULL; 501 vfsp->vfs_zone = NULL; 502 /* 503 * Note: Don't initialize any member of the vfs_impl_t structure 504 * here as it could be a problem for unbundled file systems. 505 */ 506 vfs_setops((vfsp), (op)); 507 sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL); 508 } 509 510 /* 511 * Allocate and initialize the vfs implementation private data 512 * structure, vfs_impl_t. 513 */ 514 void 515 vfsimpl_setup(vfs_t *vfsp) 516 { 517 vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP); 518 /* Note that this are #define'd in vfs.h */ 519 vfsp->vfs_femhead = NULL; 520 vfsp->vfs_vskap = NULL; 521 vfsp->vfs_fstypevsp = NULL; 522 } 523 524 /* 525 * Release the vfs_impl_t structure, if it exists. Some unbundled 526 * filesystems may not use the newer version of vfs and thus 527 * would not contain this implementation private data structure. 528 */ 529 void 530 vfsimpl_teardown(vfs_t *vfsp) 531 { 532 vfs_impl_t *vip = vfsp->vfs_implp; 533 534 if (vip == NULL) 535 return; 536 537 if (vip->vi_femhead) { 538 ASSERT(vip->vi_femhead->femh_list == NULL); 539 mutex_destroy(&vip->vi_femhead->femh_lock); 540 kmem_free(vip->vi_femhead, sizeof (*(vip->vi_femhead))); 541 vip->vi_femhead = NULL; 542 } 543 544 kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t)); 545 vfsp->vfs_implp = NULL; 546 } 547 548 /* 549 * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs, 550 * fstatvfs, and sysfs moved to common/syscall. 551 */ 552 553 /* 554 * Update every mounted file system. 
We call the vfs_sync operation of 555 * each file system type, passing it a NULL vfsp to indicate that all 556 * mounted file systems of that type should be updated. 557 */ 558 void 559 vfs_sync(int flag) 560 { 561 struct vfssw *vswp; 562 RLOCK_VFSSW(); 563 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 564 if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) { 565 vfs_refvfssw(vswp); 566 RUNLOCK_VFSSW(); 567 (void) (*vswp->vsw_vfsops.vfs_sync)(NULL, flag, 568 CRED()); 569 vfs_unrefvfssw(vswp); 570 RLOCK_VFSSW(); 571 } 572 } 573 RUNLOCK_VFSSW(); 574 } 575 576 void 577 sync(void) 578 { 579 vfs_sync(0); 580 } 581 582 /* 583 * External routines. 584 */ 585 586 krwlock_t vfssw_lock; /* lock accesses to vfssw */ 587 588 /* 589 * Lock for accessing the vfs linked list. Initialized in vfs_mountroot(), 590 * but otherwise should be accessed only via vfs_list_lock() and 591 * vfs_list_unlock(). Also used to protect the timestamp for mods to the list. 592 */ 593 static krwlock_t vfslist; 594 595 /* 596 * Mount devfs on /devices. 
This is done right after root is mounted to provide device access
 * support for the system.
 *
 * Failure to load devfs, to find /devices, to mount it or to obtain its
 * root vnode panics the system.  Failure to take either of the two locks
 * needed to enter the mount in the vfs list is only logged, and the
 * routine returns without adding the mount to the list.
 */
static void
vfs_mountdevices(void)
{
	struct vfssw *devfs_sw;
	struct vnode *mntvp;
	/*
	 * Fake mounta for devfs_mount(): everything null/zero except the
	 * flags word, which is MS_SYSSPACE.  Initializers are positional;
	 * presumably spec, dir, flags, fstype, dataptr, datalen, optptr,
	 * optlen -- confirm against struct mounta.
	 */
	struct mounta mounta = {
		NULL, NULL, MS_SYSSPACE, NULL, NULL, 0, NULL, 0
	};

	/* _init the devfs module so that it fills in the vfssw. */
	if (modload("fs", "devfs") == -1)
		cmn_err(CE_PANIC, "Cannot _init devfs module\n");

	/*
	 * Set up and hold the static "devices" vfs.  The vfssw read lock
	 * stays held until the mount below has completed.
	 */
	RLOCK_VFSSW();
	devfs_sw = vfs_getvfsswbyname("devfs");
	VFS_INIT(&devices, &devfs_sw->vsw_vfsops, NULL);
	VFS_HOLD(&devices);

	/* Locate the mount point. */
	if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP,
	    &mntvp) != 0)
		cmn_err(CE_PANIC, "Cannot find /devices\n");

	/* Perform the mount of /devices. */
	if (VFS_MOUNT(&devices, mntvp, &mounta, CRED()) != 0)
		cmn_err(CE_PANIC, "Cannot mount /devices\n");

	RUNLOCK_VFSSW();

	/* Resource and mount point strings, for mnttab display. */
	vfs_setresource(&devices, "/devices");
	vfs_setmntpoint(&devices, "/devices");

	/* Hold the root of /devices so it won't go away. */
	if (VFS_ROOT(&devices, &devicesdir) != 0)
		cmn_err(CE_PANIC, "vfs_mountdevices: not devices root");
	VN_HOLD(devicesdir);

	/* Enter the mount in the vfs list under both locks. */
	if (vfs_lock(&devices) != 0) {
		cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices");
		return;
	}

	if (vn_vfswlock(mntvp) != 0) {
		vfs_unlock(&devices);
		cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices");
		return;
	}

	vfs_add(mntvp, &devices, 0);
	vn_vfsunlock(mntvp);
	vfs_unlock(&devices);
}

/*
 * Mount required filesystem.  This is done right after root is mounted.
674 */ 675 static void 676 vfs_mountfs(char *module, char *spec, char *path) 677 { 678 struct vnode *mvp; 679 struct mounta mounta; 680 vfs_t *vfsp; 681 682 mounta.flags = MS_SYSSPACE | MS_DATA; 683 mounta.fstype = module; 684 mounta.spec = spec; 685 mounta.dir = path; 686 if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) { 687 cmn_err(CE_WARN, "Cannot find %s\n", path); 688 return; 689 } 690 if (domount(NULL, &mounta, mvp, CRED(), &vfsp)) 691 cmn_err(CE_WARN, "Cannot mount %s\n", path); 692 else 693 VFS_RELE(vfsp); 694 VN_RELE(mvp); 695 } 696 697 /* 698 * vfs_mountroot is called by main() to mount the root filesystem. 699 */ 700 void 701 vfs_mountroot(void) 702 { 703 struct vnode *rvp = NULL; 704 char *path; 705 size_t plen; 706 struct vfssw *vswp; 707 708 rw_init(&vfssw_lock, NULL, RW_DEFAULT, NULL); 709 rw_init(&vfslist, NULL, RW_DEFAULT, NULL); 710 711 /* 712 * Alloc the vfs hash bucket array and locks 713 */ 714 rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP); 715 716 /* 717 * Call machine-dependent routine "rootconf" to choose a root 718 * file system type. 719 */ 720 if (rootconf()) 721 cmn_err(CE_PANIC, "vfs_mountroot: cannot mount root"); 722 /* 723 * Get vnode for '/'. Set up rootdir, u.u_rdir and u.u_cdir 724 * to point to it. These are used by lookuppn() so that it 725 * knows where to start from ('/' or '.'). 726 */ 727 vfs_setmntpoint(rootvfs, "/"); 728 if (VFS_ROOT(rootvfs, &rootdir)) 729 cmn_err(CE_PANIC, "vfs_mountroot: no root vnode"); 730 u.u_cdir = rootdir; 731 VN_HOLD(u.u_cdir); 732 u.u_rdir = NULL; 733 734 /* 735 * Setup the global zone's rootvp, now that it exists. 736 */ 737 global_zone->zone_rootvp = rootdir; 738 VN_HOLD(global_zone->zone_rootvp); 739 740 /* 741 * Notify the module code that it can begin using the 742 * root filesystem instead of the boot program's services. 
743 */ 744 modrootloaded = 1; 745 /* 746 * Set up mnttab information for root 747 */ 748 vfs_setresource(rootvfs, rootfs.bo_name); 749 750 /* 751 * Notify cluster software that the root filesystem is available. 752 */ 753 clboot_mountroot(); 754 755 /* Now that we're all done with the root FS, set up its vopstats */ 756 if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) { 757 /* Set flag for statistics collection */ 758 if (vswp->vsw_flag & VSW_STATS) { 759 initialize_vopstats(&rootvfs->vfs_vopstats); 760 rootvfs->vfs_flag |= VFS_STATS; 761 rootvfs->vfs_fstypevsp = 762 get_fstype_vopstats(rootvfs, vswp); 763 rootvfs->vfs_vskap = get_vskstat_anchor(rootvfs); 764 } 765 vfs_unrefvfssw(vswp); 766 } 767 768 /* 769 * Mount /devices, /system/contract, /etc/mnttab, /etc/svc/volatile, 770 * /system/object, and /proc. 771 */ 772 vfs_mountdevices(); 773 774 vfs_mountfs("ctfs", "ctfs", CTFS_ROOT); 775 vfs_mountfs("proc", "/proc", "/proc"); 776 vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab"); 777 vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile"); 778 vfs_mountfs("objfs", "objfs", OBJFS_ROOT); 779 780 #ifdef __sparc 781 /* 782 * This bit of magic can go away when we convert sparc to 783 * the new boot architecture based on ramdisk. 784 * 785 * Booting off a mirrored root volume: 786 * At this point, we have booted and mounted root on a 787 * single component of the mirror. Complete the boot 788 * by configuring SVM and converting the root to the 789 * dev_t of the mirrored root device. This dev_t conversion 790 * only works because the underlying device doesn't change. 
791 */ 792 if (root_is_svm) { 793 if (svm_rootconf()) { 794 cmn_err(CE_PANIC, "vfs_mountroot: cannot remount root"); 795 } 796 797 /* 798 * mnttab should reflect the new root device 799 */ 800 vfs_lock_wait(rootvfs); 801 vfs_setresource(rootvfs, rootfs.bo_name); 802 vfs_unlock(rootvfs); 803 } 804 #endif /* __sparc */ 805 806 /* 807 * Look up the root device via devfs so that a dv_node is 808 * created for it. The vnode is never VN_RELE()ed. 809 * We allocate more than MAXPATHLEN so that the 810 * buffer passed to i_ddi_prompath_to_devfspath() is 811 * exactly MAXPATHLEN (the function expects a buffer 812 * of that length). 813 */ 814 plen = strlen("/devices"); 815 path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP); 816 (void) strcpy(path, "/devices"); 817 818 if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen) 819 != DDI_SUCCESS || 820 lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) { 821 822 /* NUL terminate in case "path" has garbage */ 823 path[plen + MAXPATHLEN - 1] = '\0'; 824 #ifdef DEBUG 825 cmn_err(CE_WARN, "!Cannot lookup root device: %s", path); 826 #endif 827 } 828 kmem_free(path, plen + MAXPATHLEN); 829 } 830 831 /* 832 * If remount failed and we're in a zone we need to check for the zone 833 * root path and strip it before the call to vfs_setpath(). 834 * 835 * If strpath doesn't begin with the zone_rootpath the original 836 * strpath is returned unchanged. 837 */ 838 static const char * 839 stripzonepath(const char *strpath) 840 { 841 char *str1, *str2; 842 int i; 843 zone_t *zonep = curproc->p_zone; 844 845 if (zonep->zone_rootpath == NULL || strpath == NULL) { 846 return (NULL); 847 } 848 849 /* 850 * we check for the end of the string at one past the 851 * current position because the zone_rootpath always 852 * ends with "/" but we don't want to strip that off. 
853 */ 854 str1 = zonep->zone_rootpath; 855 str2 = (char *)strpath; 856 ASSERT(str1[0] != '\0'); 857 for (i = 0; str1[i + 1] != '\0'; i++) { 858 if (str1[i] != str2[i]) 859 return ((char *)strpath); 860 } 861 return (&str2[i]); 862 } 863 864 /* 865 * Common mount code. Called from the system call entry point, from autofs, 866 * and from pxfs. 867 * 868 * Takes the effective file system type, mount arguments, the mount point 869 * vnode, flags specifying whether the mount is a remount and whether it 870 * should be entered into the vfs list, and credentials. Fills in its vfspp 871 * parameter with the mounted file system instance's vfs. 872 * 873 * Note that the effective file system type is specified as a string. It may 874 * be null, in which case it's determined from the mount arguments, and may 875 * differ from the type specified in the mount arguments; this is a hook to 876 * allow interposition when instantiating file system instances. 877 * 878 * The caller is responsible for releasing its own hold on the mount point 879 * vp (this routine does its own hold when necessary). 880 * Also note that for remounts, the mount point vp should be the vnode for 881 * the root of the file system rather than the vnode that the file system 882 * is mounted on top of. 883 */ 884 int 885 domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, 886 struct vfs **vfspp) 887 { 888 struct vfssw *vswp; 889 vfsops_t *vfsops; 890 struct vfs *vfsp; 891 struct vnode *bvp; 892 dev_t bdev = 0; 893 mntopts_t mnt_mntopts; 894 int error = 0; 895 int copyout_error = 0; 896 int ovflags; 897 char *opts = uap->optptr; 898 char *inargs = opts; 899 int optlen = uap->optlen; 900 int remount; 901 int rdonly; 902 int nbmand = 0; 903 int delmip = 0; 904 int addmip = 0; 905 int splice = ((uap->flags & MS_NOSPLICE) == 0); 906 int fromspace = (uap->flags & MS_SYSSPACE) ? 
907 UIO_SYSSPACE : UIO_USERSPACE; 908 char *resource = NULL, *mountpt = NULL; 909 refstr_t *oldresource, *oldmntpt; 910 struct pathname pn, rpn; 911 vsk_anchor_t *vskap; 912 913 /* 914 * The v_flag value for the mount point vp is permanently set 915 * to VVFSLOCK so that no one bypasses the vn_vfs*locks routine 916 * for mount point locking. 917 */ 918 mutex_enter(&vp->v_lock); 919 vp->v_flag |= VVFSLOCK; 920 mutex_exit(&vp->v_lock); 921 922 mnt_mntopts.mo_count = 0; 923 /* 924 * Find the ops vector to use to invoke the file system-specific mount 925 * method. If the fsname argument is non-NULL, use it directly. 926 * Otherwise, dig the file system type information out of the mount 927 * arguments. 928 * 929 * A side effect is to hold the vfssw entry. 930 * 931 * Mount arguments can be specified in several ways, which are 932 * distinguished by flag bit settings. The preferred way is to set 933 * MS_OPTIONSTR, indicating an 8 argument mount with the file system 934 * type supplied as a character string and the last two arguments 935 * being a pointer to a character buffer and the size of the buffer. 936 * On entry, the buffer holds a null terminated list of options; on 937 * return, the string is the list of options the file system 938 * recognized. If MS_DATA is set arguments five and six point to a 939 * block of binary data which the file system interprets. 940 * A further wrinkle is that some callers don't set MS_FSS and MS_DATA 941 * consistently with these conventions. To handle them, we check to 942 * see whether the pointer to the file system name has a numeric value 943 * less than 256. If so, we treat it as an index. 
944 */ 945 if (fsname != NULL) { 946 if ((vswp = vfs_getvfssw(fsname)) == NULL) { 947 return (EINVAL); 948 } 949 } else if (uap->flags & (MS_OPTIONSTR | MS_DATA | MS_FSS)) { 950 size_t n; 951 uint_t fstype; 952 char name[FSTYPSZ]; 953 954 if ((fstype = (uintptr_t)uap->fstype) < 256) { 955 RLOCK_VFSSW(); 956 if (fstype == 0 || fstype >= nfstype || 957 !ALLOCATED_VFSSW(&vfssw[fstype])) { 958 RUNLOCK_VFSSW(); 959 return (EINVAL); 960 } 961 (void) strcpy(name, vfssw[fstype].vsw_name); 962 RUNLOCK_VFSSW(); 963 if ((vswp = vfs_getvfssw(name)) == NULL) 964 return (EINVAL); 965 } else { 966 /* 967 * Handle either kernel or user address space. 968 */ 969 if (uap->flags & MS_SYSSPACE) { 970 error = copystr(uap->fstype, name, 971 FSTYPSZ, &n); 972 } else { 973 error = copyinstr(uap->fstype, name, 974 FSTYPSZ, &n); 975 } 976 if (error) { 977 if (error == ENAMETOOLONG) 978 return (EINVAL); 979 return (error); 980 } 981 if ((vswp = vfs_getvfssw(name)) == NULL) 982 return (EINVAL); 983 } 984 } else { 985 if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL) 986 return (EINVAL); 987 } 988 if (!VFS_INSTALLED(vswp)) 989 return (EINVAL); 990 vfsops = &vswp->vsw_vfsops; 991 992 vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts); 993 /* 994 * Fetch mount options and parse them for generic vfs options 995 */ 996 if (uap->flags & MS_OPTIONSTR) { 997 /* 998 * Limit the buffer size 999 */ 1000 if (optlen < 0 || optlen > MAX_MNTOPT_STR) { 1001 error = EINVAL; 1002 goto errout; 1003 } 1004 if ((uap->flags & MS_SYSSPACE) == 0) { 1005 inargs = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP); 1006 inargs[0] = '\0'; 1007 if (optlen) { 1008 error = copyinstr(opts, inargs, (size_t)optlen, 1009 NULL); 1010 if (error) { 1011 goto errout; 1012 } 1013 } 1014 } 1015 vfs_parsemntopts(&mnt_mntopts, inargs, 0); 1016 } 1017 /* 1018 * Flag bits override the options string. 
1019 */ 1020 if (uap->flags & MS_REMOUNT) 1021 vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0); 1022 if (uap->flags & MS_RDONLY) 1023 vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0); 1024 if (uap->flags & MS_NOSUID) 1025 vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0); 1026 1027 /* 1028 * Check if this is a remount; must be set in the option string and 1029 * the file system must support a remount option. 1030 */ 1031 if (remount = vfs_optionisset_nolock(&mnt_mntopts, 1032 MNTOPT_REMOUNT, NULL)) { 1033 if (!(vswp->vsw_flag & VSW_CANREMOUNT)) { 1034 error = ENOTSUP; 1035 goto errout; 1036 } 1037 uap->flags |= MS_REMOUNT; 1038 } 1039 1040 /* 1041 * uap->flags and vfs_optionisset() should agree. 1042 */ 1043 if (rdonly = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) { 1044 uap->flags |= MS_RDONLY; 1045 } 1046 if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) { 1047 uap->flags |= MS_NOSUID; 1048 } 1049 nbmand = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NBMAND, NULL); 1050 ASSERT(splice || !remount); 1051 /* 1052 * If we are splicing the fs into the namespace, 1053 * perform mount point checks. 1054 * 1055 * We want to resolve the path for the mount point to eliminate 1056 * '.' and ".." and symlinks in mount points; we can't do the 1057 * same for the resource string, since it would turn 1058 * "/dev/dsk/c0t0d0s0" into "/devices/pci@...". We need to do 1059 * this before grabbing vn_vfswlock(), because otherwise we 1060 * would deadlock with lookuppn(). 1061 */ 1062 if (splice) { 1063 ASSERT(vp->v_count > 0); 1064 1065 /* 1066 * Pick up mount point and device from appropriate space. 1067 */ 1068 if (pn_get(uap->spec, fromspace, &pn) == 0) { 1069 resource = kmem_alloc(pn.pn_pathlen + 1, 1070 KM_SLEEP); 1071 (void) strcpy(resource, pn.pn_path); 1072 pn_free(&pn); 1073 } 1074 /* 1075 * Do a lookupname prior to taking the 1076 * writelock. 
Mark this as completed if 1077 * successful for later cleanup and addition to 1078 * the mount in progress table. 1079 */ 1080 if ((uap->flags & MS_GLOBAL) == 0 && 1081 lookupname(uap->spec, fromspace, 1082 FOLLOW, NULL, &bvp) == 0) { 1083 addmip = 1; 1084 } 1085 1086 if ((error = pn_get(uap->dir, fromspace, &pn)) == 0) { 1087 pathname_t *pnp; 1088 1089 if (*pn.pn_path != '/') { 1090 error = EINVAL; 1091 pn_free(&pn); 1092 goto errout; 1093 } 1094 pn_alloc(&rpn); 1095 /* 1096 * Kludge to prevent autofs from deadlocking with 1097 * itself when it calls domount(). 1098 * 1099 * If autofs is calling, it is because it is doing 1100 * (autofs) mounts in the process of an NFS mount. A 1101 * lookuppn() here would cause us to block waiting for 1102 * said NFS mount to complete, which can't since this 1103 * is the thread that was supposed to doing it. 1104 */ 1105 if (fromspace == UIO_USERSPACE) { 1106 if ((error = lookuppn(&pn, &rpn, FOLLOW, NULL, 1107 NULL)) == 0) { 1108 pnp = &rpn; 1109 } else { 1110 /* 1111 * The file disappeared or otherwise 1112 * became inaccessible since we opened 1113 * it; might as well fail the mount 1114 * since the mount point is no longer 1115 * accessible. 1116 */ 1117 pn_free(&rpn); 1118 pn_free(&pn); 1119 goto errout; 1120 } 1121 } else { 1122 pnp = &pn; 1123 } 1124 mountpt = kmem_alloc(pnp->pn_pathlen + 1, KM_SLEEP); 1125 (void) strcpy(mountpt, pnp->pn_path); 1126 1127 /* 1128 * If the addition of the zone's rootpath 1129 * would push us over a total path length 1130 * of MAXPATHLEN, we fail the mount with 1131 * ENAMETOOLONG, which is what we would have 1132 * gotten if we were trying to perform the same 1133 * mount in the global zone. 1134 * 1135 * strlen() doesn't count the trailing 1136 * '\0', but zone_rootpathlen counts both a 1137 * trailing '/' and the terminating '\0'. 
1138 */ 1139 if ((curproc->p_zone->zone_rootpathlen - 1 + 1140 strlen(mountpt)) > MAXPATHLEN || 1141 (resource != NULL && 1142 (curproc->p_zone->zone_rootpathlen - 1 + 1143 strlen(resource)) > MAXPATHLEN)) { 1144 error = ENAMETOOLONG; 1145 } 1146 1147 pn_free(&rpn); 1148 pn_free(&pn); 1149 } 1150 1151 if (error) 1152 goto errout; 1153 1154 /* 1155 * Prevent path name resolution from proceeding past 1156 * the mount point. 1157 */ 1158 if (vn_vfswlock(vp) != 0) { 1159 error = EBUSY; 1160 goto errout; 1161 } 1162 1163 /* 1164 * Verify that it's legitimate to establish a mount on 1165 * the prospective mount point. 1166 */ 1167 if (vn_mountedvfs(vp) != NULL) { 1168 /* 1169 * The mount point lock was obtained after some 1170 * other thread raced through and established a mount. 1171 */ 1172 vn_vfsunlock(vp); 1173 error = EBUSY; 1174 goto errout; 1175 } 1176 if (vp->v_flag & VNOMOUNT) { 1177 vn_vfsunlock(vp); 1178 error = EINVAL; 1179 goto errout; 1180 } 1181 } 1182 if ((uap->flags & (MS_DATA | MS_OPTIONSTR)) == 0) { 1183 uap->dataptr = NULL; 1184 uap->datalen = 0; 1185 } 1186 1187 /* 1188 * If this is a remount, we don't want to create a new VFS. 1189 * Instead, we pass the existing one with a remount flag. 1190 */ 1191 if (remount) { 1192 /* 1193 * Confirm that the mount point is the root vnode of the 1194 * file system that is being remounted. 1195 * This can happen if the user specifies a different 1196 * mount point directory pathname in the (re)mount command. 1197 * 1198 * Code below can only be reached if splice is true, so it's 1199 * safe to do vn_vfsunlock() here. 1200 */ 1201 if ((vp->v_flag & VROOT) == 0) { 1202 vn_vfsunlock(vp); 1203 error = ENOENT; 1204 goto errout; 1205 } 1206 /* 1207 * Disallow making file systems read-only unless file system 1208 * explicitly allows it in its vfssw. Ignore other flags. 
1209 */ 1210 if (rdonly && vn_is_readonly(vp) == 0 && 1211 (vswp->vsw_flag & VSW_CANRWRO) == 0) { 1212 vn_vfsunlock(vp); 1213 error = EINVAL; 1214 goto errout; 1215 } 1216 /* 1217 * Changing the NBMAND setting on remounts is permitted 1218 * but logged since it can lead to unexpected behavior. 1219 * We also counsel against using it for / and /usr. 1220 */ 1221 if ((nbmand && ((vp->v_vfsp->vfs_flag & VFS_NBMAND) == 0)) || 1222 (!nbmand && (vp->v_vfsp->vfs_flag & VFS_NBMAND))) { 1223 cmn_err(CE_WARN, "domount: nbmand turned %s via " 1224 "remounting %s", nbmand ? "on" : "off", 1225 refstr_value(vp->v_vfsp->vfs_mntpt)); 1226 } 1227 vfsp = vp->v_vfsp; 1228 ovflags = vfsp->vfs_flag; 1229 vfsp->vfs_flag |= VFS_REMOUNT; 1230 vfsp->vfs_flag &= ~VFS_RDONLY; 1231 } else { 1232 vfsp = kmem_alloc(sizeof (vfs_t), KM_SLEEP); 1233 VFS_INIT(vfsp, vfsops, NULL); 1234 } 1235 1236 VFS_HOLD(vfsp); 1237 1238 /* 1239 * The vfs_reflock is not used anymore the code below explicitly 1240 * holds it preventing others accesing it directly. 1241 */ 1242 if ((sema_tryp(&vfsp->vfs_reflock) == 0) && 1243 !(vfsp->vfs_flag & VFS_REMOUNT)) 1244 cmn_err(CE_WARN, 1245 "mount type %s couldn't get vfs_reflock\n", vswp->vsw_name); 1246 1247 /* 1248 * Lock the vfs. If this is a remount we want to avoid spurious umount 1249 * failures that happen as a side-effect of fsflush() and other mount 1250 * and unmount operations that might be going on simultaneously and 1251 * may have locked the vfs currently. To not return EBUSY immediately 1252 * here we use vfs_lock_wait() instead vfs_lock() for the remount case. 
1253 */ 1254 if (!remount) { 1255 if (error = vfs_lock(vfsp)) { 1256 vfsp->vfs_flag = ovflags; 1257 if (splice) 1258 vn_vfsunlock(vp); 1259 if (vfsp->vfs_implp) 1260 vfsimpl_teardown(vfsp); 1261 kmem_free(vfsp, sizeof (struct vfs)); 1262 goto errout; 1263 } 1264 } else { 1265 vfs_lock_wait(vfsp); 1266 } 1267 1268 /* 1269 * Add device to mount in progress table, global mounts require special 1270 * handling. It is possible that we have already done the lookupname 1271 * on a spliced, non-global fs. If so, we don't want to do it again 1272 * since we cannot do a lookupname after taking the 1273 * wlock above. This case is for a non-spliced, non-global filesystem. 1274 */ 1275 if (!addmip) { 1276 if ((uap->flags & MS_GLOBAL) == 0 && 1277 lookupname(uap->spec, fromspace, FOLLOW, NULL, &bvp) == 0) { 1278 addmip = 1; 1279 } 1280 } 1281 1282 if (addmip) { 1283 bdev = bvp->v_rdev; 1284 VN_RELE(bvp); 1285 vfs_addmip(bdev, vfsp); 1286 addmip = 0; 1287 delmip = 1; 1288 } 1289 /* 1290 * Invalidate cached entry for the mount point. 1291 */ 1292 if (splice) 1293 dnlc_purge_vp(vp); 1294 1295 /* 1296 * If have an option string but the filesystem doesn't supply a 1297 * prototype options table, create a table with the global 1298 * options and sufficient room to accept all the options in the 1299 * string. Then parse the passed in option string 1300 * accepting all the options in the string. This gives us an 1301 * option table with all the proper cancel properties for the 1302 * global options. 1303 * 1304 * Filesystems that supply a prototype options table are handled 1305 * earlier in this function. 
1306 */ 1307 if (uap->flags & MS_OPTIONSTR) { 1308 if (!(vswp->vsw_flag & VSW_HASPROTO)) { 1309 mntopts_t tmp_mntopts; 1310 1311 tmp_mntopts.mo_count = 0; 1312 vfs_createopttbl_extend(&tmp_mntopts, inargs, 1313 &mnt_mntopts); 1314 vfs_parsemntopts(&tmp_mntopts, inargs, 1); 1315 vfs_swapopttbl_nolock(&mnt_mntopts, &tmp_mntopts); 1316 vfs_freeopttbl(&tmp_mntopts); 1317 } 1318 } 1319 1320 /* 1321 * Serialize with zone creations. 1322 */ 1323 mount_in_progress(); 1324 /* 1325 * Instantiate (or reinstantiate) the file system. If appropriate, 1326 * splice it into the file system name space. 1327 * 1328 * We want VFS_MOUNT() to be able to override the vfs_resource 1329 * string if necessary (ie, mntfs), and also for a remount to 1330 * change the same (necessary when remounting '/' during boot). 1331 * So we set up vfs_mntpt and vfs_resource to what we think they 1332 * should be, then hand off control to VFS_MOUNT() which can 1333 * override this. 1334 * 1335 * For safety's sake, when changing vfs_resource or vfs_mntpt of 1336 * a vfs which is on the vfs list (i.e. during a remount), we must 1337 * never set those fields to NULL. Several bits of code make 1338 * assumptions that the fields are always valid. 
1339 */ 1340 vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts); 1341 if (remount) { 1342 if ((oldresource = vfsp->vfs_resource) != NULL) 1343 refstr_hold(oldresource); 1344 if ((oldmntpt = vfsp->vfs_mntpt) != NULL) 1345 refstr_hold(oldmntpt); 1346 } 1347 vfs_setresource(vfsp, resource); 1348 vfs_setmntpoint(vfsp, mountpt); 1349 1350 error = VFS_MOUNT(vfsp, vp, uap, credp); 1351 1352 if (uap->flags & MS_RDONLY) 1353 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 1354 if (uap->flags & MS_NOSUID) 1355 vfs_setmntopt(vfsp, MNTOPT_NOSUID, NULL, 0); 1356 if (uap->flags & MS_GLOBAL) 1357 vfs_setmntopt(vfsp, MNTOPT_GLOBAL, NULL, 0); 1358 1359 if (error) { 1360 if (remount) { 1361 /* put back pre-remount options */ 1362 vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts); 1363 vfs_setmntpoint(vfsp, (stripzonepath( 1364 refstr_value(oldmntpt)))); 1365 if (oldmntpt) 1366 refstr_rele(oldmntpt); 1367 vfs_setresource(vfsp, (stripzonepath( 1368 refstr_value(oldresource)))); 1369 if (oldresource) 1370 refstr_rele(oldresource); 1371 vfsp->vfs_flag = ovflags; 1372 vfs_unlock(vfsp); 1373 VFS_RELE(vfsp); 1374 } else { 1375 vfs_unlock(vfsp); 1376 vfs_freemnttab(vfsp); 1377 if (vfsp->vfs_implp) 1378 vfsimpl_teardown(vfsp); 1379 kmem_free(vfsp, sizeof (struct vfs)); 1380 } 1381 } else { 1382 /* 1383 * Set the mount time to now 1384 */ 1385 vfsp->vfs_mtime = ddi_get_time(); 1386 if (remount) { 1387 vfsp->vfs_flag &= ~VFS_REMOUNT; 1388 if (oldresource) 1389 refstr_rele(oldresource); 1390 if (oldmntpt) 1391 refstr_rele(oldmntpt); 1392 } else if (splice) { 1393 /* 1394 * Link vfsp into the name space at the mount 1395 * point. Vfs_add() is responsible for 1396 * holding the mount point which will be 1397 * released when vfs_remove() is called. 1398 */ 1399 vfs_add(vp, vfsp, uap->flags); 1400 } else { 1401 /* 1402 * Hold the reference to file system which is 1403 * not linked into the name space. 
1404 */ 1405 vfsp->vfs_zone = NULL; 1406 VFS_HOLD(vfsp); 1407 vfsp->vfs_vnodecovered = NULL; 1408 } 1409 /* 1410 * Set flags for global options encountered 1411 */ 1412 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 1413 vfsp->vfs_flag |= VFS_RDONLY; 1414 else 1415 vfsp->vfs_flag &= ~VFS_RDONLY; 1416 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 1417 vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES); 1418 } else { 1419 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 1420 vfsp->vfs_flag |= VFS_NODEVICES; 1421 else 1422 vfsp->vfs_flag &= ~VFS_NODEVICES; 1423 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 1424 vfsp->vfs_flag |= VFS_NOSETUID; 1425 else 1426 vfsp->vfs_flag &= ~VFS_NOSETUID; 1427 } 1428 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 1429 vfsp->vfs_flag |= VFS_NBMAND; 1430 else 1431 vfsp->vfs_flag &= ~VFS_NBMAND; 1432 1433 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 1434 vfsp->vfs_flag |= VFS_XATTR; 1435 else 1436 vfsp->vfs_flag &= ~VFS_XATTR; 1437 1438 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 1439 vfsp->vfs_flag |= VFS_NOEXEC; 1440 else 1441 vfsp->vfs_flag &= ~VFS_NOEXEC; 1442 1443 /* 1444 * Now construct the output option string of options 1445 * we recognized. 1446 */ 1447 if (uap->flags & MS_OPTIONSTR) { 1448 vfs_list_read_lock(); 1449 copyout_error = vfs_buildoptionstr( 1450 &vfsp->vfs_mntopts, inargs, optlen); 1451 vfs_list_unlock(); 1452 if (copyout_error == 0 && 1453 (uap->flags & MS_SYSSPACE) == 0) { 1454 copyout_error = copyoutstr(inargs, opts, 1455 optlen, NULL); 1456 } 1457 } 1458 1459 /* 1460 * If this isn't a remount, set up the vopstats before 1461 * anyone can touch this. We only allow spliced file 1462 * systems (file systems which are in the namespace) to 1463 * have the VFS_STATS flag set. 1464 * NOTE: PxFS mounts the underlying file system with 1465 * MS_NOSPLICE set and copies those vfs_flags to its private 1466 * vfs structure. 
As a result, PxFS should never have 1467 * the VFS_STATS flag or else we might access the vfs 1468 * statistics-related fields prior to them being 1469 * properly initialized. 1470 */ 1471 if (!remount && (vswp->vsw_flag & VSW_STATS) && splice) { 1472 initialize_vopstats(&vfsp->vfs_vopstats); 1473 /* 1474 * We need to set vfs_vskap to NULL because there's 1475 * a chance it won't be set below. This is checked 1476 * in teardown_vopstats() so we can't have garbage. 1477 */ 1478 vfsp->vfs_vskap = NULL; 1479 vfsp->vfs_flag |= VFS_STATS; 1480 vfsp->vfs_fstypevsp = get_fstype_vopstats(vfsp, vswp); 1481 } 1482 1483 vfs_unlock(vfsp); 1484 } 1485 mount_completed(); 1486 if (splice) 1487 vn_vfsunlock(vp); 1488 1489 if ((error == 0) && (copyout_error == 0)) { 1490 if (!remount) { 1491 /* 1492 * Don't call get_vskstat_anchor() while holding 1493 * locks since it allocates memory and calls 1494 * VFS_STATVFS(). For NFS, the latter can generate 1495 * an over-the-wire call. 1496 */ 1497 vskap = get_vskstat_anchor(vfsp); 1498 /* Only take the lock if we have something to do */ 1499 if (vskap != NULL) { 1500 vfs_lock_wait(vfsp); 1501 if (vfsp->vfs_flag & VFS_STATS) { 1502 vfsp->vfs_vskap = vskap; 1503 } 1504 vfs_unlock(vfsp); 1505 } 1506 } 1507 /* Return vfsp to caller. */ 1508 *vfspp = vfsp; 1509 } 1510 errout: 1511 vfs_freeopttbl(&mnt_mntopts); 1512 if (resource != NULL) 1513 kmem_free(resource, strlen(resource) + 1); 1514 if (mountpt != NULL) 1515 kmem_free(mountpt, strlen(mountpt) + 1); 1516 /* 1517 * It is possible we errored prior to adding to mount in progress 1518 * table. Must free vnode we acquired with successful lookupname. 
1519 */ 1520 if (addmip) 1521 VN_RELE(bvp); 1522 if (delmip) 1523 vfs_delmip(vfsp); 1524 ASSERT(vswp != NULL); 1525 vfs_unrefvfssw(vswp); 1526 if (inargs != opts) 1527 kmem_free(inargs, MAX_MNTOPT_STR); 1528 if (copyout_error) { 1529 VFS_RELE(vfsp); 1530 error = copyout_error; 1531 } 1532 return (error); 1533 } 1534 1535 static void 1536 vfs_setpath(struct vfs *vfsp, refstr_t **refp, const char *newpath) 1537 { 1538 size_t len; 1539 refstr_t *ref; 1540 zone_t *zone = curproc->p_zone; 1541 char *sp; 1542 int have_list_lock = 0; 1543 1544 ASSERT(!VFS_ON_LIST(vfsp) || vfs_lock_held(vfsp)); 1545 1546 /* 1547 * New path must be less than MAXPATHLEN because mntfs 1548 * will only display up to MAXPATHLEN bytes. This is currently 1549 * safe, because domount() uses pn_get(), and other callers 1550 * similarly cap the size to fewer than MAXPATHLEN bytes. 1551 */ 1552 1553 ASSERT(strlen(newpath) < MAXPATHLEN); 1554 1555 /* mntfs requires consistency while vfs list lock is held */ 1556 1557 if (VFS_ON_LIST(vfsp)) { 1558 have_list_lock = 1; 1559 vfs_list_lock(); 1560 } 1561 1562 if (*refp != NULL) 1563 refstr_rele(*refp); 1564 1565 /* Do we need to modify the path? */ 1566 1567 if (zone == global_zone || *newpath != '/') { 1568 ref = refstr_alloc(newpath); 1569 goto out; 1570 } 1571 1572 /* 1573 * Truncate the trailing '/' in the zoneroot, and merge 1574 * in the zone's rootpath with the "newpath" (resource 1575 * or mountpoint) passed in. 1576 * 1577 * The size of the required buffer is thus the size of 1578 * the buffer required for the passed-in newpath 1579 * (strlen(newpath) + 1), plus the size of the buffer 1580 * required to hold zone_rootpath (zone_rootpathlen) 1581 * minus one for one of the now-superfluous NUL 1582 * terminations, minus one for the trailing '/'. 1583 * 1584 * That gives us: 1585 * 1586 * (strlen(newpath) + 1) + zone_rootpathlen - 1 - 1 1587 * 1588 * Which is what we have below. 
1589 */ 1590 1591 len = strlen(newpath) + zone->zone_rootpathlen - 1; 1592 sp = kmem_alloc(len, KM_SLEEP); 1593 1594 /* 1595 * Copy everything including the trailing slash, which 1596 * we then overwrite with the NUL character. 1597 */ 1598 1599 (void) strcpy(sp, zone->zone_rootpath); 1600 sp[zone->zone_rootpathlen - 2] = '\0'; 1601 (void) strcat(sp, newpath); 1602 1603 ref = refstr_alloc(sp); 1604 kmem_free(sp, len); 1605 out: 1606 *refp = ref; 1607 1608 if (have_list_lock) { 1609 vfs_mnttab_modtimeupd(); 1610 vfs_list_unlock(); 1611 } 1612 } 1613 1614 /* 1615 * Record a mounted resource name in a vfs structure. 1616 * If vfsp is already mounted, caller must hold the vfs lock. 1617 */ 1618 void 1619 vfs_setresource(struct vfs *vfsp, const char *resource) 1620 { 1621 if (resource == NULL || resource[0] == '\0') 1622 resource = VFS_NORESOURCE; 1623 vfs_setpath(vfsp, &vfsp->vfs_resource, resource); 1624 } 1625 1626 /* 1627 * Record a mount point name in a vfs structure. 1628 * If vfsp is already mounted, caller must hold the vfs lock. 1629 */ 1630 void 1631 vfs_setmntpoint(struct vfs *vfsp, const char *mntpt) 1632 { 1633 if (mntpt == NULL || mntpt[0] == '\0') 1634 mntpt = VFS_NOMNTPT; 1635 vfs_setpath(vfsp, &vfsp->vfs_mntpt, mntpt); 1636 } 1637 1638 /* Returns the vfs_resource. Caller must call refstr_rele() when finished. */ 1639 1640 refstr_t * 1641 vfs_getresource(const struct vfs *vfsp) 1642 { 1643 refstr_t *resource; 1644 1645 vfs_list_read_lock(); 1646 resource = vfsp->vfs_resource; 1647 refstr_hold(resource); 1648 vfs_list_unlock(); 1649 1650 return (resource); 1651 } 1652 1653 /* Returns the vfs_mntpt. Caller must call refstr_rele() when finished. 
*/ 1654 1655 refstr_t * 1656 vfs_getmntpoint(const struct vfs *vfsp) 1657 { 1658 refstr_t *mntpt; 1659 1660 vfs_list_read_lock(); 1661 mntpt = vfsp->vfs_mntpt; 1662 refstr_hold(mntpt); 1663 vfs_list_unlock(); 1664 1665 return (mntpt); 1666 } 1667 1668 /* 1669 * Create an empty options table with enough empty slots to hold all 1670 * The options in the options string passed as an argument. 1671 * Potentially prepend another options table. 1672 * 1673 * Note: caller is responsible for locking the vfs list, if needed, 1674 * to protect mops. 1675 */ 1676 static void 1677 vfs_createopttbl_extend(mntopts_t *mops, const char *opts, 1678 const mntopts_t *mtmpl) 1679 { 1680 const char *s = opts; 1681 uint_t count; 1682 1683 if (opts == NULL || *opts == '\0') { 1684 count = 0; 1685 } else { 1686 count = 1; 1687 1688 /* 1689 * Count number of options in the string 1690 */ 1691 for (s = strchr(s, ','); s != NULL; s = strchr(s, ',')) { 1692 count++; 1693 s++; 1694 } 1695 } 1696 vfs_copyopttbl_extend(mtmpl, mops, count); 1697 } 1698 1699 /* 1700 * Create an empty options table with enough empty slots to hold all 1701 * The options in the options string passed as an argument. 1702 * 1703 * This function is *not* for general use by filesystems. 1704 * 1705 * Note: caller is responsible for locking the vfs list, if needed, 1706 * to protect mops. 
1707 */ 1708 void 1709 vfs_createopttbl(mntopts_t *mops, const char *opts) 1710 { 1711 vfs_createopttbl_extend(mops, opts, NULL); 1712 } 1713 1714 1715 /* 1716 * Swap two mount options tables 1717 */ 1718 static void 1719 vfs_swapopttbl_nolock(mntopts_t *optbl1, mntopts_t *optbl2) 1720 { 1721 uint_t tmpcnt; 1722 mntopt_t *tmplist; 1723 1724 tmpcnt = optbl2->mo_count; 1725 tmplist = optbl2->mo_list; 1726 optbl2->mo_count = optbl1->mo_count; 1727 optbl2->mo_list = optbl1->mo_list; 1728 optbl1->mo_count = tmpcnt; 1729 optbl1->mo_list = tmplist; 1730 } 1731 1732 static void 1733 vfs_swapopttbl(mntopts_t *optbl1, mntopts_t *optbl2) 1734 { 1735 vfs_list_lock(); 1736 vfs_swapopttbl_nolock(optbl1, optbl2); 1737 vfs_mnttab_modtimeupd(); 1738 vfs_list_unlock(); 1739 } 1740 1741 static char ** 1742 vfs_copycancelopt_extend(char **const moc, int extend) 1743 { 1744 int i = 0; 1745 int j; 1746 char **result; 1747 1748 if (moc != NULL) { 1749 for (; moc[i] != NULL; i++) 1750 /* count number of options to cancel */; 1751 } 1752 1753 if (i + extend == 0) 1754 return (NULL); 1755 1756 result = kmem_alloc((i + extend + 1) * sizeof (char *), KM_SLEEP); 1757 1758 for (j = 0; j < i; j++) { 1759 result[j] = kmem_alloc(strlen(moc[j]) + 1, KM_SLEEP); 1760 (void) strcpy(result[j], moc[j]); 1761 } 1762 for (; j <= i + extend; j++) 1763 result[j] = NULL; 1764 1765 return (result); 1766 } 1767 1768 static void 1769 vfs_copyopt(const mntopt_t *s, mntopt_t *d) 1770 { 1771 char *sp, *dp; 1772 1773 d->mo_flags = s->mo_flags; 1774 d->mo_data = s->mo_data; 1775 sp = s->mo_name; 1776 if (sp != NULL) { 1777 dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP); 1778 (void) strcpy(dp, sp); 1779 d->mo_name = dp; 1780 } else { 1781 d->mo_name = NULL; /* should never happen */ 1782 } 1783 1784 d->mo_cancel = vfs_copycancelopt_extend(s->mo_cancel, 0); 1785 1786 sp = s->mo_arg; 1787 if (sp != NULL) { 1788 dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP); 1789 (void) strcpy(dp, sp); 1790 d->mo_arg = dp; 1791 } else { 1792 
d->mo_arg = NULL; 1793 } 1794 } 1795 1796 /* 1797 * Copy a mount options table, possibly allocating some spare 1798 * slots at the end. It is permissible to copy_extend the NULL table. 1799 */ 1800 static void 1801 vfs_copyopttbl_extend(const mntopts_t *smo, mntopts_t *dmo, int extra) 1802 { 1803 uint_t i, count; 1804 mntopt_t *motbl; 1805 1806 /* 1807 * Clear out any existing stuff in the options table being initialized 1808 */ 1809 vfs_freeopttbl(dmo); 1810 count = (smo == NULL) ? 0 : smo->mo_count; 1811 if ((count + extra) == 0) /* nothing to do */ 1812 return; 1813 dmo->mo_count = count + extra; 1814 motbl = kmem_zalloc((count + extra) * sizeof (mntopt_t), KM_SLEEP); 1815 dmo->mo_list = motbl; 1816 for (i = 0; i < count; i++) { 1817 vfs_copyopt(&smo->mo_list[i], &motbl[i]); 1818 } 1819 for (i = count; i < count + extra; i++) { 1820 motbl[i].mo_flags = MO_EMPTY; 1821 } 1822 } 1823 1824 /* 1825 * Copy a mount options table. 1826 * 1827 * This function is *not* for general use by filesystems. 1828 * 1829 * Note: caller is responsible for locking the vfs list, if needed, 1830 * to protect smo and dmo. 1831 */ 1832 void 1833 vfs_copyopttbl(const mntopts_t *smo, mntopts_t *dmo) 1834 { 1835 vfs_copyopttbl_extend(smo, dmo, 0); 1836 } 1837 1838 static char ** 1839 vfs_mergecancelopts(const mntopt_t *mop1, const mntopt_t *mop2) 1840 { 1841 int c1 = 0; 1842 int c2 = 0; 1843 char **result; 1844 char **sp1, **sp2, **dp; 1845 1846 /* 1847 * First we count both lists of cancel options. 1848 * If either is NULL or has no elements, we return a copy of 1849 * the other. 
1850 */ 1851 if (mop1->mo_cancel != NULL) { 1852 for (; mop1->mo_cancel[c1] != NULL; c1++) 1853 /* count cancel options in mop1 */; 1854 } 1855 1856 if (c1 == 0) 1857 return (vfs_copycancelopt_extend(mop2->mo_cancel, 0)); 1858 1859 if (mop2->mo_cancel != NULL) { 1860 for (; mop2->mo_cancel[c2] != NULL; c2++) 1861 /* count cancel options in mop2 */; 1862 } 1863 1864 result = vfs_copycancelopt_extend(mop1->mo_cancel, c2); 1865 1866 if (c2 == 0) 1867 return (result); 1868 1869 /* 1870 * When we get here, we've got two sets of cancel options; 1871 * we need to merge the two sets. We know that the result 1872 * array has "c1+c2+1" entries and in the end we might shrink 1873 * it. 1874 * Result now has a copy of the c1 entries from mop1; we'll 1875 * now lookup all the entries of mop2 in mop1 and copy it if 1876 * it is unique. 1877 * This operation is O(n^2) but it's only called once per 1878 * filesystem per duplicate option. This is a situation 1879 * which doesn't arise with the filesystems in ON and 1880 * n is generally 1. 1881 */ 1882 1883 dp = &result[c1]; 1884 for (sp2 = mop2->mo_cancel; *sp2 != NULL; sp2++) { 1885 for (sp1 = mop1->mo_cancel; *sp1 != NULL; sp1++) { 1886 if (strcmp(*sp1, *sp2) == 0) 1887 break; 1888 } 1889 if (*sp1 == NULL) { 1890 /* 1891 * Option *sp2 not found in mop1, so copy it. 1892 * The calls to vfs_copycancelopt_extend() 1893 * guarantee that there's enough room. 1894 */ 1895 *dp = kmem_alloc(strlen(*sp2) + 1, KM_SLEEP); 1896 (void) strcpy(*dp++, *sp2); 1897 } 1898 } 1899 if (dp != &result[c1+c2]) { 1900 size_t bytes = (dp - result + 1) * sizeof (char *); 1901 char **nres = kmem_alloc(bytes, KM_SLEEP); 1902 1903 bcopy(result, nres, bytes); 1904 kmem_free(result, (c1 + c2 + 1) * sizeof (char *)); 1905 result = nres; 1906 } 1907 return (result); 1908 } 1909 1910 /* 1911 * Merge two mount option tables (outer and inner) into one. This is very 1912 * similar to "merging" global variables and automatic variables in C. 
1913 * 1914 * This isn't (and doesn't have to be) fast. 1915 * 1916 * This function is *not* for general use by filesystems. 1917 * 1918 * Note: caller is responsible for locking the vfs list, if needed, 1919 * to protect omo, imo & dmo. 1920 */ 1921 void 1922 vfs_mergeopttbl(const mntopts_t *omo, const mntopts_t *imo, mntopts_t *dmo) 1923 { 1924 uint_t i, count; 1925 mntopt_t *mop, *motbl; 1926 uint_t freeidx; 1927 1928 /* 1929 * First determine how much space we need to allocate. 1930 */ 1931 count = omo->mo_count; 1932 for (i = 0; i < imo->mo_count; i++) { 1933 if (imo->mo_list[i].mo_flags & MO_EMPTY) 1934 continue; 1935 if (vfs_hasopt(omo, imo->mo_list[i].mo_name) == NULL) 1936 count++; 1937 } 1938 ASSERT(count >= omo->mo_count && 1939 count <= omo->mo_count + imo->mo_count); 1940 motbl = kmem_alloc(count * sizeof (mntopt_t), KM_SLEEP); 1941 for (i = 0; i < omo->mo_count; i++) 1942 vfs_copyopt(&omo->mo_list[i], &motbl[i]); 1943 freeidx = omo->mo_count; 1944 for (i = 0; i < imo->mo_count; i++) { 1945 if (imo->mo_list[i].mo_flags & MO_EMPTY) 1946 continue; 1947 if ((mop = vfs_hasopt(omo, imo->mo_list[i].mo_name)) != NULL) { 1948 char **newcanp; 1949 uint_t index = mop - omo->mo_list; 1950 1951 newcanp = vfs_mergecancelopts(mop, &motbl[index]); 1952 1953 vfs_freeopt(&motbl[index]); 1954 vfs_copyopt(&imo->mo_list[i], &motbl[index]); 1955 1956 vfs_freecancelopt(motbl[index].mo_cancel); 1957 motbl[index].mo_cancel = newcanp; 1958 } else { 1959 /* 1960 * If it's a new option, just copy it over to the first 1961 * free location. 1962 */ 1963 vfs_copyopt(&imo->mo_list[i], &motbl[freeidx++]); 1964 } 1965 } 1966 dmo->mo_count = count; 1967 dmo->mo_list = motbl; 1968 } 1969 1970 /* 1971 * Functions to set and clear mount options in a mount options table. 1972 */ 1973 1974 /* 1975 * Clear a mount option, if it exists. 1976 * 1977 * The update_mnttab arg indicates whether mops is part of a vfs that is on 1978 * the vfs list. 
1979 */ 1980 static void 1981 vfs_clearmntopt_nolock(mntopts_t *mops, const char *opt, int update_mnttab) 1982 { 1983 struct mntopt *mop; 1984 uint_t i, count; 1985 1986 ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist)); 1987 1988 count = mops->mo_count; 1989 for (i = 0; i < count; i++) { 1990 mop = &mops->mo_list[i]; 1991 1992 if (mop->mo_flags & MO_EMPTY) 1993 continue; 1994 if (strcmp(opt, mop->mo_name)) 1995 continue; 1996 mop->mo_flags &= ~MO_SET; 1997 if (mop->mo_arg != NULL) { 1998 kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 1999 } 2000 mop->mo_arg = NULL; 2001 if (update_mnttab) 2002 vfs_mnttab_modtimeupd(); 2003 break; 2004 } 2005 } 2006 2007 void 2008 vfs_clearmntopt(struct vfs *vfsp, const char *opt) 2009 { 2010 int gotlock = 0; 2011 2012 if (VFS_ON_LIST(vfsp)) { 2013 gotlock = 1; 2014 vfs_list_lock(); 2015 } 2016 vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, opt, gotlock); 2017 if (gotlock) 2018 vfs_list_unlock(); 2019 } 2020 2021 2022 /* 2023 * Set a mount option on. If it's not found in the table, it's silently 2024 * ignored. If the option has MO_IGNORE set, it is still set unless the 2025 * VFS_NOFORCEOPT bit is set in the flags. Also, VFS_DISPLAY/VFS_NODISPLAY flag 2026 * bits can be used to toggle the MO_NODISPLAY bit for the option. 2027 * If the VFS_CREATEOPT flag bit is set then the first option slot with 2028 * MO_EMPTY set is created as the option passed in. 2029 * 2030 * The update_mnttab arg indicates whether mops is part of a vfs that is on 2031 * the vfs list. 
2032 */ 2033 static void 2034 vfs_setmntopt_nolock(mntopts_t *mops, const char *opt, 2035 const char *arg, int flags, int update_mnttab) 2036 { 2037 mntopt_t *mop; 2038 uint_t i, count; 2039 char *sp; 2040 2041 ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist)); 2042 2043 if (flags & VFS_CREATEOPT) { 2044 if (vfs_hasopt(mops, opt) != NULL) { 2045 flags &= ~VFS_CREATEOPT; 2046 } 2047 } 2048 count = mops->mo_count; 2049 for (i = 0; i < count; i++) { 2050 mop = &mops->mo_list[i]; 2051 2052 if (mop->mo_flags & MO_EMPTY) { 2053 if ((flags & VFS_CREATEOPT) == 0) 2054 continue; 2055 sp = kmem_alloc(strlen(opt) + 1, KM_SLEEP); 2056 (void) strcpy(sp, opt); 2057 mop->mo_name = sp; 2058 if (arg != NULL) 2059 mop->mo_flags = MO_HASVALUE; 2060 else 2061 mop->mo_flags = 0; 2062 } else if (strcmp(opt, mop->mo_name)) { 2063 continue; 2064 } 2065 if ((mop->mo_flags & MO_IGNORE) && (flags & VFS_NOFORCEOPT)) 2066 break; 2067 if (arg != NULL && (mop->mo_flags & MO_HASVALUE) != 0) { 2068 sp = kmem_alloc(strlen(arg) + 1, KM_SLEEP); 2069 (void) strcpy(sp, arg); 2070 } else { 2071 sp = NULL; 2072 } 2073 if (mop->mo_arg != NULL) 2074 kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 2075 mop->mo_arg = sp; 2076 if (flags & VFS_DISPLAY) 2077 mop->mo_flags &= ~MO_NODISPLAY; 2078 if (flags & VFS_NODISPLAY) 2079 mop->mo_flags |= MO_NODISPLAY; 2080 mop->mo_flags |= MO_SET; 2081 if (mop->mo_cancel != NULL) { 2082 char **cp; 2083 2084 for (cp = mop->mo_cancel; *cp != NULL; cp++) 2085 vfs_clearmntopt_nolock(mops, *cp, 0); 2086 } 2087 if (update_mnttab) 2088 vfs_mnttab_modtimeupd(); 2089 break; 2090 } 2091 } 2092 2093 void 2094 vfs_setmntopt(struct vfs *vfsp, const char *opt, const char *arg, int flags) 2095 { 2096 int gotlock = 0; 2097 2098 if (VFS_ON_LIST(vfsp)) { 2099 gotlock = 1; 2100 vfs_list_lock(); 2101 } 2102 vfs_setmntopt_nolock(&vfsp->vfs_mntopts, opt, arg, flags, gotlock); 2103 if (gotlock) 2104 vfs_list_unlock(); 2105 } 2106 2107 2108 /* 2109 * Add a "tag" option to a mounted file system's 
options list. 2110 * 2111 * Note: caller is responsible for locking the vfs list, if needed, 2112 * to protect mops. 2113 */ 2114 static mntopt_t * 2115 vfs_addtag(mntopts_t *mops, const char *tag) 2116 { 2117 uint_t count; 2118 mntopt_t *mop, *motbl; 2119 2120 count = mops->mo_count + 1; 2121 motbl = kmem_zalloc(count * sizeof (mntopt_t), KM_SLEEP); 2122 if (mops->mo_count) { 2123 size_t len = (count - 1) * sizeof (mntopt_t); 2124 2125 bcopy(mops->mo_list, motbl, len); 2126 kmem_free(mops->mo_list, len); 2127 } 2128 mops->mo_count = count; 2129 mops->mo_list = motbl; 2130 mop = &motbl[count - 1]; 2131 mop->mo_flags = MO_TAG; 2132 mop->mo_name = kmem_alloc(strlen(tag) + 1, KM_SLEEP); 2133 (void) strcpy(mop->mo_name, tag); 2134 return (mop); 2135 } 2136 2137 /* 2138 * Allow users to set arbitrary "tags" in a vfs's mount options. 2139 * Broader use within the kernel is discouraged. 2140 */ 2141 int 2142 vfs_settag(uint_t major, uint_t minor, const char *mntpt, const char *tag, 2143 cred_t *cr) 2144 { 2145 vfs_t *vfsp; 2146 mntopts_t *mops; 2147 mntopt_t *mop; 2148 int found = 0; 2149 dev_t dev = makedevice(major, minor); 2150 int err = 0; 2151 char *buf = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP); 2152 2153 /* 2154 * Find the desired mounted file system 2155 */ 2156 vfs_list_lock(); 2157 vfsp = rootvfs; 2158 do { 2159 if (vfsp->vfs_dev == dev && 2160 strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) { 2161 found = 1; 2162 break; 2163 } 2164 vfsp = vfsp->vfs_next; 2165 } while (vfsp != rootvfs); 2166 2167 if (!found) { 2168 err = EINVAL; 2169 goto out; 2170 } 2171 err = secpolicy_fs_config(cr, vfsp); 2172 if (err != 0) 2173 goto out; 2174 2175 mops = &vfsp->vfs_mntopts; 2176 /* 2177 * Add tag if it doesn't already exist 2178 */ 2179 if ((mop = vfs_hasopt(mops, tag)) == NULL) { 2180 int len; 2181 2182 (void) vfs_buildoptionstr(mops, buf, MAX_MNTOPT_STR); 2183 len = strlen(buf); 2184 if (len + strlen(tag) + 2 > MAX_MNTOPT_STR) { 2185 err = ENAMETOOLONG; 2186 goto out; 2187 
} 2188 mop = vfs_addtag(mops, tag); 2189 } 2190 if ((mop->mo_flags & MO_TAG) == 0) { 2191 err = EINVAL; 2192 goto out; 2193 } 2194 vfs_setmntopt_nolock(mops, tag, NULL, 0, 1); 2195 out: 2196 vfs_list_unlock(); 2197 kmem_free(buf, MAX_MNTOPT_STR); 2198 return (err); 2199 } 2200 2201 /* 2202 * Allow users to remove arbitrary "tags" in a vfs's mount options. 2203 * Broader use within the kernel is discouraged. 2204 */ 2205 int 2206 vfs_clrtag(uint_t major, uint_t minor, const char *mntpt, const char *tag, 2207 cred_t *cr) 2208 { 2209 vfs_t *vfsp; 2210 mntopt_t *mop; 2211 int found = 0; 2212 dev_t dev = makedevice(major, minor); 2213 int err = 0; 2214 2215 /* 2216 * Find the desired mounted file system 2217 */ 2218 vfs_list_lock(); 2219 vfsp = rootvfs; 2220 do { 2221 if (vfsp->vfs_dev == dev && 2222 strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) { 2223 found = 1; 2224 break; 2225 } 2226 vfsp = vfsp->vfs_next; 2227 } while (vfsp != rootvfs); 2228 2229 if (!found) { 2230 err = EINVAL; 2231 goto out; 2232 } 2233 err = secpolicy_fs_config(cr, vfsp); 2234 if (err != 0) 2235 goto out; 2236 2237 if ((mop = vfs_hasopt(&vfsp->vfs_mntopts, tag)) == NULL) { 2238 err = EINVAL; 2239 goto out; 2240 } 2241 if ((mop->mo_flags & MO_TAG) == 0) { 2242 err = EINVAL; 2243 goto out; 2244 } 2245 vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, tag, 1); 2246 out: 2247 vfs_list_unlock(); 2248 return (err); 2249 } 2250 2251 /* 2252 * Function to parse an option string and fill in a mount options table. 2253 * Unknown options are silently ignored. The input option string is modified 2254 * by replacing separators with nulls. If the create flag is set, options 2255 * not found in the table are just added on the fly. The table must have 2256 * an option slot marked MO_EMPTY to add an option on the fly. 2257 * 2258 * This function is *not* for general use by filesystems. 2259 * 2260 * Note: caller is responsible for locking the vfs list, if needed, 2261 * to protect mops.. 
2262 */ 2263 void 2264 vfs_parsemntopts(mntopts_t *mops, char *osp, int create) 2265 { 2266 char *s = osp, *p, *nextop, *valp, *cp, *ep; 2267 int setflg = VFS_NOFORCEOPT; 2268 2269 if (osp == NULL) 2270 return; 2271 while (*s != '\0') { 2272 p = strchr(s, ','); /* find next option */ 2273 if (p == NULL) { 2274 cp = NULL; 2275 p = s + strlen(s); 2276 } else { 2277 cp = p; /* save location of comma */ 2278 *p++ = '\0'; /* mark end and point to next option */ 2279 } 2280 nextop = p; 2281 p = strchr(s, '='); /* look for value */ 2282 if (p == NULL) { 2283 valp = NULL; /* no value supplied */ 2284 } else { 2285 ep = p; /* save location of equals */ 2286 *p++ = '\0'; /* end option and point to value */ 2287 valp = p; 2288 } 2289 /* 2290 * set option into options table 2291 */ 2292 if (create) 2293 setflg |= VFS_CREATEOPT; 2294 vfs_setmntopt_nolock(mops, s, valp, setflg, 0); 2295 if (cp != NULL) 2296 *cp = ','; /* restore the comma */ 2297 if (valp != NULL) 2298 *ep = '='; /* restore the equals */ 2299 s = nextop; 2300 } 2301 } 2302 2303 /* 2304 * Function to inquire if an option exists in a mount options table. 2305 * Returns a pointer to the option if it exists, else NULL. 2306 * 2307 * This function is *not* for general use by filesystems. 2308 * 2309 * Note: caller is responsible for locking the vfs list, if needed, 2310 * to protect mops. 2311 */ 2312 struct mntopt * 2313 vfs_hasopt(const mntopts_t *mops, const char *opt) 2314 { 2315 struct mntopt *mop; 2316 uint_t i, count; 2317 2318 count = mops->mo_count; 2319 for (i = 0; i < count; i++) { 2320 mop = &mops->mo_list[i]; 2321 2322 if (mop->mo_flags & MO_EMPTY) 2323 continue; 2324 if (strcmp(opt, mop->mo_name) == 0) 2325 return (mop); 2326 } 2327 return (NULL); 2328 } 2329 2330 /* 2331 * Function to inquire if an option is set in a mount options table. 2332 * Returns non-zero if set and fills in the arg pointer with a pointer to 2333 * the argument string or NULL if there is no argument string. 
2334 */ 2335 static int 2336 vfs_optionisset_nolock(const mntopts_t *mops, const char *opt, char **argp) 2337 { 2338 struct mntopt *mop; 2339 uint_t i, count; 2340 2341 count = mops->mo_count; 2342 for (i = 0; i < count; i++) { 2343 mop = &mops->mo_list[i]; 2344 2345 if (mop->mo_flags & MO_EMPTY) 2346 continue; 2347 if (strcmp(opt, mop->mo_name)) 2348 continue; 2349 if ((mop->mo_flags & MO_SET) == 0) 2350 return (0); 2351 if (argp != NULL && (mop->mo_flags & MO_HASVALUE) != 0) 2352 *argp = mop->mo_arg; 2353 return (1); 2354 } 2355 return (0); 2356 } 2357 2358 2359 int 2360 vfs_optionisset(const struct vfs *vfsp, const char *opt, char **argp) 2361 { 2362 int ret; 2363 2364 vfs_list_read_lock(); 2365 ret = vfs_optionisset_nolock(&vfsp->vfs_mntopts, opt, argp); 2366 vfs_list_unlock(); 2367 return (ret); 2368 } 2369 2370 2371 /* 2372 * Construct a comma separated string of the options set in the given 2373 * mount table, return the string in the given buffer. Return non-zero if 2374 * the buffer would overflow. 2375 * 2376 * This function is *not* for general use by filesystems. 2377 * 2378 * Note: caller is responsible for locking the vfs list, if needed, 2379 * to protect mp. 
2380 */ 2381 int 2382 vfs_buildoptionstr(const mntopts_t *mp, char *buf, int len) 2383 { 2384 char *cp; 2385 uint_t i; 2386 2387 buf[0] = '\0'; 2388 cp = buf; 2389 for (i = 0; i < mp->mo_count; i++) { 2390 struct mntopt *mop; 2391 2392 mop = &mp->mo_list[i]; 2393 if (mop->mo_flags & MO_SET) { 2394 int optlen, comma = 0; 2395 2396 if (buf[0] != '\0') 2397 comma = 1; 2398 optlen = strlen(mop->mo_name); 2399 if (strlen(buf) + comma + optlen + 1 > len) 2400 goto err; 2401 if (comma) 2402 *cp++ = ','; 2403 (void) strcpy(cp, mop->mo_name); 2404 cp += optlen; 2405 /* 2406 * Append option value if there is one 2407 */ 2408 if (mop->mo_arg != NULL) { 2409 int arglen; 2410 2411 arglen = strlen(mop->mo_arg); 2412 if (strlen(buf) + arglen + 2 > len) 2413 goto err; 2414 *cp++ = '='; 2415 (void) strcpy(cp, mop->mo_arg); 2416 cp += arglen; 2417 } 2418 } 2419 } 2420 return (0); 2421 err: 2422 return (EOVERFLOW); 2423 } 2424 2425 static void 2426 vfs_freecancelopt(char **moc) 2427 { 2428 if (moc != NULL) { 2429 int ccnt = 0; 2430 char **cp; 2431 2432 for (cp = moc; *cp != NULL; cp++) { 2433 kmem_free(*cp, strlen(*cp) + 1); 2434 ccnt++; 2435 } 2436 kmem_free(moc, (ccnt + 1) * sizeof (char *)); 2437 } 2438 } 2439 2440 static void 2441 vfs_freeopt(mntopt_t *mop) 2442 { 2443 if (mop->mo_name != NULL) 2444 kmem_free(mop->mo_name, strlen(mop->mo_name) + 1); 2445 2446 vfs_freecancelopt(mop->mo_cancel); 2447 2448 if (mop->mo_arg != NULL) 2449 kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 2450 } 2451 2452 /* 2453 * Free a mount options table 2454 * 2455 * This function is *not* for general use by filesystems. 2456 * 2457 * Note: caller is responsible for locking the vfs list, if needed, 2458 * to protect mp. 
2459 */ 2460 void 2461 vfs_freeopttbl(mntopts_t *mp) 2462 { 2463 uint_t i, count; 2464 2465 count = mp->mo_count; 2466 for (i = 0; i < count; i++) { 2467 vfs_freeopt(&mp->mo_list[i]); 2468 } 2469 if (count) { 2470 kmem_free(mp->mo_list, sizeof (mntopt_t) * count); 2471 mp->mo_count = 0; 2472 mp->mo_list = NULL; 2473 } 2474 } 2475 2476 /* 2477 * Free any mnttab information recorded in the vfs struct. 2478 * The vfs must not be on the vfs list. 2479 */ 2480 static void 2481 vfs_freemnttab(struct vfs *vfsp) 2482 { 2483 ASSERT(!VFS_ON_LIST(vfsp)); 2484 2485 /* 2486 * Free device and mount point information 2487 */ 2488 if (vfsp->vfs_mntpt != NULL) { 2489 refstr_rele(vfsp->vfs_mntpt); 2490 vfsp->vfs_mntpt = NULL; 2491 } 2492 if (vfsp->vfs_resource != NULL) { 2493 refstr_rele(vfsp->vfs_resource); 2494 vfsp->vfs_resource = NULL; 2495 } 2496 /* 2497 * Now free mount options information 2498 */ 2499 vfs_freeopttbl(&vfsp->vfs_mntopts); 2500 } 2501 2502 /* 2503 * Return the last mnttab modification time 2504 */ 2505 void 2506 vfs_mnttab_modtime(timespec_t *ts) 2507 { 2508 ASSERT(RW_LOCK_HELD(&vfslist)); 2509 *ts = vfs_mnttab_mtime; 2510 } 2511 2512 /* 2513 * See if mnttab is changed 2514 */ 2515 void 2516 vfs_mnttab_poll(timespec_t *old, struct pollhead **phpp) 2517 { 2518 int changed; 2519 2520 *phpp = (struct pollhead *)NULL; 2521 2522 /* 2523 * Note: don't grab vfs list lock before accessing vfs_mnttab_mtime. 2524 * Can lead to deadlock against vfs_mnttab_modtimeupd(). It is safe 2525 * to not grab the vfs list lock because tv_sec is monotonically 2526 * increasing. 
2527 */ 2528 2529 changed = (old->tv_nsec != vfs_mnttab_mtime.tv_nsec) || 2530 (old->tv_sec != vfs_mnttab_mtime.tv_sec); 2531 if (!changed) { 2532 *phpp = &vfs_pollhd; 2533 } 2534 } 2535 2536 /* 2537 * Update the mnttab modification time and wake up any waiters for 2538 * mnttab changes 2539 */ 2540 void 2541 vfs_mnttab_modtimeupd() 2542 { 2543 hrtime_t oldhrt, newhrt; 2544 2545 ASSERT(RW_WRITE_HELD(&vfslist)); 2546 oldhrt = ts2hrt(&vfs_mnttab_mtime); 2547 gethrestime(&vfs_mnttab_mtime); 2548 newhrt = ts2hrt(&vfs_mnttab_mtime); 2549 if (oldhrt == (hrtime_t)0) 2550 vfs_mnttab_ctime = vfs_mnttab_mtime; 2551 /* 2552 * Attempt to provide unique mtime (like uniqtime but not). 2553 */ 2554 if (newhrt == oldhrt) { 2555 newhrt++; 2556 hrt2ts(newhrt, &vfs_mnttab_mtime); 2557 } 2558 pollwakeup(&vfs_pollhd, (short)POLLRDBAND); 2559 } 2560 2561 int 2562 dounmount(struct vfs *vfsp, int flag, cred_t *cr) 2563 { 2564 vnode_t *coveredvp; 2565 int error; 2566 extern void teardown_vopstats(vfs_t *); 2567 2568 /* 2569 * Get covered vnode. This will be NULL if the vfs is not linked 2570 * into the file system name space (i.e., domount() with MNT_NOSPICE). 2571 */ 2572 coveredvp = vfsp->vfs_vnodecovered; 2573 ASSERT(coveredvp == NULL || vn_vfswlock_held(coveredvp)); 2574 2575 /* 2576 * Purge all dnlc entries for this vfs. 2577 */ 2578 (void) dnlc_purge_vfsp(vfsp, 0); 2579 2580 /* For forcible umount, skip VFS_SYNC() since it may hang */ 2581 if ((flag & MS_FORCE) == 0) 2582 (void) VFS_SYNC(vfsp, 0, cr); 2583 2584 /* 2585 * Lock the vfs to maintain fs status quo during unmount. This 2586 * has to be done after the sync because ufs_update tries to acquire 2587 * the vfs_reflock. 
2588 */ 2589 vfs_lock_wait(vfsp); 2590 2591 if (error = VFS_UNMOUNT(vfsp, flag, cr)) { 2592 vfs_unlock(vfsp); 2593 if (coveredvp != NULL) 2594 vn_vfsunlock(coveredvp); 2595 } else if (coveredvp != NULL) { 2596 teardown_vopstats(vfsp); 2597 /* 2598 * vfs_remove() will do a VN_RELE(vfsp->vfs_vnodecovered) 2599 * when it frees vfsp so we do a VN_HOLD() so we can 2600 * continue to use coveredvp afterwards. 2601 */ 2602 VN_HOLD(coveredvp); 2603 vfs_remove(vfsp); 2604 vn_vfsunlock(coveredvp); 2605 VN_RELE(coveredvp); 2606 } else { 2607 teardown_vopstats(vfsp); 2608 /* 2609 * Release the reference to vfs that is not linked 2610 * into the name space. 2611 */ 2612 vfs_unlock(vfsp); 2613 VFS_RELE(vfsp); 2614 } 2615 return (error); 2616 } 2617 2618 2619 /* 2620 * Vfs_unmountall() is called by uadmin() to unmount all 2621 * mounted file systems (except the root file system) during shutdown. 2622 * It follows the existing locking protocol when traversing the vfs list 2623 * to sync and unmount vfses. Even though there should be no 2624 * other thread running while the system is shutting down, it is prudent 2625 * to still follow the locking protocol. 2626 */ 2627 void 2628 vfs_unmountall(void) 2629 { 2630 struct vfs *vfsp; 2631 struct vfs *prev_vfsp = NULL; 2632 int error; 2633 2634 /* 2635 * Toss all dnlc entries now so that the per-vfs sync 2636 * and unmount operations don't have to slog through 2637 * a bunch of uninteresting vnodes over and over again. 
2638 */ 2639 dnlc_purge(); 2640 2641 vfs_list_lock(); 2642 for (vfsp = rootvfs->vfs_prev; vfsp != rootvfs; vfsp = prev_vfsp) { 2643 prev_vfsp = vfsp->vfs_prev; 2644 2645 if (vfs_lock(vfsp) != 0) 2646 continue; 2647 error = vn_vfswlock(vfsp->vfs_vnodecovered); 2648 vfs_unlock(vfsp); 2649 if (error) 2650 continue; 2651 2652 vfs_list_unlock(); 2653 2654 (void) VFS_SYNC(vfsp, SYNC_CLOSE, CRED()); 2655 (void) dounmount(vfsp, 0, CRED()); 2656 2657 /* 2658 * Since we dropped the vfslist lock above we must 2659 * verify that next_vfsp still exists, else start over. 2660 */ 2661 vfs_list_lock(); 2662 for (vfsp = rootvfs->vfs_prev; 2663 vfsp != rootvfs; vfsp = vfsp->vfs_prev) 2664 if (vfsp == prev_vfsp) 2665 break; 2666 if (vfsp == rootvfs && prev_vfsp != rootvfs) 2667 prev_vfsp = rootvfs->vfs_prev; 2668 } 2669 vfs_list_unlock(); 2670 } 2671 2672 /* 2673 * Called to add an entry to the end of the vfs mount in progress list 2674 */ 2675 void 2676 vfs_addmip(dev_t dev, struct vfs *vfsp) 2677 { 2678 struct ipmnt *mipp; 2679 2680 mipp = (struct ipmnt *)kmem_alloc(sizeof (struct ipmnt), KM_SLEEP); 2681 mipp->mip_next = NULL; 2682 mipp->mip_dev = dev; 2683 mipp->mip_vfsp = vfsp; 2684 mutex_enter(&vfs_miplist_mutex); 2685 if (vfs_miplist_end != NULL) 2686 vfs_miplist_end->mip_next = mipp; 2687 else 2688 vfs_miplist = mipp; 2689 vfs_miplist_end = mipp; 2690 mutex_exit(&vfs_miplist_mutex); 2691 } 2692 2693 /* 2694 * Called to remove an entry from the mount in progress list 2695 * Either because the mount completed or it failed. 
2696 */ 2697 void 2698 vfs_delmip(struct vfs *vfsp) 2699 { 2700 struct ipmnt *mipp, *mipprev; 2701 2702 mutex_enter(&vfs_miplist_mutex); 2703 mipprev = NULL; 2704 for (mipp = vfs_miplist; 2705 mipp && mipp->mip_vfsp != vfsp; mipp = mipp->mip_next) { 2706 mipprev = mipp; 2707 } 2708 if (mipp == NULL) 2709 return; /* shouldn't happen */ 2710 if (mipp == vfs_miplist_end) 2711 vfs_miplist_end = mipprev; 2712 if (mipprev == NULL) 2713 vfs_miplist = mipp->mip_next; 2714 else 2715 mipprev->mip_next = mipp->mip_next; 2716 mutex_exit(&vfs_miplist_mutex); 2717 kmem_free(mipp, sizeof (struct ipmnt)); 2718 } 2719 2720 /* 2721 * vfs_add is called by a specific filesystem's mount routine to add 2722 * the new vfs into the vfs list/hash and to cover the mounted-on vnode. 2723 * The vfs should already have been locked by the caller. 2724 * 2725 * coveredvp is NULL if this is the root. 2726 */ 2727 void 2728 vfs_add(vnode_t *coveredvp, struct vfs *vfsp, int mflag) 2729 { 2730 int newflag; 2731 2732 ASSERT(vfs_lock_held(vfsp)); 2733 VFS_HOLD(vfsp); 2734 newflag = vfsp->vfs_flag; 2735 if (mflag & MS_RDONLY) 2736 newflag |= VFS_RDONLY; 2737 else 2738 newflag &= ~VFS_RDONLY; 2739 if (mflag & MS_NOSUID) 2740 newflag |= (VFS_NOSETUID|VFS_NODEVICES); 2741 else 2742 newflag &= ~(VFS_NOSETUID|VFS_NODEVICES); 2743 if (mflag & MS_NOMNTTAB) 2744 newflag |= VFS_NOMNTTAB; 2745 else 2746 newflag &= ~VFS_NOMNTTAB; 2747 2748 if (coveredvp != NULL) { 2749 ASSERT(vn_vfswlock_held(coveredvp)); 2750 coveredvp->v_vfsmountedhere = vfsp; 2751 VN_HOLD(coveredvp); 2752 } 2753 vfsp->vfs_vnodecovered = coveredvp; 2754 vfsp->vfs_flag = newflag; 2755 2756 vfs_list_add(vfsp); 2757 } 2758 2759 /* 2760 * Remove a vfs from the vfs list, null out the pointer from the 2761 * covered vnode to the vfs (v_vfsmountedhere), and null out the pointer 2762 * from the vfs to the covered vnode (vfs_vnodecovered). Release the 2763 * reference to the vfs and to the covered vnode. 
2764 * 2765 * Called from dounmount after it's confirmed with the file system 2766 * that the unmount is legal. 2767 */ 2768 void 2769 vfs_remove(struct vfs *vfsp) 2770 { 2771 vnode_t *vp; 2772 2773 ASSERT(vfs_lock_held(vfsp)); 2774 2775 /* 2776 * Can't unmount root. Should never happen because fs will 2777 * be busy. 2778 */ 2779 if (vfsp == rootvfs) 2780 cmn_err(CE_PANIC, "vfs_remove: unmounting root"); 2781 2782 vfs_list_remove(vfsp); 2783 2784 /* 2785 * Unhook from the file system name space. 2786 */ 2787 vp = vfsp->vfs_vnodecovered; 2788 ASSERT(vn_vfswlock_held(vp)); 2789 vp->v_vfsmountedhere = NULL; 2790 vfsp->vfs_vnodecovered = NULL; 2791 VN_RELE(vp); 2792 2793 /* 2794 * Release lock and wakeup anybody waiting. 2795 */ 2796 vfs_unlock(vfsp); 2797 VFS_RELE(vfsp); 2798 } 2799 2800 /* 2801 * Lock a filesystem to prevent access to it while mounting, 2802 * unmounting and syncing. Return EBUSY immediately if lock 2803 * can't be acquired. 2804 */ 2805 int 2806 vfs_lock(vfs_t *vfsp) 2807 { 2808 vn_vfslocks_entry_t *vpvfsentry; 2809 2810 vpvfsentry = vn_vfslocks_getlock(vfsp); 2811 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER)) 2812 return (0); 2813 2814 vn_vfslocks_rele(vpvfsentry); 2815 return (EBUSY); 2816 } 2817 2818 int 2819 vfs_rlock(vfs_t *vfsp) 2820 { 2821 vn_vfslocks_entry_t *vpvfsentry; 2822 2823 vpvfsentry = vn_vfslocks_getlock(vfsp); 2824 2825 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER)) 2826 return (0); 2827 2828 vn_vfslocks_rele(vpvfsentry); 2829 return (EBUSY); 2830 } 2831 2832 void 2833 vfs_lock_wait(vfs_t *vfsp) 2834 { 2835 vn_vfslocks_entry_t *vpvfsentry; 2836 2837 vpvfsentry = vn_vfslocks_getlock(vfsp); 2838 rwst_enter(&vpvfsentry->ve_lock, RW_WRITER); 2839 } 2840 2841 void 2842 vfs_rlock_wait(vfs_t *vfsp) 2843 { 2844 vn_vfslocks_entry_t *vpvfsentry; 2845 2846 vpvfsentry = vn_vfslocks_getlock(vfsp); 2847 rwst_enter(&vpvfsentry->ve_lock, RW_READER); 2848 } 2849 2850 /* 2851 * Unlock a locked filesystem. 
2852 */ 2853 void 2854 vfs_unlock(vfs_t *vfsp) 2855 { 2856 vn_vfslocks_entry_t *vpvfsentry; 2857 2858 /* 2859 * vfs_unlock will mimic sema_v behaviour to fix 4748018. 2860 * And these changes should remain for the patch changes as it is. 2861 */ 2862 if (panicstr) 2863 return; 2864 2865 /* 2866 * ve_refcount needs to be dropped twice here. 2867 * 1. To release refernce after a call to vfs_locks_getlock() 2868 * 2. To release the reference from the locking routines like 2869 * vfs_rlock_wait/vfs_wlock_wait/vfs_wlock etc,. 2870 */ 2871 2872 vpvfsentry = vn_vfslocks_getlock(vfsp); 2873 vn_vfslocks_rele(vpvfsentry); 2874 2875 rwst_exit(&vpvfsentry->ve_lock); 2876 vn_vfslocks_rele(vpvfsentry); 2877 } 2878 2879 /* 2880 * Utility routine that allows a filesystem to construct its 2881 * fsid in "the usual way" - by munging some underlying dev_t and 2882 * the filesystem type number into the 64-bit fsid. Note that 2883 * this implicitly relies on dev_t persistence to make filesystem 2884 * id's persistent. 2885 * 2886 * There's nothing to prevent an individual fs from constructing its 2887 * fsid in a different way, and indeed they should. 2888 * 2889 * Since we want fsids to be 32-bit quantities (so that they can be 2890 * exported identically by either 32-bit or 64-bit APIs, as well as 2891 * the fact that fsid's are "known" to NFS), we compress the device 2892 * number given down to 32-bits, and panic if that isn't possible. 2893 */ 2894 void 2895 vfs_make_fsid(fsid_t *fsi, dev_t dev, int val) 2896 { 2897 if (!cmpldev((dev32_t *)&fsi->val[0], dev)) 2898 panic("device number too big for fsid!"); 2899 fsi->val[1] = val; 2900 } 2901 2902 int 2903 vfs_lock_held(vfs_t *vfsp) 2904 { 2905 int held; 2906 vn_vfslocks_entry_t *vpvfsentry; 2907 2908 /* 2909 * vfs_lock_held will mimic sema_held behaviour 2910 * if panicstr is set. And these changes should remain 2911 * for the patch changes as it is. 
2912 */ 2913 if (panicstr) 2914 return (1); 2915 2916 vpvfsentry = vn_vfslocks_getlock(vfsp); 2917 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER); 2918 2919 vn_vfslocks_rele(vpvfsentry); 2920 return (held); 2921 } 2922 2923 struct _kthread * 2924 vfs_lock_owner(vfs_t *vfsp) 2925 { 2926 struct _kthread *owner; 2927 vn_vfslocks_entry_t *vpvfsentry; 2928 2929 /* 2930 * vfs_wlock_held will mimic sema_held behaviour 2931 * if panicstr is set. And these changes should remain 2932 * for the patch changes as it is. 2933 */ 2934 if (panicstr) 2935 return (NULL); 2936 2937 vpvfsentry = vn_vfslocks_getlock(vfsp); 2938 owner = rwst_owner(&vpvfsentry->ve_lock); 2939 2940 vn_vfslocks_rele(vpvfsentry); 2941 return (owner); 2942 } 2943 2944 /* 2945 * vfs list locking. 2946 * 2947 * Rather than manipulate the vfslist lock directly, we abstract into lock 2948 * and unlock routines to allow the locking implementation to be changed for 2949 * clustering. 2950 * 2951 * Whenever the vfs list is modified through its hash links, the overall list 2952 * lock must be obtained before locking the relevant hash bucket. But to see 2953 * whether a given vfs is on the list, it suffices to obtain the lock for the 2954 * hash bucket without getting the overall list lock. (See getvfs() below.) 2955 */ 2956 2957 void 2958 vfs_list_lock() 2959 { 2960 rw_enter(&vfslist, RW_WRITER); 2961 } 2962 2963 void 2964 vfs_list_read_lock() 2965 { 2966 rw_enter(&vfslist, RW_READER); 2967 } 2968 2969 void 2970 vfs_list_unlock() 2971 { 2972 rw_exit(&vfslist); 2973 } 2974 2975 /* 2976 * Low level worker routines for adding entries to and removing entries from 2977 * the vfs list. 
2978 */ 2979 2980 static void 2981 vfs_hash_add(struct vfs *vfsp, int insert_at_head) 2982 { 2983 int vhno; 2984 struct vfs **hp; 2985 dev_t dev; 2986 2987 ASSERT(RW_WRITE_HELD(&vfslist)); 2988 2989 dev = expldev(vfsp->vfs_fsid.val[0]); 2990 vhno = VFSHASH(getmajor(dev), getminor(dev)); 2991 2992 mutex_enter(&rvfs_list[vhno].rvfs_lock); 2993 2994 /* 2995 * Link into the hash table, inserting it at the end, so that LOFS 2996 * with the same fsid as UFS (or other) file systems will not hide the 2997 * UFS. 2998 */ 2999 if (insert_at_head) { 3000 vfsp->vfs_hash = rvfs_list[vhno].rvfs_head; 3001 rvfs_list[vhno].rvfs_head = vfsp; 3002 } else { 3003 for (hp = &rvfs_list[vhno].rvfs_head; *hp != NULL; 3004 hp = &(*hp)->vfs_hash) 3005 continue; 3006 /* 3007 * hp now contains the address of the pointer to update 3008 * to effect the insertion. 3009 */ 3010 vfsp->vfs_hash = NULL; 3011 *hp = vfsp; 3012 } 3013 3014 rvfs_list[vhno].rvfs_len++; 3015 mutex_exit(&rvfs_list[vhno].rvfs_lock); 3016 } 3017 3018 3019 static void 3020 vfs_hash_remove(struct vfs *vfsp) 3021 { 3022 int vhno; 3023 struct vfs *tvfsp; 3024 dev_t dev; 3025 3026 ASSERT(RW_WRITE_HELD(&vfslist)); 3027 3028 dev = expldev(vfsp->vfs_fsid.val[0]); 3029 vhno = VFSHASH(getmajor(dev), getminor(dev)); 3030 3031 mutex_enter(&rvfs_list[vhno].rvfs_lock); 3032 3033 /* 3034 * Remove from hash. 
3035 */ 3036 if (rvfs_list[vhno].rvfs_head == vfsp) { 3037 rvfs_list[vhno].rvfs_head = vfsp->vfs_hash; 3038 rvfs_list[vhno].rvfs_len--; 3039 goto foundit; 3040 } 3041 for (tvfsp = rvfs_list[vhno].rvfs_head; tvfsp != NULL; 3042 tvfsp = tvfsp->vfs_hash) { 3043 if (tvfsp->vfs_hash == vfsp) { 3044 tvfsp->vfs_hash = vfsp->vfs_hash; 3045 rvfs_list[vhno].rvfs_len--; 3046 goto foundit; 3047 } 3048 } 3049 cmn_err(CE_WARN, "vfs_list_remove: vfs not found in hash"); 3050 3051 foundit: 3052 3053 mutex_exit(&rvfs_list[vhno].rvfs_lock); 3054 } 3055 3056 3057 void 3058 vfs_list_add(struct vfs *vfsp) 3059 { 3060 zone_t *zone; 3061 3062 /* 3063 * The zone that owns the mount is the one that performed the mount. 3064 * Note that this isn't necessarily the same as the zone mounted into. 3065 * The corresponding zone_rele() will be done when the vfs_t is 3066 * being free'd. 3067 */ 3068 vfsp->vfs_zone = curproc->p_zone; 3069 zone_hold(vfsp->vfs_zone); 3070 3071 /* 3072 * Find the zone mounted into, and put this mount on its vfs list. 3073 */ 3074 zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 3075 ASSERT(zone != NULL); 3076 /* 3077 * Special casing for the root vfs. This structure is allocated 3078 * statically and hooked onto rootvfs at link time. During the 3079 * vfs_mountroot call at system startup time, the root file system's 3080 * VFS_MOUNTROOT routine will call vfs_add with this root vfs struct 3081 * as argument. The code below must detect and handle this special 3082 * case. The only apparent justification for this special casing is 3083 * to ensure that the root file system appears at the head of the 3084 * list. 3085 * 3086 * XXX: I'm assuming that it's ok to do normal list locking when 3087 * adding the entry for the root file system (this used to be 3088 * done with no locks held). 3089 */ 3090 vfs_list_lock(); 3091 /* 3092 * Link into the vfs list proper. 
3093 */ 3094 if (vfsp == &root) { 3095 /* 3096 * Assert: This vfs is already on the list as its first entry. 3097 * Thus, there's nothing to do. 3098 */ 3099 ASSERT(rootvfs == vfsp); 3100 /* 3101 * Add it to the head of the global zone's vfslist. 3102 */ 3103 ASSERT(zone == global_zone); 3104 ASSERT(zone->zone_vfslist == NULL); 3105 zone->zone_vfslist = vfsp; 3106 } else { 3107 /* 3108 * Link to end of list using vfs_prev (as rootvfs is now a 3109 * doubly linked circular list) so list is in mount order for 3110 * mnttab use. 3111 */ 3112 rootvfs->vfs_prev->vfs_next = vfsp; 3113 vfsp->vfs_prev = rootvfs->vfs_prev; 3114 rootvfs->vfs_prev = vfsp; 3115 vfsp->vfs_next = rootvfs; 3116 3117 /* 3118 * Do it again for the zone-private list (which may be NULL). 3119 */ 3120 if (zone->zone_vfslist == NULL) { 3121 ASSERT(zone != global_zone); 3122 zone->zone_vfslist = vfsp; 3123 } else { 3124 zone->zone_vfslist->vfs_zone_prev->vfs_zone_next = vfsp; 3125 vfsp->vfs_zone_prev = zone->zone_vfslist->vfs_zone_prev; 3126 zone->zone_vfslist->vfs_zone_prev = vfsp; 3127 vfsp->vfs_zone_next = zone->zone_vfslist; 3128 } 3129 } 3130 3131 /* 3132 * Link into the hash table, inserting it at the end, so that LOFS 3133 * with the same fsid as UFS (or other) file systems will not hide 3134 * the UFS. 3135 */ 3136 vfs_hash_add(vfsp, 0); 3137 3138 /* 3139 * update the mnttab modification time 3140 */ 3141 vfs_mnttab_modtimeupd(); 3142 vfs_list_unlock(); 3143 zone_rele(zone); 3144 } 3145 3146 void 3147 vfs_list_remove(struct vfs *vfsp) 3148 { 3149 zone_t *zone; 3150 3151 zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 3152 ASSERT(zone != NULL); 3153 /* 3154 * Callers are responsible for preventing attempts to unmount the 3155 * root. 3156 */ 3157 ASSERT(vfsp != rootvfs); 3158 3159 vfs_list_lock(); 3160 3161 /* 3162 * Remove from hash. 3163 */ 3164 vfs_hash_remove(vfsp); 3165 3166 /* 3167 * Remove from vfs list. 
3168 */ 3169 vfsp->vfs_prev->vfs_next = vfsp->vfs_next; 3170 vfsp->vfs_next->vfs_prev = vfsp->vfs_prev; 3171 vfsp->vfs_next = vfsp->vfs_prev = NULL; 3172 3173 /* 3174 * Remove from zone-specific vfs list. 3175 */ 3176 if (zone->zone_vfslist == vfsp) 3177 zone->zone_vfslist = vfsp->vfs_zone_next; 3178 3179 if (vfsp->vfs_zone_next == vfsp) { 3180 ASSERT(vfsp->vfs_zone_prev == vfsp); 3181 ASSERT(zone->zone_vfslist == vfsp); 3182 zone->zone_vfslist = NULL; 3183 } 3184 3185 vfsp->vfs_zone_prev->vfs_zone_next = vfsp->vfs_zone_next; 3186 vfsp->vfs_zone_next->vfs_zone_prev = vfsp->vfs_zone_prev; 3187 vfsp->vfs_zone_next = vfsp->vfs_zone_prev = NULL; 3188 3189 /* 3190 * update the mnttab modification time 3191 */ 3192 vfs_mnttab_modtimeupd(); 3193 vfs_list_unlock(); 3194 zone_rele(zone); 3195 } 3196 3197 struct vfs * 3198 getvfs(fsid_t *fsid) 3199 { 3200 struct vfs *vfsp; 3201 int val0 = fsid->val[0]; 3202 int val1 = fsid->val[1]; 3203 dev_t dev = expldev(val0); 3204 int vhno = VFSHASH(getmajor(dev), getminor(dev)); 3205 kmutex_t *hmp = &rvfs_list[vhno].rvfs_lock; 3206 3207 mutex_enter(hmp); 3208 for (vfsp = rvfs_list[vhno].rvfs_head; vfsp; vfsp = vfsp->vfs_hash) { 3209 if (vfsp->vfs_fsid.val[0] == val0 && 3210 vfsp->vfs_fsid.val[1] == val1) { 3211 VFS_HOLD(vfsp); 3212 mutex_exit(hmp); 3213 return (vfsp); 3214 } 3215 } 3216 mutex_exit(hmp); 3217 return (NULL); 3218 } 3219 3220 /* 3221 * Search the vfs mount in progress list for a specified device/vfs entry. 3222 * Returns 0 if the first entry in the list that the device matches has the 3223 * given vfs pointer as well. If the device matches but a different vfs 3224 * pointer is encountered in the list before the given vfs pointer then 3225 * a 1 is returned. 
3226 */ 3227 3228 int 3229 vfs_devmounting(dev_t dev, struct vfs *vfsp) 3230 { 3231 int retval = 0; 3232 struct ipmnt *mipp; 3233 3234 mutex_enter(&vfs_miplist_mutex); 3235 for (mipp = vfs_miplist; mipp != NULL; mipp = mipp->mip_next) { 3236 if (mipp->mip_dev == dev) { 3237 if (mipp->mip_vfsp != vfsp) 3238 retval = 1; 3239 break; 3240 } 3241 } 3242 mutex_exit(&vfs_miplist_mutex); 3243 return (retval); 3244 } 3245 3246 /* 3247 * Search the vfs list for a specified device. Returns 1, if entry is found 3248 * or 0 if no suitable entry is found. 3249 */ 3250 3251 int 3252 vfs_devismounted(dev_t dev) 3253 { 3254 struct vfs *vfsp; 3255 int found; 3256 3257 vfs_list_read_lock(); 3258 vfsp = rootvfs; 3259 found = 0; 3260 do { 3261 if (vfsp->vfs_dev == dev) { 3262 found = 1; 3263 break; 3264 } 3265 vfsp = vfsp->vfs_next; 3266 } while (vfsp != rootvfs); 3267 3268 vfs_list_unlock(); 3269 return (found); 3270 } 3271 3272 /* 3273 * Search the vfs list for a specified device. Returns a pointer to it 3274 * or NULL if no suitable entry is found. The caller of this routine 3275 * is responsible for releasing the returned vfs pointer. 3276 */ 3277 struct vfs * 3278 vfs_dev2vfsp(dev_t dev) 3279 { 3280 struct vfs *vfsp; 3281 int found; 3282 3283 vfs_list_read_lock(); 3284 vfsp = rootvfs; 3285 found = 0; 3286 do { 3287 /* 3288 * The following could be made more efficient by making 3289 * the entire loop use vfs_zone_next if the call is from 3290 * a zone. The only callers, however, ustat(2) and 3291 * umount2(2), don't seem to justify the added 3292 * complexity at present. 3293 */ 3294 if (vfsp->vfs_dev == dev && 3295 ZONE_PATH_VISIBLE(refstr_value(vfsp->vfs_mntpt), 3296 curproc->p_zone)) { 3297 VFS_HOLD(vfsp); 3298 found = 1; 3299 break; 3300 } 3301 vfsp = vfsp->vfs_next; 3302 } while (vfsp != rootvfs); 3303 vfs_list_unlock(); 3304 return (found ? vfsp: NULL); 3305 } 3306 3307 /* 3308 * Search the vfs list for a specified mntpoint. 
Returns a pointer to it 3309 * or NULL if no suitable entry is found. The caller of this routine 3310 * is responsible for releasing the returned vfs pointer. 3311 * 3312 * Note that if multiple mntpoints match, the last one matching is 3313 * returned in an attempt to return the "top" mount when overlay 3314 * mounts are covering the same mount point. This is accomplished by starting 3315 * at the end of the list and working our way backwards, stopping at the first 3316 * matching mount. 3317 */ 3318 struct vfs * 3319 vfs_mntpoint2vfsp(const char *mp) 3320 { 3321 struct vfs *vfsp; 3322 struct vfs *retvfsp = NULL; 3323 zone_t *zone = curproc->p_zone; 3324 struct vfs *list; 3325 3326 vfs_list_read_lock(); 3327 if (getzoneid() == GLOBAL_ZONEID) { 3328 /* 3329 * The global zone may see filesystems in any zone. 3330 */ 3331 vfsp = rootvfs->vfs_prev; 3332 do { 3333 if (strcmp(refstr_value(vfsp->vfs_mntpt), mp) == 0) { 3334 retvfsp = vfsp; 3335 break; 3336 } 3337 vfsp = vfsp->vfs_prev; 3338 } while (vfsp != rootvfs->vfs_prev); 3339 } else if ((list = zone->zone_vfslist) != NULL) { 3340 const char *mntpt; 3341 3342 vfsp = list->vfs_zone_prev; 3343 do { 3344 mntpt = refstr_value(vfsp->vfs_mntpt); 3345 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); 3346 if (strcmp(mntpt, mp) == 0) { 3347 retvfsp = vfsp; 3348 break; 3349 } 3350 vfsp = vfsp->vfs_zone_prev; 3351 } while (vfsp != list->vfs_zone_prev); 3352 } 3353 if (retvfsp) 3354 VFS_HOLD(retvfsp); 3355 vfs_list_unlock(); 3356 return (retvfsp); 3357 } 3358 3359 /* 3360 * Search the vfs list for a specified vfsops. 3361 * if vfs entry is found then return 1, else 0. 
3362 */ 3363 int 3364 vfs_opsinuse(vfsops_t *ops) 3365 { 3366 struct vfs *vfsp; 3367 int found; 3368 3369 vfs_list_read_lock(); 3370 vfsp = rootvfs; 3371 found = 0; 3372 do { 3373 if (vfs_getops(vfsp) == ops) { 3374 found = 1; 3375 break; 3376 } 3377 vfsp = vfsp->vfs_next; 3378 } while (vfsp != rootvfs); 3379 vfs_list_unlock(); 3380 return (found); 3381 } 3382 3383 /* 3384 * Allocate an entry in vfssw for a file system type 3385 */ 3386 struct vfssw * 3387 allocate_vfssw(char *type) 3388 { 3389 struct vfssw *vswp; 3390 3391 if (type[0] == '\0' || strlen(type) + 1 > _ST_FSTYPSZ) { 3392 /* 3393 * The vfssw table uses the empty string to identify an 3394 * available entry; we cannot add any type which has 3395 * a leading NUL. The string length is limited to 3396 * the size of the st_fstype array in struct stat. 3397 */ 3398 return (NULL); 3399 } 3400 3401 ASSERT(VFSSW_WRITE_LOCKED()); 3402 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) 3403 if (!ALLOCATED_VFSSW(vswp)) { 3404 vswp->vsw_name = kmem_alloc(strlen(type) + 1, KM_SLEEP); 3405 (void) strcpy(vswp->vsw_name, type); 3406 ASSERT(vswp->vsw_count == 0); 3407 vswp->vsw_count = 1; 3408 mutex_init(&vswp->vsw_lock, NULL, MUTEX_DEFAULT, NULL); 3409 return (vswp); 3410 } 3411 return (NULL); 3412 } 3413 3414 /* 3415 * Impose additional layer of translation between vfstype names 3416 * and module names in the filesystem. 3417 */ 3418 static char * 3419 vfs_to_modname(char *vfstype) 3420 { 3421 if (strcmp(vfstype, "proc") == 0) { 3422 vfstype = "procfs"; 3423 } else if (strcmp(vfstype, "fd") == 0) { 3424 vfstype = "fdfs"; 3425 } else if (strncmp(vfstype, "nfs", 3) == 0) { 3426 vfstype = "nfs"; 3427 } 3428 3429 return (vfstype); 3430 } 3431 3432 /* 3433 * Find a vfssw entry given a file system type name. 3434 * Try to autoload the filesystem if it's not found. 3435 * If it's installed, return the vfssw locked to prevent unloading. 
3436 */ 3437 struct vfssw * 3438 vfs_getvfssw(char *type) 3439 { 3440 struct vfssw *vswp; 3441 char *modname; 3442 3443 RLOCK_VFSSW(); 3444 vswp = vfs_getvfsswbyname(type); 3445 modname = vfs_to_modname(type); 3446 3447 if (rootdir == NULL) { 3448 /* 3449 * If we haven't yet loaded the root file system, then our 3450 * _init won't be called until later. Allocate vfssw entry, 3451 * because mod_installfs won't be called. 3452 */ 3453 if (vswp == NULL) { 3454 RUNLOCK_VFSSW(); 3455 WLOCK_VFSSW(); 3456 if ((vswp = vfs_getvfsswbyname(type)) == NULL) { 3457 if ((vswp = allocate_vfssw(type)) == NULL) { 3458 WUNLOCK_VFSSW(); 3459 return (NULL); 3460 } 3461 } 3462 WUNLOCK_VFSSW(); 3463 RLOCK_VFSSW(); 3464 } 3465 if (!VFS_INSTALLED(vswp)) { 3466 RUNLOCK_VFSSW(); 3467 (void) modloadonly("fs", modname); 3468 } else 3469 RUNLOCK_VFSSW(); 3470 return (vswp); 3471 } 3472 3473 /* 3474 * Try to load the filesystem. Before calling modload(), we drop 3475 * our lock on the VFS switch table, and pick it up after the 3476 * module is loaded. However, there is a potential race: the 3477 * module could be unloaded after the call to modload() completes 3478 * but before we pick up the lock and drive on. Therefore, 3479 * we keep reloading the module until we've loaded the module 3480 * _and_ we have the lock on the VFS switch table. 3481 */ 3482 while (vswp == NULL || !VFS_INSTALLED(vswp)) { 3483 RUNLOCK_VFSSW(); 3484 if (modload("fs", modname) == -1) 3485 return (NULL); 3486 RLOCK_VFSSW(); 3487 if (vswp == NULL) 3488 if ((vswp = vfs_getvfsswbyname(type)) == NULL) 3489 break; 3490 } 3491 RUNLOCK_VFSSW(); 3492 3493 return (vswp); 3494 } 3495 3496 /* 3497 * Find a vfssw entry given a file system type name. 
3498 */ 3499 struct vfssw * 3500 vfs_getvfsswbyname(char *type) 3501 { 3502 struct vfssw *vswp; 3503 3504 ASSERT(VFSSW_LOCKED()); 3505 if (type == NULL || *type == '\0') 3506 return (NULL); 3507 3508 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 3509 if (strcmp(type, vswp->vsw_name) == 0) { 3510 vfs_refvfssw(vswp); 3511 return (vswp); 3512 } 3513 } 3514 3515 return (NULL); 3516 } 3517 3518 /* 3519 * Find a vfssw entry given a set of vfsops. 3520 */ 3521 struct vfssw * 3522 vfs_getvfsswbyvfsops(vfsops_t *vfsops) 3523 { 3524 struct vfssw *vswp; 3525 3526 RLOCK_VFSSW(); 3527 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 3528 if (ALLOCATED_VFSSW(vswp) && &vswp->vsw_vfsops == vfsops) { 3529 vfs_refvfssw(vswp); 3530 RUNLOCK_VFSSW(); 3531 return (vswp); 3532 } 3533 } 3534 RUNLOCK_VFSSW(); 3535 3536 return (NULL); 3537 } 3538 3539 /* 3540 * Reference a vfssw entry. 3541 */ 3542 void 3543 vfs_refvfssw(struct vfssw *vswp) 3544 { 3545 3546 mutex_enter(&vswp->vsw_lock); 3547 vswp->vsw_count++; 3548 mutex_exit(&vswp->vsw_lock); 3549 } 3550 3551 /* 3552 * Unreference a vfssw entry. 3553 */ 3554 void 3555 vfs_unrefvfssw(struct vfssw *vswp) 3556 { 3557 3558 mutex_enter(&vswp->vsw_lock); 3559 vswp->vsw_count--; 3560 mutex_exit(&vswp->vsw_lock); 3561 } 3562 3563 int sync_timeout = 30; /* timeout for syncing a page during panic */ 3564 int sync_timeleft; /* portion of sync_timeout remaining */ 3565 3566 static int sync_retries = 20; /* number of retries when not making progress */ 3567 static int sync_triesleft; /* portion of sync_retries remaining */ 3568 3569 static pgcnt_t old_pgcnt, new_pgcnt; 3570 static int new_bufcnt, old_bufcnt; 3571 3572 /* 3573 * Sync all of the mounted filesystems, and then wait for the actual i/o to 3574 * complete. We wait by counting the number of dirty pages and buffers, 3575 * pushing them out using bio_busy() and page_busy(), and then counting again. 
3576 * This routine is used during both the uadmin A_SHUTDOWN code as well as 3577 * the SYNC phase of the panic code (see comments in panic.c). It should only 3578 * be used after some higher-level mechanism has quiesced the system so that 3579 * new writes are not being initiated while we are waiting for completion. 3580 * 3581 * To ensure finite running time, our algorithm uses two timeout mechanisms: 3582 * sync_timeleft (a timer implemented by the omnipresent deadman() cyclic), and 3583 * sync_triesleft (a progress counter used by the vfs_syncall() loop below). 3584 * Together these ensure that syncing completes if our i/o paths are stuck. 3585 * The counters are declared above so they can be found easily in the debugger. 3586 * 3587 * The sync_timeleft counter is reset by bio_busy() and page_busy() using the 3588 * vfs_syncprogress() subroutine whenever we make progress through the lists of 3589 * pages and buffers. It is decremented and expired by the deadman() cyclic. 3590 * When vfs_syncall() decides it is done, we disable the deadman() counter by 3591 * setting sync_timeleft to zero. This timer guards against vfs_syncall() 3592 * deadlocking or hanging inside of a broken filesystem or driver routine. 3593 * 3594 * The sync_triesleft counter is updated by vfs_syncall() itself. If we make 3595 * sync_retries consecutive calls to bio_busy() and page_busy() without 3596 * decreasing either the number of dirty buffers or dirty pages below the 3597 * lowest count we have seen so far, we give up and return from vfs_syncall(). 3598 * 3599 * Each loop iteration ends with a call to delay() one second to allow time for 3600 * i/o completion and to permit the user time to read our progress messages. 
3601 */ 3602 void 3603 vfs_syncall(void) 3604 { 3605 if (rootdir == NULL && !modrootloaded) 3606 return; /* panic during boot - no filesystems yet */ 3607 3608 printf("syncing file systems..."); 3609 vfs_syncprogress(); 3610 sync(); 3611 3612 vfs_syncprogress(); 3613 sync_triesleft = sync_retries; 3614 3615 old_bufcnt = new_bufcnt = INT_MAX; 3616 old_pgcnt = new_pgcnt = ULONG_MAX; 3617 3618 while (sync_triesleft > 0) { 3619 old_bufcnt = MIN(old_bufcnt, new_bufcnt); 3620 old_pgcnt = MIN(old_pgcnt, new_pgcnt); 3621 3622 new_bufcnt = bio_busy(B_TRUE); 3623 new_pgcnt = page_busy(B_TRUE); 3624 vfs_syncprogress(); 3625 3626 if (new_bufcnt == 0 && new_pgcnt == 0) 3627 break; 3628 3629 if (new_bufcnt < old_bufcnt || new_pgcnt < old_pgcnt) 3630 sync_triesleft = sync_retries; 3631 else 3632 sync_triesleft--; 3633 3634 if (new_bufcnt) 3635 printf(" [%d]", new_bufcnt); 3636 if (new_pgcnt) 3637 printf(" %lu", new_pgcnt); 3638 3639 delay(hz); 3640 } 3641 3642 if (new_bufcnt != 0 || new_pgcnt != 0) 3643 printf(" done (not all i/o completed)\n"); 3644 else 3645 printf(" done\n"); 3646 3647 sync_timeleft = 0; 3648 delay(hz); 3649 } 3650 3651 /* 3652 * If we are in the middle of the sync phase of panic, reset sync_timeleft to 3653 * sync_timeout to indicate that we are making progress and the deadman() 3654 * omnipresent cyclic should not yet time us out. Note that it is safe to 3655 * store to sync_timeleft here since the deadman() is firing at high-level 3656 * on top of us. If we are racing with the deadman(), either the deadman() 3657 * will decrement the old value and then we will reset it, or we will 3658 * reset it and then the deadman() will immediately decrement it. In either 3659 * case, correct behavior results. 3660 */ 3661 void 3662 vfs_syncprogress(void) 3663 { 3664 if (panicstr) 3665 sync_timeleft = sync_timeout; 3666 } 3667 3668 /* 3669 * Map VFS flags to statvfs flags. These shouldn't really be separate 3670 * flags at all. 
3671 */ 3672 uint_t 3673 vf_to_stf(uint_t vf) 3674 { 3675 uint_t stf = 0; 3676 3677 if (vf & VFS_RDONLY) 3678 stf |= ST_RDONLY; 3679 if (vf & VFS_NOSETUID) 3680 stf |= ST_NOSUID; 3681 if (vf & VFS_NOTRUNC) 3682 stf |= ST_NOTRUNC; 3683 3684 return (stf); 3685 } 3686 3687 /* 3688 * Use old-style function prototype for vfsstray() so 3689 * that we can use it anywhere in the vfsops structure. 3690 */ 3691 int vfsstray(); 3692 3693 /* 3694 * Entries for (illegal) fstype 0. 3695 */ 3696 /* ARGSUSED */ 3697 int 3698 vfsstray_sync(struct vfs *vfsp, short arg, struct cred *cr) 3699 { 3700 cmn_err(CE_PANIC, "stray vfs operation"); 3701 return (0); 3702 } 3703 3704 vfsops_t vfs_strayops = { 3705 vfsstray, 3706 vfsstray, 3707 vfsstray, 3708 vfsstray, 3709 vfsstray_sync, 3710 vfsstray, 3711 vfsstray, 3712 vfsstray 3713 }; 3714 3715 /* 3716 * Entries for (illegal) fstype 0. 3717 */ 3718 int 3719 vfsstray(void) 3720 { 3721 cmn_err(CE_PANIC, "stray vfs operation"); 3722 return (0); 3723 } 3724 3725 /* 3726 * Support for dealing with forced UFS unmount and its interaction with 3727 * LOFS. Could be used by any filesystem. 3728 * See bug 1203132. 3729 */ 3730 int 3731 vfs_EIO(void) 3732 { 3733 return (EIO); 3734 } 3735 3736 /* 3737 * We've gotta define the op for sync separately, since the compiler gets 3738 * confused if we mix and match ANSI and normal style prototypes when 3739 * a "short" argument is present and spits out a warning. 
3740 */ 3741 /*ARGSUSED*/ 3742 int 3743 vfs_EIO_sync(struct vfs *vfsp, short arg, struct cred *cr) 3744 { 3745 return (EIO); 3746 } 3747 3748 vfs_t EIO_vfs; 3749 vfsops_t *EIO_vfsops; 3750 3751 /* 3752 * Called from startup() to initialize all loaded vfs's 3753 */ 3754 void 3755 vfsinit(void) 3756 { 3757 struct vfssw *vswp; 3758 int error; 3759 extern int vopstats_enabled; 3760 extern void vopstats_startup(); 3761 3762 static const fs_operation_def_t EIO_vfsops_template[] = { 3763 VFSNAME_MOUNT, vfs_EIO, 3764 VFSNAME_UNMOUNT, vfs_EIO, 3765 VFSNAME_ROOT, vfs_EIO, 3766 VFSNAME_STATVFS, vfs_EIO, 3767 VFSNAME_SYNC, (fs_generic_func_p) vfs_EIO_sync, 3768 VFSNAME_VGET, vfs_EIO, 3769 VFSNAME_MOUNTROOT, vfs_EIO, 3770 VFSNAME_FREEVFS, vfs_EIO, 3771 VFSNAME_VNSTATE, vfs_EIO, 3772 NULL, NULL 3773 }; 3774 3775 3776 /* Initialize the vnode cache (file systems may use it during init). */ 3777 3778 vn_create_cache(); 3779 3780 /* Setup event monitor framework */ 3781 3782 fem_init(); 3783 3784 /* Initialize the dummy stray file system type. */ 3785 3786 vfssw[0].vsw_vfsops = vfs_strayops; 3787 3788 /* Initialize the dummy EIO file system. */ 3789 error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops); 3790 if (error != 0) { 3791 cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template"); 3792 /* Shouldn't happen, but not bad enough to panic */ 3793 } 3794 3795 VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL); 3796 3797 /* 3798 * Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup 3799 * on this vfs can immediately notice it's invalid. 3800 */ 3801 EIO_vfs.vfs_flag |= VFS_UNMOUNTED; 3802 3803 /* 3804 * Call the init routines of non-loadable filesystems only. 3805 * Filesystems which are loaded as separate modules will be 3806 * initialized by the module loading code instead. 
3807 */ 3808 3809 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 3810 RLOCK_VFSSW(); 3811 if (vswp->vsw_init != NULL) 3812 (*vswp->vsw_init)(vswp - vfssw, vswp->vsw_name); 3813 RUNLOCK_VFSSW(); 3814 } 3815 3816 vopstats_startup(); 3817 3818 if (vopstats_enabled) { 3819 /* EIO_vfs can collect stats, but we don't retrieve them */ 3820 initialize_vopstats(&EIO_vfs.vfs_vopstats); 3821 EIO_vfs.vfs_fstypevsp = NULL; 3822 EIO_vfs.vfs_vskap = NULL; 3823 EIO_vfs.vfs_flag |= VFS_STATS; 3824 } 3825 } 3826 3827 /* 3828 * Increments the vfs reference count by one atomically. 3829 */ 3830 void 3831 vfs_hold(vfs_t *vfsp) 3832 { 3833 atomic_add_32(&vfsp->vfs_count, 1); 3834 ASSERT(vfsp->vfs_count != 0); 3835 } 3836 3837 /* 3838 * Decrements the vfs reference count by one atomically. When 3839 * vfs reference count becomes zero, it calls the file system 3840 * specific vfs_freevfs() to free up the resources. 3841 */ 3842 void 3843 vfs_rele(vfs_t *vfsp) 3844 { 3845 ASSERT(vfsp->vfs_count != 0); 3846 if (atomic_add_32_nv(&vfsp->vfs_count, -1) == 0) { 3847 VFS_FREEVFS(vfsp); 3848 if (vfsp->vfs_zone) 3849 zone_rele(vfsp->vfs_zone); 3850 vfs_freemnttab(vfsp); 3851 if (vfsp->vfs_implp) 3852 vfsimpl_teardown(vfsp); 3853 sema_destroy(&vfsp->vfs_reflock); 3854 kmem_free(vfsp, sizeof (*vfsp)); 3855 } 3856 } 3857 3858 /* 3859 * Generic operations vector support. 3860 * 3861 * This is used to build operations vectors for both the vfs and vnode. 3862 * It's normally called only when a file system is loaded. 3863 * 3864 * There are many possible algorithms for this, including the following: 3865 * 3866 * (1) scan the list of known operations; for each, see if the file system 3867 * includes an entry for it, and fill it in as appropriate. 3868 * 3869 * (2) set up defaults for all known operations. scan the list of ops 3870 * supplied by the file system; for each which is both supplied and 3871 * known, fill it in. 
3872 * 3873 * (3) sort the lists of known ops & supplied ops; scan the list, filling 3874 * in entries as we go. 3875 * 3876 * we choose (1) for simplicity, and because performance isn't critical here. 3877 * note that (2) could be sped up using a precomputed hash table on known ops. 3878 * (3) could be faster than either, but only if the lists were very large or 3879 * supplied in sorted order. 3880 * 3881 */ 3882 3883 int 3884 fs_build_vector(void *vector, int *unused_ops, 3885 const fs_operation_trans_def_t *translation, 3886 const fs_operation_def_t *operations) 3887 { 3888 int i, num_trans, num_ops, used; 3889 3890 /* Count the number of translations and the number of supplied */ 3891 /* operations. */ 3892 3893 { 3894 const fs_operation_trans_def_t *p; 3895 3896 for (num_trans = 0, p = translation; 3897 p->name != NULL; 3898 num_trans++, p++) 3899 ; 3900 } 3901 3902 { 3903 const fs_operation_def_t *p; 3904 3905 for (num_ops = 0, p = operations; 3906 p->name != NULL; 3907 num_ops++, p++) 3908 ; 3909 } 3910 3911 /* Walk through each operation known to our caller. There will be */ 3912 /* one entry in the supplied "translation table" for each. */ 3913 3914 used = 0; 3915 3916 for (i = 0; i < num_trans; i++) { 3917 int j, found; 3918 char *curname; 3919 fs_generic_func_p result; 3920 fs_generic_func_p *location; 3921 3922 curname = translation[i].name; 3923 3924 /* Look for a matching operation in the list supplied by the */ 3925 /* file system. */ 3926 3927 found = 0; 3928 3929 for (j = 0; j < num_ops; j++) { 3930 if (strcmp(operations[j].name, curname) == 0) { 3931 used++; 3932 found = 1; 3933 break; 3934 } 3935 } 3936 3937 /* If the file system is using a "placeholder" for default */ 3938 /* or error functions, grab the appropriate function out of */ 3939 /* the translation table. If the file system didn't supply */ 3940 /* this operation at all, use the default function. 
*/ 3941 3942 if (found) { 3943 result = operations[j].func; 3944 if (result == fs_default) { 3945 result = translation[i].defaultFunc; 3946 } else if (result == fs_error) { 3947 result = translation[i].errorFunc; 3948 } else if (result == NULL) { 3949 /* Null values are PROHIBITED */ 3950 return (EINVAL); 3951 } 3952 } else { 3953 result = translation[i].defaultFunc; 3954 } 3955 3956 /* Now store the function into the operations vector. */ 3957 3958 location = (fs_generic_func_p *) 3959 (((char *)vector) + translation[i].offset); 3960 3961 *location = result; 3962 } 3963 3964 *unused_ops = num_ops - used; 3965 3966 return (0); 3967 } 3968 3969 /* Placeholder functions, should never be called. */ 3970 3971 int 3972 fs_error(void) 3973 { 3974 cmn_err(CE_PANIC, "fs_error called"); 3975 return (0); 3976 } 3977 3978 int 3979 fs_default(void) 3980 { 3981 cmn_err(CE_PANIC, "fs_default called"); 3982 return (0); 3983 } 3984 3985 #ifdef __sparc 3986 3987 /* 3988 * Part of the implementation of booting off a mirrored root 3989 * involves a change of dev_t for the root device. To 3990 * accomplish this, first remove the existing hash table 3991 * entry for the root device, convert to the new dev_t, 3992 * then re-insert in the hash table at the head of the list. 
3993 */ 3994 void 3995 vfs_root_redev(vfs_t *vfsp, dev_t ndev, int fstype) 3996 { 3997 vfs_list_lock(); 3998 3999 vfs_hash_remove(vfsp); 4000 4001 vfsp->vfs_dev = ndev; 4002 vfs_make_fsid(&vfsp->vfs_fsid, ndev, fstype); 4003 4004 vfs_hash_add(vfsp, 1); 4005 4006 vfs_list_unlock(); 4007 } 4008 4009 #else /* x86 NEWBOOT */ 4010 4011 int 4012 rootconf() 4013 { 4014 int error; 4015 struct vfssw *vsw; 4016 extern void pm_init(); 4017 char *fstyp; 4018 4019 fstyp = getrootfs(); 4020 4021 if (error = clboot_rootconf()) 4022 return (error); 4023 4024 if (modload("fs", fstyp) == -1) 4025 cmn_err(CE_PANIC, "Cannot _init %s module\n", fstyp); 4026 4027 RLOCK_VFSSW(); 4028 vsw = vfs_getvfsswbyname(fstyp); 4029 RUNLOCK_VFSSW(); 4030 VFS_INIT(rootvfs, &vsw->vsw_vfsops, 0); 4031 VFS_HOLD(rootvfs); 4032 4033 /* always mount readonly first */ 4034 rootvfs->vfs_flag |= VFS_RDONLY; 4035 4036 pm_init(); 4037 4038 if (netboot) 4039 (void) strplumb(); 4040 4041 error = VFS_MOUNTROOT(rootvfs, ROOT_INIT); 4042 vfs_unrefvfssw(vsw); 4043 rootdev = rootvfs->vfs_dev; 4044 4045 if (error) 4046 cmn_err(CE_PANIC, "cannot mount root path %s", svm_bootpath); 4047 return (error); 4048 } 4049 4050 /* 4051 * XXX this is called by nfs only and should probably be removed 4052 * If booted with ASKNAME, prompt on the console for a filesystem 4053 * name and return it. 4054 */ 4055 void 4056 getfsname(char *askfor, char *name, size_t namelen) 4057 { 4058 if (boothowto & RB_ASKNAME) { 4059 printf("%s name: ", askfor); 4060 console_gets(name, namelen); 4061 } 4062 } 4063 4064 /* 4065 * If server_path exists, then we are booting a diskless 4066 * client. Otherwise, we default to ufs. Zfs should perhaps be 4067 * another property. 
4068 */ 4069 static char * 4070 getrootfs(void) 4071 { 4072 extern char *strplumb_get_netdev_path(void); 4073 char *propstr = NULL; 4074 4075 /* check fstype property; it should be nfsdyn for diskless */ 4076 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 4077 DDI_PROP_DONTPASS, "fstype", &propstr) 4078 == DDI_SUCCESS) { 4079 (void) strncpy(rootfs.bo_fstype, propstr, BO_MAXFSNAME); 4080 ddi_prop_free(propstr); 4081 } 4082 4083 if (strncmp(rootfs.bo_fstype, "nfs", 3) != 0) 4084 return (rootfs.bo_fstype); 4085 4086 ++netboot; 4087 /* check if path to network interface is specified in bootpath */ 4088 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 4089 DDI_PROP_DONTPASS, "bootpath", &propstr) 4090 == DDI_SUCCESS) { 4091 (void) strncpy(rootfs.bo_name, propstr, BO_MAXOBJNAME); 4092 ddi_prop_free(propstr); 4093 } else { 4094 /* attempt to determine netdev_path via boot_mac address */ 4095 netdev_path = strplumb_get_netdev_path(); 4096 if (netdev_path == NULL) 4097 cmn_err(CE_PANIC, 4098 "Cannot find boot network interface\n"); 4099 (void) strncpy(rootfs.bo_name, netdev_path, BO_MAXOBJNAME); 4100 } 4101 return ("nfs"); 4102 } 4103 #endif 4104