/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1999-2004 Poul-Henning Kamp
 * Copyright (c) 1999 Michael Smith
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/smp.h>
#include <sys/devctl.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/reboot.h>
#include <sys/sbuf.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <vm/uma.h>

#include <geom/geom.h>

#include <machine/stdarg.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)

static int	vfs_domount(struct thread *td, const char *fstype, char *fspath,
		    uint64_t fsflags, struct vfsoptlist **optlist);
static void	free_mntarg(struct mntarg *ma);

static int	usermount = 0;
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Unprivileged users may mount and unmount file systems");

static bool	default_autoro = false;
SYSCTL_BOOL(_vfs, OID_AUTO, default_autoro, CTLFLAG_RW, &default_autoro, 0,
    "Retry failed r/w mount as r/o if no explicit ro/rw option is specified");

MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure");
static uma_zone_t mount_zone;

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx_padalign __exclusive_cache_line mountlist_mtx;
MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);

EVENTHANDLER_LIST_DEFINE(vfs_mounted);
EVENTHANDLER_LIST_DEFINE(vfs_unmounted);

static void mount_devctl_event(const char *type, struct mount *mp, bool donew);

/*
 * Global opts, taken by all filesystems
 */
static const char *global_opts[] = {
	"errmsg",
	"fstype",
	"fspath",
	"ro",
	"rw",
	"nosuid",
	"noexec",
	NULL
};

static int
mount_init(void *mem, int size, int flags)
{
	struct mount *mp;

	mp = (struct mount *)mem;
	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
	mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
	lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
	mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO);
	mp->mnt_ref = 0;
	mp->mnt_vfs_ops = 1;
	mp->mnt_rootvnode = NULL;
	return (0);
}

static void
mount_fini(void *mem, int size)
{
	struct mount *mp;

	mp = (struct mount *)mem;
	uma_zfree_pcpu(pcpu_zone_16, mp->mnt_pcpu);
	lockdestroy(&mp->mnt_explock);
	mtx_destroy(&mp->mnt_listmtx);
	mtx_destroy(&mp->mnt_mtx);
}

static void
vfs_mount_init(void *dummy __unused)
{

	mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL,
	    NULL, mount_init, mount_fini, UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
}
SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL);

/*
 * ---------------------------------------------------------------------
 * Functions for building and sanitizing the mount options
 */

/* Remove one mount option. */
static void
vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
{

	TAILQ_REMOVE(opts, opt, link);
	free(opt->name, M_MOUNT);
	if (opt->value != NULL)
		free(opt->value, M_MOUNT);
	free(opt, M_MOUNT);
}

/* Release all resources related to the mount options. */
void
vfs_freeopts(struct vfsoptlist *opts)
{
	struct vfsopt *opt;

	while (!TAILQ_EMPTY(opts)) {
		opt = TAILQ_FIRST(opts);
		vfs_freeopt(opts, opt);
	}
	free(opts, M_MOUNT);
}

void
vfs_deleteopt(struct vfsoptlist *opts, const char *name)
{
	struct vfsopt *opt, *temp;

	if (opts == NULL)
		return;
	TAILQ_FOREACH_SAFE(opt, opts, link, temp) {
		if (strcmp(opt->name, name) == 0)
			vfs_freeopt(opts, opt);
	}
}

static int
vfs_isopt_ro(const char *opt)
{

	if (strcmp(opt, "ro") == 0 || strcmp(opt, "rdonly") == 0 ||
	    strcmp(opt, "norw") == 0)
		return (1);
	return (0);
}

static int
vfs_isopt_rw(const char *opt)
{

	if (strcmp(opt, "rw") == 0 || strcmp(opt, "noro") == 0)
		return (1);
	return (0);
}

/*
 * Check if options are equal (with or without the "no" prefix).
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{
	char *p;

	/* "opt" vs. "opt" or "noopt" vs. "noopt" */
	if (strcmp(opt1, opt2) == 0)
		return (1);
	/* "noopt" vs. "opt" */
	if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
		return (1);
	/* "opt" vs. "noopt" */
	if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
		return (1);
	while ((p = strchr(opt1, '.')) != NULL &&
	    !strncmp(opt1, opt2, ++p - opt1)) {
		opt2 += p - opt1;
		opt1 = p;
		/* "foo.noopt" vs. "foo.opt" */
		if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
			return (1);
		/* "foo.opt" vs. "foo.noopt" */
		if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
			return (1);
	}
	/* "ro" / "rdonly" / "norw" / "rw" / "noro" */
	if ((vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1)) &&
	    (vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2)))
		return (1);
	return (0);
}

/*
 * If a mount option is specified several times,
 * (with or without the "no" prefix) only keep
 * the last occurrence of it.
 */
static void
vfs_sanitizeopts(struct vfsoptlist *opts)
{
	struct vfsopt *opt, *opt2, *tmp;

	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
		while (opt2 != NULL) {
			if (vfs_equalopts(opt->name, opt2->name)) {
				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
				vfs_freeopt(opts, opt2);
				opt2 = tmp;
			} else {
				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
			}
		}
	}
}
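
/*
 * Illustrative note (not from the original source): given the option list
 * "ro" -> "noatime" -> "rw", vfs_sanitizeopts() walks the list backwards
 * and frees every earlier option that vfs_equalopts() considers equal to a
 * later one.  Since "ro" and "rw" compare equal (both are r/o-r/w
 * variants), only "noatime" -> "rw" survives, i.e. the last of the
 * conflicting options wins.
 */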

/*
 * Build a linked list of mount options from a struct uio.
 */
int
vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
{
	struct vfsoptlist *opts;
	struct vfsopt *opt;
	size_t memused, namelen, optlen;
	unsigned int i, iovcnt;
	int error;

	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
	TAILQ_INIT(opts);
	memused = 0;
	iovcnt = auio->uio_iovcnt;
	for (i = 0; i < iovcnt; i += 2) {
		namelen = auio->uio_iov[i].iov_len;
		optlen = auio->uio_iov[i + 1].iov_len;
		memused += sizeof(struct vfsopt) + optlen + namelen;
		/*
		 * Avoid consuming too much memory, and attempts to overflow
		 * memused.
		 */
		if (memused > VFS_MOUNTARG_SIZE_MAX ||
		    optlen > VFS_MOUNTARG_SIZE_MAX ||
		    namelen > VFS_MOUNTARG_SIZE_MAX) {
			error = EINVAL;
			goto bad;
		}

		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
		opt->value = NULL;
		opt->len = 0;
		opt->pos = i / 2;
		opt->seen = 0;

		/*
		 * Do this early, so jumps to "bad" will free the current
		 * option.
		 */
		TAILQ_INSERT_TAIL(opts, opt, link);

		if (auio->uio_segflg == UIO_SYSSPACE) {
			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
		} else {
			error = copyin(auio->uio_iov[i].iov_base, opt->name,
			    namelen);
			if (error)
				goto bad;
		}
		/* Ensure names are null-terminated strings. */
		if (namelen == 0 || opt->name[namelen - 1] != '\0') {
			error = EINVAL;
			goto bad;
		}
		if (optlen != 0) {
			opt->len = optlen;
			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
			if (auio->uio_segflg == UIO_SYSSPACE) {
				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
				    optlen);
			} else {
				error = copyin(auio->uio_iov[i + 1].iov_base,
				    opt->value, optlen);
				if (error)
					goto bad;
			}
		}
	}
	vfs_sanitizeopts(opts);
	*options = opts;
	return (0);
bad:
	vfs_freeopts(opts);
	return (error);
}
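
/*
 * Illustrative sketch (not part of the original source): nmount(2) callers
 * pass options as consecutive (name, value) iovec pairs, which is the
 * layout vfs_buildopts() consumes above.  A minimal userland call might
 * look like:
 *
 *	struct iovec iov[] = {
 *		{ .iov_base = __DECONST(char *, "fstype"),
 *		  .iov_len = sizeof("fstype") },
 *		{ .iov_base = __DECONST(char *, "tmpfs"),
 *		  .iov_len = sizeof("tmpfs") },
 *		{ .iov_base = __DECONST(char *, "fspath"),
 *		  .iov_len = sizeof("fspath") },
 *		{ .iov_base = __DECONST(char *, "/mnt"),
 *		  .iov_len = sizeof("/mnt") },
 *	};
 *	nmount(iov, nitems(iov), 0);
 *
 * Names and values must be NUL-terminated, and a valueless option uses an
 * iov_len of 0 for its value slot.
 */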

/*
 * Merge the old mount options with the new ones passed
 * in the MNT_UPDATE case.
 *
 * XXX: This function will keep a "nofoo" option in the new
 * options.  E.g., if the option's canonical name is "foo",
 * "nofoo" ends up in the mount point's active options.
 */
static void
vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *oldopts)
{
	struct vfsopt *opt, *new;

	TAILQ_FOREACH(opt, oldopts, link) {
		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
		new->name = strdup(opt->name, M_MOUNT);
		if (opt->len != 0) {
			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
			bcopy(opt->value, new->value, opt->len);
		} else
			new->value = NULL;
		new->len = opt->len;
		new->seen = opt->seen;
		TAILQ_INSERT_HEAD(toopts, new, link);
	}
	vfs_sanitizeopts(toopts);
}

/*
 * Mount a filesystem.
 */
#ifndef _SYS_SYSPROTO_H_
struct nmount_args {
	struct iovec *iovp;
	unsigned int iovcnt;
	int flags;
};
#endif
int
sys_nmount(struct thread *td, struct nmount_args *uap)
{
	struct uio *auio;
	int error;
	u_int iovcnt;
	uint64_t flags;

	/*
	 * Mount flags are now 64-bits. On 32-bit architectures only
	 * 32-bits are passed in, but from here on everything handles
	 * 64-bit flags correctly.
	 */
	flags = uap->flags;

	AUDIT_ARG_FFLAGS(flags);
	CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__,
	    uap->iovp, uap->iovcnt, flags);

	/*
	 * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
	 * userspace to set this flag, but we must filter it out if we want
	 * MNT_UPDATE on the root file system to work.
	 * MNT_ROOTFS should only be set by the kernel when mounting its
	 * root file system.
	 */
	flags &= ~MNT_ROOTFS;

	iovcnt = uap->iovcnt;
	/*
	 * Check that we have an even number of iovecs
	 * and that we have at least two options.
	 */
	if ((iovcnt & 1) || (iovcnt < 4)) {
		CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__,
		    uap->iovcnt);
		return (EINVAL);
	}

	error = copyinuio(uap->iovp, iovcnt, &auio);
	if (error) {
		CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno",
		    __func__, error);
		return (error);
	}
	error = vfs_donmount(td, flags, auio);

	free(auio, M_IOV);
	return (error);
}

/*
 * ---------------------------------------------------------------------
 * Various utility functions
 */

/*
 * Get a reference on a mount point from a vnode.
 *
 * The vnode is allowed to be passed unlocked and race against dooming. Note
 * that in such a case there are no guarantees the referenced mount point
 * will still be associated with it after the function returns.
 */
struct mount *
vfs_ref_from_vp(struct vnode *vp)
{
	struct mount *mp;
	struct mount_pcpu *mpcpu;

	mp = atomic_load_ptr(&vp->v_mount);
	if (__predict_false(mp == NULL)) {
		return (mp);
	}
	if (vfs_op_thread_enter(mp, mpcpu)) {
		if (__predict_true(mp == vp->v_mount)) {
			vfs_mp_count_add_pcpu(mpcpu, ref, 1);
			vfs_op_thread_exit(mp, mpcpu);
		} else {
			vfs_op_thread_exit(mp, mpcpu);
			mp = NULL;
		}
	} else {
		MNT_ILOCK(mp);
		if (mp == vp->v_mount) {
			MNT_REF(mp);
			MNT_IUNLOCK(mp);
		} else {
			MNT_IUNLOCK(mp);
			mp = NULL;
		}
	}
	return (mp);
}

void
vfs_ref(struct mount *mp)
{
	struct mount_pcpu *mpcpu;

	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
	if (vfs_op_thread_enter(mp, mpcpu)) {
		vfs_mp_count_add_pcpu(mpcpu, ref, 1);
		vfs_op_thread_exit(mp, mpcpu);
		return;
	}

	MNT_ILOCK(mp);
	MNT_REF(mp);
	MNT_IUNLOCK(mp);
}

struct mount *
vfs_pin_from_vp(struct vnode *vp)
{
	struct mount *mp;

	mp = atomic_load_ptr(&vp->v_mount);
	if (mp == NULL)
		return (NULL);
	MNT_ILOCK(mp);
	if (mp != vp->v_mount ||
	    (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
		MNT_IUNLOCK(mp);
		return (NULL);
	}
	MNT_REF(mp);
	KASSERT(mp->mnt_pinned_count < INT_MAX,
	    ("mount pinned count overflow"));
	++mp->mnt_pinned_count;
	MNT_IUNLOCK(mp);
	return (mp);
}

void
vfs_unpin(struct mount *mp)
{
	MNT_ILOCK(mp);
	KASSERT(mp->mnt_pinned_count > 0, ("mount pinned count underflow"));
	KASSERT((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0,
	    ("mount pinned with pending unmount"));
	--mp->mnt_pinned_count;
	MNT_REL(mp);
	MNT_IUNLOCK(mp);
}

void
vfs_rel(struct mount *mp)
{
	struct mount_pcpu *mpcpu;

	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
	if (vfs_op_thread_enter(mp, mpcpu)) {
		vfs_mp_count_sub_pcpu(mpcpu, ref, 1);
		vfs_op_thread_exit(mp, mpcpu);
		return;
	}

	MNT_ILOCK(mp);
	MNT_REL(mp);
	MNT_IUNLOCK(mp);
}
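
/*
 * Illustrative note (not from the original source): the common caller
 * pattern for the helpers above is to grab a reference from a possibly
 * unlocked vnode and drop it when done:
 *
 *	mp = vfs_ref_from_vp(vp);
 *	if (mp != NULL) {
 *		... use mp ...
 *		vfs_rel(mp);
 *	}
 *
 * The per-CPU fast path in vfs_ref()/vfs_rel() avoids the mount interlock
 * unless vfs_op_enter() has forced all counters back under MNT_ILOCK.
 */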

/*
 * Allocate and initialize the mount point struct.
 */
struct mount *
vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
    struct ucred *cred)
{
	struct mount *mp;

	mp = uma_zalloc(mount_zone, M_WAITOK);
	bzero(&mp->mnt_startzero,
	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
	mp->mnt_kern_flag = 0;
	mp->mnt_flag = 0;
	mp->mnt_rootvnode = NULL;
	mp->mnt_vnodecovered = NULL;
	mp->mnt_op = NULL;
	mp->mnt_vfc = NULL;
	TAILQ_INIT(&mp->mnt_nvnodelist);
	mp->mnt_nvnodelistsize = 0;
	TAILQ_INIT(&mp->mnt_lazyvnodelist);
	mp->mnt_lazyvnodelistsize = 0;
	if (mp->mnt_ref != 0 || mp->mnt_lockref != 0 ||
	    mp->mnt_writeopcount != 0)
		panic("%s: non-zero counters on new mp %p\n", __func__, mp);
	if (mp->mnt_vfs_ops != 1)
		panic("%s: vfs_ops should be 1 but %d found\n", __func__,
		    mp->mnt_vfs_ops);
	(void) vfs_busy(mp, MBF_NOWAIT);
	atomic_add_acq_int(&vfsp->vfc_refcount, 1);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_gen++;
	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_vnodecovered = vp;
	mp->mnt_cred = crdup(cred);
	mp->mnt_stat.f_owner = cred->cr_uid;
	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
	mp->mnt_iosize_max = DFLTPHYS;
#ifdef MAC
	mac_mount_init(mp);
	mac_mount_create(cred, mp);
#endif
	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
	TAILQ_INIT(&mp->mnt_uppers);
	mp->mnt_pinned_count = 0;
	return (mp);
}

/*
 * Destroy the mount struct previously allocated by vfs_mount_alloc().
 */
void
vfs_mount_destroy(struct mount *mp)
{

	if (mp->mnt_vfs_ops == 0)
		panic("%s: entered with zero vfs_ops\n", __func__);

	vfs_assert_mount_counters(mp);

	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_REFEXPIRE;
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}
	while (mp->mnt_ref)
		msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
	KASSERT(mp->mnt_ref == 0,
	    ("%s: invalid refcount in the drain path @ %s:%d", __func__,
	    __FILE__, __LINE__));
	if (mp->mnt_writeopcount != 0)
		panic("vfs_mount_destroy: nonzero writeopcount");
	if (mp->mnt_secondary_writes != 0)
		panic("vfs_mount_destroy: nonzero secondary_writes");
	atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
		struct vnode *vp;

		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
			vn_printf(vp, "dangling vnode ");
		panic("unmount: dangling vnode");
	}
	KASSERT(mp->mnt_pinned_count == 0,
	    ("mnt_pinned_count = %d", mp->mnt_pinned_count));
	KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
	if (mp->mnt_nvnodelistsize != 0)
		panic("vfs_mount_destroy: nonzero nvnodelistsize");
	if (mp->mnt_lazyvnodelistsize != 0)
		panic("vfs_mount_destroy: nonzero lazyvnodelistsize");
	if (mp->mnt_lockref != 0)
		panic("vfs_mount_destroy: nonzero lock refcount");
	MNT_IUNLOCK(mp);

	if (mp->mnt_vfs_ops != 1)
		panic("%s: vfs_ops should be 1 but %d found\n", __func__,
		    mp->mnt_vfs_ops);

	if (mp->mnt_rootvnode != NULL)
		panic("%s: mount point still has a root vnode %p\n", __func__,
		    mp->mnt_rootvnode);

	if (mp->mnt_vnodecovered != NULL)
		vrele(mp->mnt_vnodecovered);
#ifdef MAC
	mac_mount_destroy(mp);
#endif
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	crfree(mp->mnt_cred);
	uma_zfree(mount_zone, mp);
}
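
/*
 * Illustrative note (not from the original source): a struct mount leaves
 * vfs_mount_alloc() busied (vfs_busy()), with mnt_ref and mnt_vfs_ops both
 * at 1; vfs_mount_destroy() expects the same single vfs_ops reference and
 * sleeps ("mntref") until every other reference is gone before freeing the
 * zone item.  The panics above enforce that contract.
 */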

static bool
vfs_should_downgrade_to_ro_mount(uint64_t fsflags, int error)
{
	/* This is an upgrade of an existing mount. */
	if ((fsflags & MNT_UPDATE) != 0)
		return (false);
	/* This is already an R/O mount. */
	if ((fsflags & MNT_RDONLY) != 0)
		return (false);

	switch (error) {
	case ENODEV:	/* generic, geom, ... */
	case EACCES:	/* cam/scsi, ... */
	case EROFS:	/* md, mmcsd, ... */
		/*
		 * These errors can be returned by the storage layer to signal
		 * that the media is read-only.  No harm in the R/O mount
		 * attempt if the error was returned for some other reason.
		 */
		return (true);
	default:
		return (false);
	}
}
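
/*
 * Illustrative note (not from the original source): with the check above,
 * a first mount attempt that fails with e.g. EROFS is transparently
 * retried read-only when the "autoro" option (or the vfs.default_autoro
 * sysctl) is in effect and the caller did not ask for "ro" or "rw"
 * explicitly:
 *
 *	mount -o autoro /dev/cd0 /mnt	# falls back to an R/O mount
 */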

int
vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions)
{
	struct vfsoptlist *optlist;
	struct vfsopt *opt, *tmp_opt;
	char *fstype, *fspath, *errmsg;
	int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
	bool autoro;

	errmsg = fspath = NULL;
	errmsg_len = fspathlen = 0;
	errmsg_pos = -1;
	autoro = default_autoro;

	error = vfs_buildopts(fsoptions, &optlist);
	if (error)
		return (error);

	if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
		errmsg_pos = vfs_getopt_pos(optlist, "errmsg");

	/*
	 * We need these two options before the others,
	 * and they are mandatory for any filesystem.
	 * Ensure they are NUL terminated as well.
	 */
	fstypelen = 0;
	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
	if (error || fstypelen <= 0 || fstype[fstypelen - 1] != '\0') {
		error = EINVAL;
		if (errmsg != NULL)
			strncpy(errmsg, "Invalid fstype", errmsg_len);
		goto bail;
	}
	fspathlen = 0;
	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
	if (error || fspathlen <= 0 || fspath[fspathlen - 1] != '\0') {
		error = EINVAL;
		if (errmsg != NULL)
			strncpy(errmsg, "Invalid fspath", errmsg_len);
		goto bail;
	}

	/*
	 * We need to see if we have the "update" option
	 * before we call vfs_domount(), since vfs_domount() has special
	 * logic based on MNT_UPDATE.  This is very important
	 * when we want to update the root filesystem.
	 */
	TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
		int do_freeopt = 0;

		if (strcmp(opt->name, "update") == 0) {
			fsflags |= MNT_UPDATE;
			do_freeopt = 1;
		}
		else if (strcmp(opt->name, "async") == 0)
			fsflags |= MNT_ASYNC;
		else if (strcmp(opt->name, "force") == 0) {
			fsflags |= MNT_FORCE;
			do_freeopt = 1;
		}
		else if (strcmp(opt->name, "reload") == 0) {
			fsflags |= MNT_RELOAD;
			do_freeopt = 1;
		}
		else if (strcmp(opt->name, "multilabel") == 0)
			fsflags |= MNT_MULTILABEL;
		else if (strcmp(opt->name, "noasync") == 0)
			fsflags &= ~MNT_ASYNC;
		else if (strcmp(opt->name, "noatime") == 0)
			fsflags |= MNT_NOATIME;
		else if (strcmp(opt->name, "atime") == 0) {
			free(opt->name, M_MOUNT);
			opt->name = strdup("nonoatime", M_MOUNT);
		}
		else if (strcmp(opt->name, "noclusterr") == 0)
			fsflags |= MNT_NOCLUSTERR;
		else if (strcmp(opt->name, "clusterr") == 0) {
			free(opt->name, M_MOUNT);
			opt->name = strdup("nonoclusterr", M_MOUNT);
		}
		else if (strcmp(opt->name, "noclusterw") == 0)
			fsflags |= MNT_NOCLUSTERW;
		else if (strcmp(opt->name, "clusterw") == 0) {
			free(opt->name, M_MOUNT);
			opt->name = strdup("nonoclusterw", M_MOUNT);
		}
		else if (strcmp(opt->name, "noexec") == 0)
			fsflags |= MNT_NOEXEC;
		else if (strcmp(opt->name, "exec") == 0) {
			free(opt->name, M_MOUNT);
			opt->name = strdup("nonoexec", M_MOUNT);
		}
		else if (strcmp(opt->name, "nosuid") == 0)
			fsflags |= MNT_NOSUID;
		else if (strcmp(opt->name, "suid") == 0) {
			free(opt->name, M_MOUNT);
			opt->name = strdup("nonosuid", M_MOUNT);
		}
		else if (strcmp(opt->name, "nosymfollow") == 0)
			fsflags |= MNT_NOSYMFOLLOW;
		else if (strcmp(opt->name, "symfollow") == 0) {
			free(opt->name, M_MOUNT);
			opt->name = strdup("nonosymfollow", M_MOUNT);
		}
		else if (strcmp(opt->name, "noro") == 0) {
			fsflags &= ~MNT_RDONLY;
			autoro = false;
		}
		else if (strcmp(opt->name, "rw") == 0) {
			fsflags &= ~MNT_RDONLY;
			autoro = false;
		}
		else if (strcmp(opt->name, "ro") == 0) {
			fsflags |= MNT_RDONLY;
			autoro = false;
		}
		else if (strcmp(opt->name, "rdonly") == 0) {
			free(opt->name, M_MOUNT);
			opt->name = strdup("ro", M_MOUNT);
			fsflags |= MNT_RDONLY;
			autoro = false;
		}
		else if (strcmp(opt->name, "autoro") == 0) {
			do_freeopt = 1;
			autoro = true;
		}
		else if (strcmp(opt->name, "suiddir") == 0)
			fsflags |= MNT_SUIDDIR;
		else if (strcmp(opt->name, "sync") == 0)
			fsflags |= MNT_SYNCHRONOUS;
		else if (strcmp(opt->name, "union") == 0)
			fsflags |= MNT_UNION;
		else if (strcmp(opt->name, "automounted") == 0) {
			fsflags |= MNT_AUTOMOUNTED;
			do_freeopt = 1;
		} else if (strcmp(opt->name, "nocover") == 0) {
			fsflags |= MNT_NOCOVER;
			do_freeopt = 1;
		} else if (strcmp(opt->name, "cover") == 0) {
			fsflags &= ~MNT_NOCOVER;
			do_freeopt = 1;
		} else if (strcmp(opt->name, "emptydir") == 0) {
			fsflags |= MNT_EMPTYDIR;
			do_freeopt = 1;
		} else if (strcmp(opt->name, "noemptydir") == 0) {
			fsflags &= ~MNT_EMPTYDIR;
			do_freeopt = 1;
		}
		if (do_freeopt)
			vfs_freeopt(optlist, opt);
	}
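
	/*
	 * Illustrative note (not from the original source): the strdup()
	 * renames above implement positive options as double negation.
	 * "atime" becomes "nonoatime" so that vfs_sanitizeopts() treats it
	 * as the negation of "noatime"; an option list of
	 * "noatime" -> "atime" therefore collapses to just "nonoatime",
	 * re-enabling access time updates.
	 */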

	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if (fstypelen > MFSNAMELEN || fspathlen > MNAMELEN) {
		error = ENAMETOOLONG;
		goto bail;
	}

	error = vfs_domount(td, fstype, fspath, fsflags, &optlist);

	/*
	 * See if we can mount in the read-only mode if the error code suggests
	 * that it could be possible and the mount options allow for that.
	 * Never try it if "[no]{ro|rw}" has been explicitly requested and not
	 * overridden by "autoro".
	 */
	if (autoro && vfs_should_downgrade_to_ro_mount(fsflags, error)) {
		printf("%s: R/W mount failed, possibly R/O media,"
		    " trying R/O mount\n", __func__);
		fsflags |= MNT_RDONLY;
		error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
	}
bail:
	/* copyout the errmsg */
	if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
	    && errmsg_len > 0 && errmsg != NULL) {
		if (fsoptions->uio_segflg == UIO_SYSSPACE) {
			bcopy(errmsg,
			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
		} else {
			copyout(errmsg,
			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
		}
	}

	if (optlist != NULL)
		vfs_freeopts(optlist);
	return (error);
}

/*
 * Old mount API.
 */
#ifndef _SYS_SYSPROTO_H_
struct mount_args {
	char	*type;
	char	*path;
	int	flags;
	caddr_t	data;
};
#endif
/* ARGSUSED */
int
sys_mount(struct thread *td, struct mount_args *uap)
{
	char *fstype;
	struct vfsconf *vfsp = NULL;
	struct mntarg *ma = NULL;
	uint64_t flags;
	int error;

	/*
	 * Mount flags are now 64-bits. On 32-bit architectures only
	 * 32-bits are passed in, but from here on everything handles
	 * 64-bit flags correctly.
	 */
	flags = uap->flags;

	AUDIT_ARG_FFLAGS(flags);

	/*
	 * Filter out MNT_ROOTFS.  We do not want clients of mount() in
	 * userspace to set this flag, but we must filter it out if we want
	 * MNT_UPDATE on the root file system to work.
	 * MNT_ROOTFS should only be set by the kernel when mounting its
	 * root file system.
	 */
	flags &= ~MNT_ROOTFS;

	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
	error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
	if (error) {
		free(fstype, M_TEMP);
		return (error);
	}

	AUDIT_ARG_TEXT(fstype);
	vfsp = vfs_byname_kld(fstype, td, &error);
	free(fstype, M_TEMP);
	if (vfsp == NULL)
		return (ENOENT);
	if (((vfsp->vfc_flags & VFCF_SBDRY) != 0 &&
	    vfsp->vfc_vfsops_sd->vfs_cmount == NULL) ||
	    ((vfsp->vfc_flags & VFCF_SBDRY) == 0 &&
	    vfsp->vfc_vfsops->vfs_cmount == NULL))
		return (EOPNOTSUPP);

	ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN);
	ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
	ma = mount_argb(ma, flags & MNT_RDONLY, "noro");
	ma = mount_argb(ma, !(flags & MNT_NOSUID), "nosuid");
	ma = mount_argb(ma, !(flags & MNT_NOEXEC), "noexec");

	if ((vfsp->vfc_flags & VFCF_SBDRY) != 0)
		return (vfsp->vfc_vfsops_sd->vfs_cmount(ma, uap->data, flags));
	return (vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, flags));
}

/*
 * vfs_domount_first(): first file system mount (not update)
 */
static int
vfs_domount_first(
	struct thread *td,		/* Calling thread. */
	struct vfsconf *vfsp,		/* File system type. */
	char *fspath,			/* Mount path. */
	struct vnode *vp,		/* Vnode to be covered. */
	uint64_t fsflags,		/* Flags common to all filesystems. */
	struct vfsoptlist **optlist	/* Options local to the filesystem. */
	)
{
	struct vattr va;
	struct mount *mp;
	struct vnode *newdp, *rootvp;
	int error, error1;
	bool unmounted;

	ASSERT_VOP_ELOCKED(vp, __func__);
	KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here"));

	if ((fsflags & MNT_EMPTYDIR) != 0) {
		error = vfs_emptydir(vp);
		if (error != 0) {
			vput(vp);
			return (error);
		}
	}

	/*
	 * If the jail of the calling thread lacks permission for this type of
	 * file system, or is trying to cover its own root, deny immediately.
	 */
	if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred,
	    vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) {
		vput(vp);
		return (EPERM);
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error == 0 && va.va_uid != td->td_ucred->cr_uid)
		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN);
	if (error == 0)
		error = vinvalbuf(vp, V_SAVE, 0, 0);
	if (error == 0 && vp->v_type != VDIR)
		error = ENOTDIR;
	if (error == 0) {
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
			vp->v_iflag |= VI_MOUNT;
		else
			error = EBUSY;
		VI_UNLOCK(vp);
	}
	if (error != 0) {
		vput(vp);
		return (error);
	}
	vn_seqc_write_begin(vp);
	VOP_UNLOCK(vp);

	/* Allocate and initialize the filesystem. */
	mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred);
	/* XXXMAC: pass to vfs_mount_alloc? */
	mp->mnt_optnew = *optlist;
	/* Set the mount level flags. */
	mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY));

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.  No freeing of cn_pnbuf.
	 */
	error1 = 0;
	unmounted = true;
	if ((error = VFS_MOUNT(mp)) != 0 ||
	    (error1 = VFS_STATFS(mp, &mp->mnt_stat)) != 0 ||
	    (error1 = VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) != 0) {
		rootvp = NULL;
		if (error1 != 0) {
			MPASS(error == 0);
			rootvp = vfs_cache_root_clear(mp);
			if (rootvp != NULL) {
				vhold(rootvp);
				vrele(rootvp);
			}
			(void)vn_start_write(NULL, &mp, V_WAIT);
			MNT_ILOCK(mp);
			mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_UNMOUNTF;
			MNT_IUNLOCK(mp);
			VFS_PURGE(mp);
			error = VFS_UNMOUNT(mp, 0);
			vn_finished_write(mp);
			if (error != 0) {
				printf(
		    "failed post-mount (%d): rollback unmount returned %d\n",
				    error1, error);
				unmounted = false;
			}
			error = error1;
		}
		vfs_unbusy(mp);
		mp->mnt_vnodecovered = NULL;
		if (unmounted) {
			/* XXXKIB wait for mnt_lockref drain? */
			vfs_mount_destroy(mp);
		}
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		if (rootvp != NULL) {
			vn_seqc_write_end(rootvp);
			vdrop(rootvp);
		}
		vn_seqc_write_end(vp);
		vrele(vp);
		return (error);
	}
	vn_seqc_write_begin(newdp);
	VOP_UNLOCK(newdp);

	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	mp->mnt_opt = mp->mnt_optnew;
	*optlist = NULL;

	/*
	 * Prevent external consumers of mount options from reading mnt_optnew.
	 */
	mp->mnt_optnew = NULL;

	MNT_ILOCK(mp);
	if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
	    (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
		mp->mnt_kern_flag |= MNTK_ASYNC;
	else
		mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	cache_purge(vp);
	VI_LOCK(vp);
	vp->v_iflag &= ~VI_MOUNT;
	vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
	vp->v_mountedhere = mp;
	VI_UNLOCK(vp);
	/* Place the new filesystem at the end of the mount list. */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	vfs_event_signal(NULL, VQ_MOUNT, 0);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	VOP_UNLOCK(vp);
	EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td);
	VOP_UNLOCK(newdp);
	mount_devctl_event("MOUNT", mp, false);
	mountcheckdirs(vp, newdp);
	vn_seqc_write_end(vp);
	vn_seqc_write_end(newdp);
	vrele(newdp);
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		vfs_allocate_syncvnode(mp);
	vfs_op_exit(mp);
	vfs_unbusy(mp);
	return (0);
}

/*
 * vfs_domount_update(): update of mounted file system
 */
static int
vfs_domount_update(
	struct thread *td,		/* Calling thread. */
	struct vnode *vp,		/* Mount point vnode. */
	uint64_t fsflags,		/* Flags common to all filesystems. */
	struct vfsoptlist **optlist	/* Options local to the filesystem. */
	)
{
	struct export_args export;
	struct o2export_args o2export;
	struct vnode *rootvp;
	void *bufp;
	struct mount *mp;
	int error, export_error, i, len;
	uint64_t flag;
	gid_t *grps;

	ASSERT_VOP_ELOCKED(vp, __func__);
	KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here"));
	mp = vp->v_mount;

	if ((vp->v_vflag & VV_ROOT) == 0) {
		if (vfs_copyopt(*optlist, "export", &export, sizeof(export))
		    == 0)
			error = EXDEV;
		else
			error = EINVAL;
		vput(vp);
		return (error);
	}

	/*
	 * We only allow the filesystem to be reloaded if it
	 * is currently mounted read-only.
	 */
	flag = mp->mnt_flag;
	if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) {
		vput(vp);
		return (EOPNOTSUPP);	/* Needs translation */
	}
	/*
	 * Only privileged root, or (if MNT_USER is set) the user that
	 * did the original mount is permitted to update it.
	 */
	error = vfs_suser(mp, td);
	if (error != 0) {
		vput(vp);
		return (error);
	}
	if (vfs_busy(mp, MBF_NOWAIT)) {
		vput(vp);
		return (EBUSY);
	}
	VI_LOCK(vp);
	if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) {
		VI_UNLOCK(vp);
		vfs_unbusy(mp);
		vput(vp);
		return (EBUSY);
	}
	vp->v_iflag |= VI_MOUNT;
	VI_UNLOCK(vp);
	VOP_UNLOCK(vp);

	vfs_op_enter(mp);
	vn_seqc_write_begin(vp);

	rootvp = NULL;
	MNT_ILOCK(mp);
	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
		MNT_IUNLOCK(mp);
		error = EBUSY;
		goto end;
	}
	mp->mnt_flag &= ~MNT_UPDATEMASK;
	mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE |
	    MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY);
	if ((mp->mnt_flag & MNT_ASYNC) == 0)
		mp->mnt_kern_flag &= ~MNTK_ASYNC;
	rootvp = vfs_cache_root_clear(mp);
	MNT_IUNLOCK(mp);
	mp->mnt_optnew = *optlist;
	vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.  No freeing of cn_pnbuf.
	 */
	error = VFS_MOUNT(mp);

	export_error = 0;
	/* Process the export option. */
	if (error == 0 && vfs_getopt(mp->mnt_optnew, "export", &bufp,
	    &len) == 0) {
		/* Assume that there is only 1 ABI for each length. */
		switch (len) {
		case (sizeof(struct oexport_args)):
			bzero(&o2export, sizeof(o2export));
			/* FALLTHROUGH */
		case (sizeof(o2export)):
			bcopy(bufp, &o2export, len);
			export.ex_flags = (uint64_t)o2export.ex_flags;
			export.ex_root = o2export.ex_root;
			export.ex_uid = o2export.ex_anon.cr_uid;
			export.ex_groups = NULL;
			export.ex_ngroups = o2export.ex_anon.cr_ngroups;
			if (export.ex_ngroups > 0) {
				if (export.ex_ngroups <= XU_NGROUPS) {
					export.ex_groups = malloc(
					    export.ex_ngroups * sizeof(gid_t),
					    M_TEMP, M_WAITOK);
					for (i = 0; i < export.ex_ngroups; i++)
						export.ex_groups[i] =
						    o2export.ex_anon.cr_groups[i];
				} else
					export_error = EINVAL;
			} else if (export.ex_ngroups < 0)
				export_error = EINVAL;
			export.ex_addr = o2export.ex_addr;
			export.ex_addrlen = o2export.ex_addrlen;
			export.ex_mask = o2export.ex_mask;
			export.ex_masklen = o2export.ex_masklen;
			export.ex_indexfile = o2export.ex_indexfile;
			export.ex_numsecflavors = o2export.ex_numsecflavors;
			if (export.ex_numsecflavors < MAXSECFLAVORS) {
				for (i = 0; i < export.ex_numsecflavors; i++)
					export.ex_secflavors[i] =
					    o2export.ex_secflavors[i];
			} else
				export_error = EINVAL;
			if (export_error == 0)
				export_error = vfs_export(mp, &export);
			free(export.ex_groups, M_TEMP);
			break;
		case (sizeof(export)):
			bcopy(bufp, &export, len);
			grps = NULL;
			if (export.ex_ngroups > 0) {
				if (export.ex_ngroups <= NGROUPS_MAX) {
					grps = malloc(export.ex_ngroups *
					    sizeof(gid_t), M_TEMP, M_WAITOK);
					export_error = copyin(export.ex_groups,
					    grps, export.ex_ngroups *
					    sizeof(gid_t));
					if (export_error == 0)
						export.ex_groups = grps;
				} else
					export_error = EINVAL;
			} else if (export.ex_ngroups == 0)
				export.ex_groups = NULL;
			else
				export_error = EINVAL;
			if (export_error == 0)
				export_error = vfs_export(mp, &export);
			free(grps, M_TEMP);
			break;
		default:
			export_error = EINVAL;
			break;
		}
	}

	MNT_ILOCK(mp);
	if (error == 0) {
		mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE |
		    MNT_SNAPSHOT);
	} else {
		/*
		 * If we fail, restore old mount flags. MNT_QUOTA is special,
		 * because it is not part of MNT_UPDATEMASK, but it could have
		 * changed in the meantime if quotactl(2) was called.
		 * All in all we want current value of MNT_QUOTA, not the old
		 * one.
		 */
		mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
	}
	if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
	    (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
		mp->mnt_kern_flag |= MNTK_ASYNC;
	else
		mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);

	if (error != 0)
		goto end;

	mount_devctl_event("REMOUNT", mp, true);
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	mp->mnt_opt = mp->mnt_optnew;
	*optlist = NULL;
	(void)VFS_STATFS(mp, &mp->mnt_stat);
	/*
	 * Prevent external consumers of mount options from reading
	 * mnt_optnew.
	 */
	mp->mnt_optnew = NULL;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		vfs_allocate_syncvnode(mp);
	else
		vfs_deallocate_syncvnode(mp);
end:
	vfs_op_exit(mp);
	if (rootvp != NULL) {
		vn_seqc_write_end(rootvp);
		vrele(rootvp);
	}
	vn_seqc_write_end(vp);
	vfs_unbusy(mp);
	VI_LOCK(vp);
	vp->v_iflag &= ~VI_MOUNT;
	VI_UNLOCK(vp);
	vrele(vp);
	return (error != 0 ? error : export_error);
}

/*
 * vfs_domount(): actually attempt a filesystem mount.
 */
static int
vfs_domount(
	struct thread *td,		/* Calling thread. */
	const char *fstype,		/* Filesystem type. */
	char *fspath,			/* Mount path. */
	uint64_t fsflags,		/* Flags common to all filesystems. */
	struct vfsoptlist **optlist	/* Options local to the filesystem. */
	)
{
	struct vfsconf *vfsp;
	struct nameidata nd;
	struct vnode *vp;
	char *pathbuf;
	int error;

	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
		return (ENAMETOOLONG);

	if (jailed(td->td_ucred) || usermount == 0) {
		if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
			return (error);
	}

	/*
	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
	 */
	if (fsflags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
		if (error)
			return (error);
	}
	if (fsflags & MNT_SUIDDIR) {
		error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
		if (error)
			return (error);
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
	 */
	if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
		if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
			fsflags |= MNT_NOSUID | MNT_USER;
	}

	/* Load KLDs before we lock the covered vnode to avoid reversals. */
	vfsp = NULL;
	if ((fsflags & MNT_UPDATE) == 0) {
		/* Don't try to load KLDs if we're mounting the root. */
		if (fsflags & MNT_ROOTFS)
			vfsp = vfs_byname(fstype);
		else
			vfsp = vfs_byname_kld(fstype, td, &error);
		if (vfsp == NULL)
			return (ENODEV);
	}

	/*
	 * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE.
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
	    UIO_SYSSPACE, fspath, td);
	error = namei(&nd);
	if (error != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if ((fsflags & MNT_UPDATE) == 0) {
		if ((vp->v_vflag & VV_ROOT) != 0 &&
		    (fsflags & MNT_NOCOVER) != 0) {
			vput(vp);
			return (EBUSY);
		}
		pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
		strcpy(pathbuf, fspath);
		error = vn_path_to_global_path(td, vp, pathbuf, MNAMELEN);
		if (error == 0) {
			error = vfs_domount_first(td, vfsp, pathbuf, vp,
			    fsflags, optlist);
		}
		free(pathbuf, M_TEMP);
	} else
		error = vfs_domount_update(td, vp, fsflags, optlist);

	return (error);
}

/*
 * Unmount a filesystem.
 *
 * Note: unmount takes a path to the vnode mounted on as argument, not
 * special file (as before).
 */
#ifndef _SYS_SYSPROTO_H_
struct unmount_args {
	char	*path;
	int	flags;
};
#endif
/* ARGSUSED */
int
sys_unmount(struct thread *td, struct unmount_args *uap)
{

	return (kern_unmount(td, uap->path, uap->flags));
}

int
kern_unmount(struct thread *td, const char *path, int flags)
{
	struct nameidata nd;
	struct mount *mp;
	char *pathbuf;
	int error, id0, id1;

	AUDIT_ARG_VALUE(flags);
	if (jailed(td->td_ucred) || usermount == 0) {
		error = priv_check(td, PRIV_VFS_UNMOUNT);
		if (error)
			return (error);
	}

	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
	error = copyinstr(path, pathbuf, MNAMELEN, NULL);
	if (error) {
		free(pathbuf, M_TEMP);
		return (error);
	}
	if (flags & MNT_BYFSID) {
		AUDIT_ARG_TEXT(pathbuf);
		/* Decode the filesystem ID. */
		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
			free(pathbuf, M_TEMP);
			return (EINVAL);
		}

		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
			    mp->mnt_stat.f_fsid.val[1] == id1) {
				vfs_ref(mp);
				break;
			}
		}
		mtx_unlock(&mountlist_mtx);
	} else {
		/*
		 * Try to find global path for path argument.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_SYSSPACE, pathbuf, td);
		if (namei(&nd) == 0) {
			NDFREE(&nd, NDF_ONLY_PNBUF);
			error = vn_path_to_global_path(td, nd.ni_vp, pathbuf,
			    MNAMELEN);
			if (error == 0)
				vput(nd.ni_vp);
		}
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) {
				vfs_ref(mp);
				break;
			}
		}
		mtx_unlock(&mountlist_mtx);
	}
	free(pathbuf, M_TEMP);
	if (mp == NULL) {
		/*
		 * Previously we returned ENOENT for a nonexistent path and
		 * EINVAL for a non-mountpoint.  We cannot tell these apart
		 * now, so in the !MNT_BYFSID case return the more likely
		 * EINVAL for compatibility.
		 */
		return ((flags & MNT_BYFSID) ? ENOENT : EINVAL);
	}

	/*
	 * Don't allow unmounting the root filesystem.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		vfs_rel(mp);
		return (EINVAL);
	}
	error = dounmount(mp, flags, td);
	return (error);
}
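
/*
 * Illustrative note (not from the original source): with MNT_BYFSID the
 * "path" argument is not a path at all but the textual filesystem ID
 * decoded by the sscanf() above, i.e. the two 32-bit words of
 * mnt_stat.f_fsid formatted as:
 *
 *	"FSID:<val[0]>:<val[1]>"
 *
 * which is how umount(8) requests an unmount by fsid.
 */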

/*
 * Return error if any of the vnodes, ignoring the root vnode
 * and the syncer vnode, have non-zero usecount.
 *
 * This function is purely advisory - it can return false positives
 * and negatives.
 */
static int
vfs_check_usecounts(struct mount *mp)
{
	struct vnode *vp, *mvp;

	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if ((vp->v_vflag & VV_ROOT) == 0 && vp->v_type != VNON &&
		    vp->v_usecount != 0) {
			VI_UNLOCK(vp);
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			return (EBUSY);
		}
		VI_UNLOCK(vp);
	}

	return (0);
}

static void
dounmount_cleanup(struct mount *mp, struct vnode *coveredvp, int mntkflags)
{

	mtx_assert(MNT_MTX(mp), MA_OWNED);
	mp->mnt_kern_flag &= ~mntkflags;
	if ((mp->mnt_kern_flag & MNTK_MWAIT) != 0) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}
	vfs_op_exit_locked(mp);
	MNT_IUNLOCK(mp);
	if (coveredvp != NULL) {
		VOP_UNLOCK(coveredvp);
		vdrop(coveredvp);
	}
	vn_finished_write(mp);
}

/*
 * There are various reference counters associated with the mount point.
 * Normally it is permitted to modify them without taking the mnt ilock,
 * but this behavior can be temporarily disabled if stable value is needed
 * or callers are expected to block (e.g. to not allow new users during
 * forced unmount).
 */
void
vfs_op_enter(struct mount *mp)
{
	struct mount_pcpu *mpcpu;
	int cpu;

	MNT_ILOCK(mp);
	mp->mnt_vfs_ops++;
	if (mp->mnt_vfs_ops > 1) {
		MNT_IUNLOCK(mp);
		return;
	}
	vfs_op_barrier_wait(mp);
	CPU_FOREACH(cpu) {
		mpcpu = vfs_mount_pcpu_remote(mp, cpu);

		mp->mnt_ref += mpcpu->mntp_ref;
		mpcpu->mntp_ref = 0;

		mp->mnt_lockref += mpcpu->mntp_lockref;
		mpcpu->mntp_lockref = 0;

		mp->mnt_writeopcount += mpcpu->mntp_writeopcount;
		mpcpu->mntp_writeopcount = 0;
	}
	if (mp->mnt_ref <= 0 || mp->mnt_lockref < 0 || mp->mnt_writeopcount < 0)
		panic("%s: invalid count(s) on mp %p: ref %d lockref %d writeopcount %d\n",
		    __func__, mp, mp->mnt_ref, mp->mnt_lockref,
		    mp->mnt_writeopcount);
	MNT_IUNLOCK(mp);
	vfs_assert_mount_counters(mp);
}

void
vfs_op_exit_locked(struct mount *mp)
{

	mtx_assert(MNT_MTX(mp), MA_OWNED);

	if (mp->mnt_vfs_ops <= 0)
		panic("%s: invalid vfs_ops count %d for mp %p\n",
		    __func__, mp->mnt_vfs_ops, mp);
	mp->mnt_vfs_ops--;
}

void
vfs_op_exit(struct mount *mp)
{

	MNT_ILOCK(mp);
	vfs_op_exit_locked(mp);
	MNT_IUNLOCK(mp);
}
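
/*
 * Illustrative note (not from the original source): vfs_op_enter() and
 * vfs_op_exit() bracket operations that need the normally per-CPU counters
 * folded into the struct mount and kept stable, e.g.:
 *
 *	vfs_op_enter(mp);	// collapse per-CPU refs, disable fast path
 *	... unmount/update work reading mnt_ref, mnt_lockref, ...
 *	vfs_op_exit(mp);	// re-enable the per-CPU fast path
 *
 * While mnt_vfs_ops > 0, vfs_op_thread_enter() fails and vfs_ref(),
 * vfs_rel() and friends fall back to MNT_ILOCK-protected updates.
 */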

struct vfs_op_barrier_ipi {
	struct mount *mp;
	struct smp_rendezvous_cpus_retry_arg srcra;
};

static void
vfs_op_action_func(void *arg)
{
	struct vfs_op_barrier_ipi *vfsopipi;
	struct mount *mp;

	vfsopipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
	mp = vfsopipi->mp;

	if (!vfs_op_thread_entered(mp))
		smp_rendezvous_cpus_done(arg);
}

static void
vfs_op_wait_func(void *arg, int cpu)
{
	struct vfs_op_barrier_ipi *vfsopipi;
	struct mount *mp;
	struct mount_pcpu *mpcpu;

	vfsopipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
	mp = vfsopipi->mp;

	mpcpu = vfs_mount_pcpu_remote(mp, cpu);
	while (atomic_load_int(&mpcpu->mntp_thread_in_ops))
		cpu_spinwait();
}

void
vfs_op_barrier_wait(struct mount *mp)
{
	struct vfs_op_barrier_ipi vfsopipi;

	vfsopipi.mp = mp;

	smp_rendezvous_cpus_retry(all_cpus,
	    smp_no_rendezvous_barrier,
	    vfs_op_action_func,
	    smp_no_rendezvous_barrier,
	    vfs_op_wait_func,
	    &vfsopipi.srcra);
}

#ifdef DIAGNOSTIC
void
vfs_assert_mount_counters(struct mount *mp)
{
	struct mount_pcpu *mpcpu;
	int cpu;

	if (mp->mnt_vfs_ops == 0)
		return;

	CPU_FOREACH(cpu) {
		mpcpu = vfs_mount_pcpu_remote(mp, cpu);
		if (mpcpu->mntp_ref != 0 ||
		    mpcpu->mntp_lockref != 0 ||
		    mpcpu->mntp_writeopcount != 0)
			vfs_dump_mount_counters(mp);
	}
}

void
vfs_dump_mount_counters(struct mount *mp)
{
	struct mount_pcpu *mpcpu;
	int ref, lockref, writeopcount;
	int cpu;

	printf("%s: mp %p vfs_ops %d\n", __func__, mp, mp->mnt_vfs_ops);

	printf("        ref : ");
	ref = mp->mnt_ref;
	CPU_FOREACH(cpu) {
		mpcpu = vfs_mount_pcpu_remote(mp, cpu);
		printf("%d ", mpcpu->mntp_ref);
		ref += mpcpu->mntp_ref;
	}
	printf("\n");
	printf("    lockref : ");
	lockref = mp->mnt_lockref;
	CPU_FOREACH(cpu) {
		mpcpu = vfs_mount_pcpu_remote(mp, cpu);
		printf("%d ", mpcpu->mntp_lockref);
		lockref += mpcpu->mntp_lockref;
	}
	printf("\n");
	printf("writeopcount: ");
	writeopcount = mp->mnt_writeopcount;
	CPU_FOREACH(cpu) {
		mpcpu = vfs_mount_pcpu_remote(mp, cpu);
		printf("%d ", mpcpu->mntp_writeopcount);
		writeopcount += mpcpu->mntp_writeopcount;
	}
	printf("\n");

	printf("counter       struct total\n");
	printf("ref          %-5d  %-5d\n", mp->mnt_ref, ref);
	printf("lockref      %-5d  %-5d\n", mp->mnt_lockref, lockref);
	printf("writeopcount %-5d  %-5d\n", mp->mnt_writeopcount, writeopcount);

	panic("invalid counts on struct mount");
}
#endif

int
vfs_mount_fetch_counter(struct mount *mp, enum mount_counter which)
{
	struct mount_pcpu *mpcpu;
	int cpu, sum;

	switch (which) {
	case MNT_COUNT_REF:
		sum = mp->mnt_ref;
		break;
	case MNT_COUNT_LOCKREF:
		sum = mp->mnt_lockref;
		break;
	case MNT_COUNT_WRITEOPCOUNT:
		sum = mp->mnt_writeopcount;
		break;
	}

	CPU_FOREACH(cpu) {
		mpcpu = vfs_mount_pcpu_remote(mp, cpu);
		switch (which) {
		case MNT_COUNT_REF:
			sum += mpcpu->mntp_ref;
			break;
		case MNT_COUNT_LOCKREF:
			sum += mpcpu->mntp_lockref;
			break;
		case MNT_COUNT_WRITEOPCOUNT:
			sum += mpcpu->mntp_writeopcount;
			break;
		}
	}
	return (sum);
}

/*
 * Do the actual filesystem unmount.
 */
int
dounmount(struct mount *mp, int flags, struct thread *td)
{
	struct vnode *coveredvp, *rootvp;
	int error;
	uint64_t async_flag;
	int mnt_gen_r;

	if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
		mnt_gen_r = mp->mnt_gen;
		VI_LOCK(coveredvp);
		vholdl(coveredvp);
		vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
		/*
		 * Check for mp being unmounted while waiting for the
		 * covered vnode lock.
		 */
		if (coveredvp->v_mountedhere != mp ||
		    coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
			VOP_UNLOCK(coveredvp);
			vdrop(coveredvp);
			vfs_rel(mp);
			return (EBUSY);
		}
	}

	/*
	 * Only privileged root, or (if MNT_USER is set) the user that did the
	 * original mount is permitted to unmount this filesystem.
	 */
	error = vfs_suser(mp, td);
	if (error != 0) {
		if (coveredvp != NULL) {
			VOP_UNLOCK(coveredvp);
			vdrop(coveredvp);
		}
		vfs_rel(mp);
		return (error);
	}

	vfs_op_enter(mp);

	vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
	MNT_ILOCK(mp);
	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
	    (mp->mnt_flag & MNT_UPDATE) != 0 ||
	    mp->mnt_pinned_count != 0) {
		dounmount_cleanup(mp, coveredvp, 0);
		return (EBUSY);
	}
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	rootvp = vfs_cache_root_clear(mp);
	if (coveredvp != NULL)
		vn_seqc_write_begin(coveredvp);
	if (flags & MNT_NONBUSY) {
		MNT_IUNLOCK(mp);
		error = vfs_check_usecounts(mp);
		MNT_ILOCK(mp);
		if (error != 0) {
			vn_seqc_write_end(coveredvp);
			dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT);
			if (rootvp != NULL) {
				vn_seqc_write_end(rootvp);
				vrele(rootvp);
			}
			return (error);
		}
	}
	/* Allow filesystems to detect that a forced unmount is in progress. */
	if (flags & MNT_FORCE) {
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
		MNT_IUNLOCK(mp);
		/*
		 * Must be done after setting MNTK_UNMOUNTF and before
		 * waiting for mnt_lockref to become 0.
		 */
		VFS_PURGE(mp);
		MNT_ILOCK(mp);
	}
	error = 0;
	if (mp->mnt_lockref) {
		mp->mnt_kern_flag |= MNTK_DRAINING;
		error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,
		    "mount drain", 0);
	}
	MNT_IUNLOCK(mp);
	KASSERT(mp->mnt_lockref == 0,
	    ("%s: invalid lock refcount in the drain path @ %s:%d",
	    __func__, __FILE__, __LINE__));
	KASSERT(error == 0,
	    ("%s: invalid return value for msleep in the drain path @ %s:%d",
	    __func__, __FILE__, __LINE__));

	/*
	 * We want to keep the vnode around so that we can vn_seqc_write_end
	 * after we are done with unmount.  Downgrade our reference to a mere
	 * hold count so that we don't interfere with anything.
	 */
	if (rootvp != NULL) {
		vhold(rootvp);
		vrele(rootvp);
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_periodic(mp, MNT_WAIT);
	MNT_ILOCK(mp);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);
	vfs_deallocate_syncvnode(mp);
	error = VFS_UNMOUNT(mp, flags);
	vn_finished_write(mp);
	/*
	 * If we failed to flush the dirty blocks for this mount point,
	 * undo all the cdir/rdir and rootvnode changes we made above.
	 * Unless we failed to do so because the device is reporting that
	 * it doesn't exist anymore.
	 */
	if (error && error != ENXIO) {
		MNT_ILOCK(mp);
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			MNT_IUNLOCK(mp);
			vfs_allocate_syncvnode(mp);
			MNT_ILOCK(mp);
		}
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
		    (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
			mp->mnt_kern_flag |= MNTK_ASYNC;
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		vfs_op_exit_locked(mp);
		MNT_IUNLOCK(mp);
		if (coveredvp) {
			vn_seqc_write_end(coveredvp);
			VOP_UNLOCK(coveredvp);
			vdrop(coveredvp);
		}
		if (rootvp != NULL) {
			vn_seqc_write_end(rootvp);
			vdrop(rootvp);
		}
		return (error);
	}
	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td);
	if (coveredvp != NULL) {
		VI_LOCK(coveredvp);
		vn_irflag_unset_locked(coveredvp, VIRF_MOUNTPOINT);
		coveredvp->v_mountedhere = NULL;
		vn_seqc_write_end_locked(coveredvp);
		VI_UNLOCK(coveredvp);
		VOP_UNLOCK(coveredvp);
		vdrop(coveredvp);
	}
	mount_devctl_event("UNMOUNT", mp, false);
	if (rootvp != NULL) {
		vn_seqc_write_end(rootvp);
		vdrop(rootvp);
	}
	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
	if (rootvnode != NULL && mp == rootvnode->v_mount) {
		vrele(rootvnode);
		rootvnode = NULL;
	}
	if (mp == rootdevmp)
		rootdevmp = NULL;
	vfs_mount_destroy(mp);
	return (0);
}

/*
 * Report errors during filesystem mounting.
 */
void
vfs_mount_error(struct mount *mp, const char *fmt, ...)
{
	struct vfsoptlist *moptlist = mp->mnt_optnew;
	va_list ap;
	int error, len;
	char *errmsg;

	error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
	if (error || errmsg == NULL || len <= 0)
		return;

	va_start(ap, fmt);
	vsnprintf(errmsg, (size_t)len, fmt, ap);
	va_end(ap);
}

void
vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...)
{
	va_list ap;
	int error, len;
	char *errmsg;

	error = vfs_getopt(opts, "errmsg", (void **)&errmsg, &len);
	if (error || errmsg == NULL || len <= 0)
		return;

	va_start(ap, fmt);
	vsnprintf(errmsg, (size_t)len, fmt, ap);
	va_end(ap);
}
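
/*
 * Illustrative note (not from the original source): a filesystem's
 * VFS_MOUNT() implementation reports a human-readable failure reason
 * through the caller-supplied "errmsg" option buffer, e.g. (with a
 * hypothetical bsize variable):
 *
 *	vfs_mount_error(mp, "unsupported block size %d", bsize);
 *
 * vfs_donmount() then copies the message back out to the nmount(2)
 * caller's "errmsg" iovec, which is how mount(8) prints such errors.
 */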
/*
 * ---------------------------------------------------------------------
 * Functions for querying mount options/arguments from filesystems.
 */

/*
 * Check that no unknown options are given
 */
int
vfs_filteropt(struct vfsoptlist *opts, const char **legal)
{
	struct vfsopt *opt;
	char errmsg[255];
	const char **t, *p, *q;
	int ret = 0;

	TAILQ_FOREACH(opt, opts, link) {
		p = opt->name;
		q = NULL;
		if (p[0] == 'n' && p[1] == 'o')
			q = p + 2;
		for (t = global_opts; *t != NULL; t++) {
			if (strcmp(*t, p) == 0)
				break;
			if (q != NULL) {
				if (strcmp(*t, q) == 0)
					break;
			}
		}
		if (*t != NULL)
			continue;
		for (t = legal; *t != NULL; t++) {
			if (strcmp(*t, p) == 0)
				break;
			if (q != NULL) {
				if (strcmp(*t, q) == 0)
					break;
			}
		}
		if (*t != NULL)
			continue;
		snprintf(errmsg, sizeof(errmsg),
		    "mount option <%s> is unknown", p);
		ret = EINVAL;
	}
	if (ret != 0) {
		TAILQ_FOREACH(opt, opts, link) {
			if (strcmp(opt->name, "errmsg") == 0) {
				strncpy((char *)opt->value, errmsg, opt->len);
				break;
			}
		}
		if (opt == NULL)
			printf("%s\n", errmsg);
	}
	return (ret);
}

/*
 * Get a mount option by its name.
 *
 * Return 0 if the option was found, ENOENT otherwise.
 * If len is non-NULL it will be filled with the length
 * of the option.  If buf is non-NULL, it will be filled
 * with the address of the option.
 */
int
vfs_getopt(struct vfsoptlist *opts, const char *name, void **buf, int *len)
{
	struct vfsopt *opt;

	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));

	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) == 0) {
			opt->seen = 1;
			if (len != NULL)
				*len = opt->len;
			if (buf != NULL)
				*buf = opt->value;
			return (0);
		}
	}
	return (ENOENT);
}

int
vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
{
	struct vfsopt *opt;

	if (opts == NULL)
		return (-1);

	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) == 0) {
			opt->seen = 1;
			return (opt->pos);
		}
	}
	return (-1);
}

int
vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value)
{
	char *opt_value, *vtp;
	quad_t iv;
	int error, opt_len;

	error = vfs_getopt(opts, name, (void **)&opt_value, &opt_len);
	if (error != 0)
		return (error);
	if (opt_len == 0 || opt_value == NULL)
		return (EINVAL);
	if (opt_value[0] == '\0' || opt_value[opt_len - 1] != '\0')
		return (EINVAL);
	iv = strtoq(opt_value, &vtp, 0);
	if (vtp == opt_value || (vtp[0] != '\0' && vtp[1] != '\0'))
		return (EINVAL);
	if (iv < 0)
		return (EINVAL);
	switch (vtp[0]) {
	case 't': case 'T':
		iv *= 1024;
		/* FALLTHROUGH */
	case 'g': case 'G':
		iv *= 1024;
		/* FALLTHROUGH */
	case 'm': case 'M':
		iv *= 1024;
		/* FALLTHROUGH */
	case 'k': case 'K':
		iv *= 1024;
		/* FALLTHROUGH */
	case '\0':
		break;
	default:
		return (EINVAL);
	}
	*value = iv;

	return (0);
}
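
/*
 * Usage sketch: vfs_getopt_size() accepts an optional k/m/g/t suffix,
 * so an option value of "64m" yields 64 * 1024 * 1024.  The helper and
 * the 16M default below are hypothetical; the block is compiled out.
 */
#if 0
static int
example_get_size(struct mount *mp, off_t *maxsize)
{
	int error;

	error = vfs_getopt_size(mp->mnt_optnew, "size", maxsize);
	if (error == ENOENT) {
		/* Option absent: fall back to a default. */
		*maxsize = 16 * 1024 * 1024;
		error = 0;
	}
	return (error);
}
#endif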
char *
vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
{
	struct vfsopt *opt;

	*error = 0;
	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) != 0)
			continue;
		opt->seen = 1;
		if (opt->len == 0 ||
		    ((char *)opt->value)[opt->len - 1] != '\0') {
			*error = EINVAL;
			return (NULL);
		}
		return (opt->value);
	}
	*error = ENOENT;
	return (NULL);
}

int
vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
    uint64_t val)
{
	struct vfsopt *opt;

	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) == 0) {
			opt->seen = 1;
			if (w != NULL)
				*w |= val;
			return (1);
		}
	}
	if (w != NULL)
		*w &= ~val;
	return (0);
}

int
vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
{
	va_list ap;
	struct vfsopt *opt;
	int ret;

	KASSERT(opts != NULL, ("vfs_scanopt: caller passed 'opts' as NULL"));

	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) != 0)
			continue;
		opt->seen = 1;
		if (opt->len == 0 || opt->value == NULL)
			return (0);
		if (((char *)opt->value)[opt->len - 1] != '\0')
			return (0);
		va_start(ap, fmt);
		ret = vsscanf(opt->value, fmt, ap);
		va_end(ap);
		return (ret);
	}
	return (0);
}

int
vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len)
{
	struct vfsopt *opt;

	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) != 0)
			continue;
		opt->seen = 1;
		if (opt->value == NULL)
			opt->len = len;
		else {
			if (opt->len != len)
				return (EINVAL);
			bcopy(value, opt->value, len);
		}
		return (0);
	}
	return (ENOENT);
}

int
vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len)
{
	struct vfsopt *opt;

	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) != 0)
			continue;
		opt->seen = 1;
		if (opt->value == NULL)
			opt->len = len;
		else {
			if (opt->len < len)
				return (EINVAL);
			opt->len = len;
			bcopy(value, opt->value, len);
		}
		return (0);
	}
	return (ENOENT);
}

int
vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value)
{
	struct vfsopt *opt;

	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) != 0)
			continue;
		opt->seen = 1;
		if (opt->value == NULL)
			opt->len = strlen(value) + 1;
		else if (strlcpy(opt->value, value, opt->len) >= opt->len)
			return (EINVAL);
		return (0);
	}
	return (ENOENT);
}

/*
 * Find and copy a mount option.
 *
 * The size of the buffer has to be specified in len; if it is not
 * the same length as the mount option, EINVAL is returned.
 * Returns ENOENT if the option is not found.
 */
int
vfs_copyopt(struct vfsoptlist *opts, const char *name, void *dest, int len)
{
	struct vfsopt *opt;

	KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));

	TAILQ_FOREACH(opt, opts, link) {
		if (strcmp(name, opt->name) == 0) {
			opt->seen = 1;
			if (len != opt->len)
				return (EINVAL);
			bcopy(opt->value, dest, opt->len);
			return (0);
		}
	}
	return (ENOENT);
}
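
/*
 * Usage sketch: vfs_flagopt() turns the presence or absence of an
 * option into a flag bit, while vfs_scanopt() does a vsscanf-style
 * conversion and returns the number of items converted.  The option
 * names and EXAMPLEMNT_UNION are hypothetical; the block is compiled
 * out.
 */
#if 0
#define	EXAMPLEMNT_UNION	0x0001

static void
example_parse_opts(struct mount *mp, uint64_t *eflags, int *timeo)
{

	/* Sets the bit if "union" was given, clears it otherwise. */
	vfs_flagopt(mp->mnt_optnew, "union", eflags, EXAMPLEMNT_UNION);

	/* One conversion expected; fall back to 30 on a missing/bad value. */
	if (vfs_scanopt(mp->mnt_optnew, "timeout", "%d", timeo) != 1)
		*timeo = 30;
}
#endif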
int
__vfs_statfs(struct mount *mp, struct statfs *sbp)
{

	/*
	 * Filesystems only fill in part of the structure for updates;
	 * we have to read the entirety first to get all content.
	 */
	if (sbp != &mp->mnt_stat)
		memcpy(sbp, &mp->mnt_stat, sizeof(*sbp));

	/*
	 * Set these in case the underlying filesystem fails to do so.
	 */
	sbp->f_version = STATFS_VERSION;
	sbp->f_namemax = NAME_MAX;
	sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

	return (mp->mnt_op->vfs_statfs(mp, sbp));
}

void
vfs_mountedfrom(struct mount *mp, const char *from)
{

	bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
	strlcpy(mp->mnt_stat.f_mntfromname, from,
	    sizeof mp->mnt_stat.f_mntfromname);
}

/*
 * ---------------------------------------------------------------------
 * This is the API for building mount args and mounting filesystems from
 * inside the kernel.
 *
 * The API works by accumulation of individual args.  The first error is
 * latched.
 *
 * XXX: should be documented in new manpage kernel_mount(9)
 */

/* A memory allocation which must be freed when we are done */
struct mntaarg {
	SLIST_ENTRY(mntaarg) next;
};

/* The header for the mount arguments */
struct mntarg {
	struct iovec *v;
	int len;
	int error;
	SLIST_HEAD(, mntaarg) list;
};

/*
 * Add a boolean argument.
 *
 * flag is the boolean value.
 * name must start with "no".
 */
struct mntarg *
mount_argb(struct mntarg *ma, int flag, const char *name)
{

	KASSERT(name[0] == 'n' && name[1] == 'o',
	    ("mount_argb(...,%s): name must start with 'no'", name));

	return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
}

/*
 * Add an argument printf style
 */
struct mntarg *
mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
{
	va_list ap;
	struct mntaarg *maa;
	struct sbuf *sb;
	int len;

	if (ma == NULL) {
		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
		SLIST_INIT(&ma->list);
	}
	if (ma->error)
		return (ma);

	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
	    M_MOUNT, M_WAITOK);
	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
	ma->v[ma->len].iov_len = strlen(name) + 1;
	ma->len++;

	sb = sbuf_new_auto();
	va_start(ap, fmt);
	sbuf_vprintf(sb, fmt, ap);
	va_end(ap);
	sbuf_finish(sb);
	len = sbuf_len(sb) + 1;
	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
	SLIST_INSERT_HEAD(&ma->list, maa, next);
	bcopy(sbuf_data(sb), maa + 1, len);
	sbuf_delete(sb);

	ma->v[ma->len].iov_base = maa + 1;
	ma->v[ma->len].iov_len = len;
	ma->len++;

	return (ma);
}

/*
 * Add an argument which is a userland string.
 */
struct mntarg *
mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
{
	struct mntaarg *maa;
	char *tbuf;

	if (val == NULL)
		return (ma);
	if (ma == NULL) {
		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
		SLIST_INIT(&ma->list);
	}
	if (ma->error)
		return (ma);
	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
	SLIST_INSERT_HEAD(&ma->list, maa, next);
	tbuf = (void *)(maa + 1);
	ma->error = copyinstr(val, tbuf, len, NULL);
	return (mount_arg(ma, name, tbuf, -1));
}
/*
 * Plain argument.
 *
 * If length is -1, treat value as a C string.
 */
struct mntarg *
mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
{

	if (ma == NULL) {
		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
		SLIST_INIT(&ma->list);
	}
	if (ma->error)
		return (ma);

	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
	    M_MOUNT, M_WAITOK);
	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
	ma->v[ma->len].iov_len = strlen(name) + 1;
	ma->len++;

	ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
	if (len < 0)
		ma->v[ma->len].iov_len = strlen(val) + 1;
	else
		ma->v[ma->len].iov_len = len;
	ma->len++;
	return (ma);
}

/*
 * Free a mntarg structure
 */
static void
free_mntarg(struct mntarg *ma)
{
	struct mntaarg *maa;

	while (!SLIST_EMPTY(&ma->list)) {
		maa = SLIST_FIRST(&ma->list);
		SLIST_REMOVE_HEAD(&ma->list, next);
		free(maa, M_MOUNT);
	}
	free(ma->v, M_MOUNT);
	free(ma, M_MOUNT);
}

/*
 * Mount a filesystem
 */
int
kernel_mount(struct mntarg *ma, uint64_t flags)
{
	struct uio auio;
	int error;

	KASSERT(ma != NULL, ("kernel_mount NULL ma"));
	KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
	KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));

	auio.uio_iov = ma->v;
	auio.uio_iovcnt = ma->len;
	auio.uio_segflg = UIO_SYSSPACE;

	error = ma->error;
	if (!error)
		error = vfs_donmount(curthread, flags, &auio);
	free_mntarg(ma);
	return (error);
}

/*
 * A printflike function to mount a filesystem.
 */
int
kernel_vmount(int flags, ...)
{
	struct mntarg *ma = NULL;
	va_list ap;
	const char *cp;
	const void *vp;
	int error;

	va_start(ap, flags);
	for (;;) {
		cp = va_arg(ap, const char *);
		if (cp == NULL)
			break;
		vp = va_arg(ap, const void *);
		ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0));
	}
	va_end(ap);

	error = kernel_mount(ma, flags);
	return (error);
}
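
/*
 * Usage sketch: mounting a filesystem from inside the kernel with the
 * accumulation API above.  The first error latches in the mntarg chain,
 * so the intermediate calls need no checking; kernel_mount() reports it
 * and always frees the chain.  The tmpfs mount on /mnt is purely
 * illustrative; the block is compiled out.
 */
#if 0
static int
example_kernel_mount(void)
{
	struct mntarg *ma;

	ma = mount_arg(NULL, "fstype", "tmpfs", -1);
	ma = mount_arg(ma, "fspath", "/mnt", -1);
	ma = mount_argf(ma, "size", "%d", 64 * 1024 * 1024);
	ma = mount_argb(ma, 1, "noro");		/* flag set: adds "ro" */
	return (kernel_mount(ma, MNT_RDONLY));
}
#endif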
/* Map from mount options to printable formats. */
static struct mntoptnames optnames[] = {
	MNTOPT_NAMES
};

static void
mount_devctl_event_mntopt(struct sbuf *sb, const char *what,
    struct vfsoptlist *opts)
{
	struct vfsopt *opt;

	if (opts == NULL || TAILQ_EMPTY(opts))
		return;
	sbuf_printf(sb, " %s=\"", what);
	TAILQ_FOREACH(opt, opts, link) {
		if (opt->name[0] == '\0' ||
		    (opt->len > 0 && *(char *)opt->value == '\0'))
			continue;
		devctl_safe_quote_sb(sb, opt->name);
		if (opt->len > 0) {
			sbuf_putc(sb, '=');
			devctl_safe_quote_sb(sb, opt->value);
		}
		sbuf_putc(sb, ';');
	}
	sbuf_putc(sb, '"');
}

#define	DEVCTL_LEN	1024
static void
mount_devctl_event(const char *type, struct mount *mp, bool donew)
{
	const uint8_t *cp;
	struct mntoptnames *fp;
	struct sbuf sb;
	struct statfs *sfp = &mp->mnt_stat;
	char *buf;

	buf = malloc(DEVCTL_LEN, M_MOUNT, M_NOWAIT);
	if (buf == NULL)
		return;
	sbuf_new(&sb, buf, DEVCTL_LEN, SBUF_FIXEDLEN);
	sbuf_cpy(&sb, "mount-point=\"");
	devctl_safe_quote_sb(&sb, sfp->f_mntonname);
	sbuf_cat(&sb, "\" mount-dev=\"");
	devctl_safe_quote_sb(&sb, sfp->f_mntfromname);
	sbuf_cat(&sb, "\" mount-type=\"");
	devctl_safe_quote_sb(&sb, sfp->f_fstypename);
	sbuf_cat(&sb, "\" fsid=0x");
	cp = (const uint8_t *)&sfp->f_fsid.val[0];
	for (int i = 0; i < sizeof(sfp->f_fsid); i++)
		sbuf_printf(&sb, "%02x", cp[i]);
	sbuf_printf(&sb, " owner=%u flags=\"", sfp->f_owner);
	for (fp = optnames; fp->o_opt != 0; fp++) {
		if ((mp->mnt_flag & fp->o_opt) != 0) {
			sbuf_cat(&sb, fp->o_name);
			sbuf_putc(&sb, ';');
		}
	}
	sbuf_putc(&sb, '"');
	mount_devctl_event_mntopt(&sb, "opt", mp->mnt_opt);
	if (donew)
		mount_devctl_event_mntopt(&sb, "optnew", mp->mnt_optnew);
	sbuf_finish(&sb);

	if (sbuf_error(&sb) == 0)
		devctl_notify("VFS", "FS", type, sbuf_data(&sb));
	sbuf_delete(&sb);
	free(buf, M_MOUNT);
}
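
/*
 * For reference, a notification assembled above surfaces through
 * devctl(4)/devd(8) as a single line roughly of the form (values
 * illustrative, fsid bytes elided, wrapped here for readability):
 *
 *   mount-point="/mnt" mount-dev="/dev/md0" mount-type="ufs"
 *   fsid=0x... owner=0 flags="local;" opt="rw;"
 */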
/*
 * Suspend write operations on all local writeable filesystems.  Does a
 * full sync of them in the process.
 *
 * Iterate over the mount points in reverse order, suspending the most
 * recently mounted filesystems first.  This handles the case where a
 * filesystem mounted from a md(4) vnode-backed device should be
 * suspended before the filesystem that owns the vnode.
 */
void
suspend_all_fs(void)
{
	struct mount *mp;
	int error;

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
		error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT);
		if (error != 0)
			continue;
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_LOCAL)) != MNT_LOCAL ||
		    (mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
			mtx_lock(&mountlist_mtx);
			vfs_unbusy(mp);
			continue;
		}
		error = vfs_write_suspend(mp, 0);
		if (error == 0) {
			MNT_ILOCK(mp);
			MPASS((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0);
			mp->mnt_kern_flag |= MNTK_SUSPEND_ALL;
			MNT_IUNLOCK(mp);
			mtx_lock(&mountlist_mtx);
		} else {
			printf("suspend of %s failed, error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			mtx_lock(&mountlist_mtx);
			vfs_unbusy(mp);
		}
	}
	mtx_unlock(&mountlist_mtx);
}

void
resume_all_fs(void)
{
	struct mount *mp;

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if ((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0)
			continue;
		mtx_unlock(&mountlist_mtx);
		MNT_ILOCK(mp);
		MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) != 0);
		mp->mnt_kern_flag &= ~MNTK_SUSPEND_ALL;
		MNT_IUNLOCK(mp);
		vfs_write_resume(mp, 0);
		mtx_lock(&mountlist_mtx);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
}
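
/*
 * Usage sketch: the two functions above are meant to bracket an
 * operation that needs all local filesystems quiescent.  The helper
 * below is hypothetical; the block is compiled out.
 */
#if 0
static void
example_quiesced_operation(void)
{

	suspend_all_fs();
	/* ... work that requires quiescent local filesystems ... */
	resume_all_fs();
}
#endif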