1 /*- 2 * Copyright (c) 1999-2004 Poul-Henning Kamp 3 * Copyright (c) 1999 Michael Smith 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_vfs_allow_nonmpsafe.h" 41 42 #include <sys/param.h> 43 #include <sys/conf.h> 44 #include <sys/fcntl.h> 45 #include <sys/jail.h> 46 #include <sys/kernel.h> 47 #include <sys/libkern.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mutex.h> 51 #include <sys/namei.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/filedesc.h> 55 #include <sys/reboot.h> 56 #include <sys/sbuf.h> 57 #include <sys/syscallsubr.h> 58 #include <sys/sysproto.h> 59 #include <sys/sx.h> 60 #include <sys/sysctl.h> 61 #include <sys/sysent.h> 62 #include <sys/systm.h> 63 #include <sys/vnode.h> 64 #include <vm/uma.h> 65 66 #include <geom/geom.h> 67 68 #include <machine/stdarg.h> 69 70 #include <security/audit/audit.h> 71 #include <security/mac/mac_framework.h> 72 73 #define VFS_MOUNTARG_SIZE_MAX (1024 * 64) 74 75 static int vfs_domount(struct thread *td, const char *fstype, 76 char *fspath, int fsflags, struct vfsoptlist **optlist); 77 static void free_mntarg(struct mntarg *ma); 78 79 static int usermount = 0; 80 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 81 "Unprivileged users may mount and unmount file systems"); 82 83 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); 84 static MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); 85 static uma_zone_t mount_zone; 86 87 /* List of mounted filesystems. */ 88 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); 89 90 /* For any iteration/modification of mountlist */ 91 struct mtx mountlist_mtx; 92 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF); 93 94 /* 95 * Global opts, taken by all filesystems 96 */ 97 static const char *global_opts[] = { 98 "errmsg", 99 "fstype", 100 "fspath", 101 "ro", 102 "rw", 103 "nosuid", 104 "noexec", 105 NULL 106 }; 107 108 static int 109 mount_init(void *mem, int size, int flags) 110 { 111 struct mount *mp; 112 113 mp = (struct mount *)mem; 114 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); 115 lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); 116 return (0); 117 } 118 119 static void 120 mount_fini(void *mem, int size) 121 { 122 struct mount *mp; 123 124 mp = (struct mount *)mem; 125 lockdestroy(&mp->mnt_explock); 126 mtx_destroy(&mp->mnt_mtx); 127 } 128 129 static void 130 vfs_mount_init(void *dummy __unused) 131 { 132 133 mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL, 134 NULL, mount_init, mount_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 135 } 136 SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL); 137 138 /* 139 * --------------------------------------------------------------------- 140 * Functions for building and sanitizing the mount options 141 */ 142 143 /* Remove one mount option. */ 144 static void 145 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) 146 { 147 148 TAILQ_REMOVE(opts, opt, link); 149 free(opt->name, M_MOUNT); 150 if (opt->value != NULL) 151 free(opt->value, M_MOUNT); 152 free(opt, M_MOUNT); 153 } 154 155 /* Release all resources related to the mount options. */ 156 void 157 vfs_freeopts(struct vfsoptlist *opts) 158 { 159 struct vfsopt *opt; 160 161 while (!TAILQ_EMPTY(opts)) { 162 opt = TAILQ_FIRST(opts); 163 vfs_freeopt(opts, opt); 164 } 165 free(opts, M_MOUNT); 166 } 167 168 void 169 vfs_deleteopt(struct vfsoptlist *opts, const char *name) 170 { 171 struct vfsopt *opt, *temp; 172 173 if (opts == NULL) 174 return; 175 TAILQ_FOREACH_SAFE(opt, opts, link, temp) { 176 if (strcmp(opt->name, name) == 0) 177 vfs_freeopt(opts, opt); 178 } 179 } 180 181 static int 182 vfs_isopt_ro(const char *opt) 183 { 184 185 if (strcmp(opt, "ro") == 0 || strcmp(opt, "rdonly") == 0 || 186 strcmp(opt, "norw") == 0) 187 return (1); 188 return (0); 189 } 190 191 static int 192 vfs_isopt_rw(const char *opt) 193 { 194 195 if (strcmp(opt, "rw") == 0 || strcmp(opt, "noro") == 0) 196 return (1); 197 return (0); 198 } 199 200 /* 201 * Check if options are equal (with or without the "no" prefix). 202 */ 203 static int 204 vfs_equalopts(const char *opt1, const char *opt2) 205 { 206 char *p; 207 208 /* "opt" vs. "opt" or "noopt" vs. "noopt" */ 209 if (strcmp(opt1, opt2) == 0) 210 return (1); 211 /* "noopt" vs. "opt" */ 212 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) 213 return (1); 214 /* "opt" vs. "noopt" */ 215 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) 216 return (1); 217 while ((p = strchr(opt1, '.')) != NULL && 218 !strncmp(opt1, opt2, ++p - opt1)) { 219 opt2 += p - opt1; 220 opt1 = p; 221 /* "foo.noopt" vs. "foo.opt" */ 222 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) 223 return (1); 224 /* "foo.opt" vs. "foo.noopt" */ 225 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) 226 return (1); 227 } 228 /* "ro" / "rdonly" / "norw" / "rw" / "noro" */ 229 if ((vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1)) && 230 (vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2))) 231 return (1); 232 return (0); 233 } 234 235 /* 236 * If a mount option is specified several times, 237 * (with or without the "no" prefix) only keep 238 * the last occurence of it. 239 */ 240 static void 241 vfs_sanitizeopts(struct vfsoptlist *opts) 242 { 243 struct vfsopt *opt, *opt2, *tmp; 244 245 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { 246 opt2 = TAILQ_PREV(opt, vfsoptlist, link); 247 while (opt2 != NULL) { 248 if (vfs_equalopts(opt->name, opt2->name)) { 249 tmp = TAILQ_PREV(opt2, vfsoptlist, link); 250 vfs_freeopt(opts, opt2); 251 opt2 = tmp; 252 } else { 253 opt2 = TAILQ_PREV(opt2, vfsoptlist, link); 254 } 255 } 256 } 257 } 258 259 /* 260 * Build a linked list of mount options from a struct uio. 261 */ 262 int 263 vfs_buildopts(struct uio *auio, struct vfsoptlist **options) 264 { 265 struct vfsoptlist *opts; 266 struct vfsopt *opt; 267 size_t memused, namelen, optlen; 268 unsigned int i, iovcnt; 269 int error; 270 271 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 272 TAILQ_INIT(opts); 273 memused = 0; 274 iovcnt = auio->uio_iovcnt; 275 for (i = 0; i < iovcnt; i += 2) { 276 namelen = auio->uio_iov[i].iov_len; 277 optlen = auio->uio_iov[i + 1].iov_len; 278 memused += sizeof(struct vfsopt) + optlen + namelen; 279 /* 280 * Avoid consuming too much memory, and attempts to overflow 281 * memused. 282 */ 283 if (memused > VFS_MOUNTARG_SIZE_MAX || 284 optlen > VFS_MOUNTARG_SIZE_MAX || 285 namelen > VFS_MOUNTARG_SIZE_MAX) { 286 error = EINVAL; 287 goto bad; 288 } 289 290 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 291 opt->name = malloc(namelen, M_MOUNT, M_WAITOK); 292 opt->value = NULL; 293 opt->len = 0; 294 opt->pos = i / 2; 295 opt->seen = 0; 296 297 /* 298 * Do this early, so jumps to "bad" will free the current 299 * option. 300 */ 301 TAILQ_INSERT_TAIL(opts, opt, link); 302 303 if (auio->uio_segflg == UIO_SYSSPACE) { 304 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); 305 } else { 306 error = copyin(auio->uio_iov[i].iov_base, opt->name, 307 namelen); 308 if (error) 309 goto bad; 310 } 311 /* Ensure names are null-terminated strings. */ 312 if (namelen == 0 || opt->name[namelen - 1] != '\0') { 313 error = EINVAL; 314 goto bad; 315 } 316 if (optlen != 0) { 317 opt->len = optlen; 318 opt->value = malloc(optlen, M_MOUNT, M_WAITOK); 319 if (auio->uio_segflg == UIO_SYSSPACE) { 320 bcopy(auio->uio_iov[i + 1].iov_base, opt->value, 321 optlen); 322 } else { 323 error = copyin(auio->uio_iov[i + 1].iov_base, 324 opt->value, optlen); 325 if (error) 326 goto bad; 327 } 328 } 329 } 330 vfs_sanitizeopts(opts); 331 *options = opts; 332 return (0); 333 bad: 334 vfs_freeopts(opts); 335 return (error); 336 } 337 338 /* 339 * Merge the old mount options with the new ones passed 340 * in the MNT_UPDATE case. 341 * 342 * XXX: This function will keep a "nofoo" option in the new 343 * options. E.g, if the option's canonical name is "foo", 344 * "nofoo" ends up in the mount point's active options. 345 */ 346 static void 347 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *oldopts) 348 { 349 struct vfsopt *opt, *new; 350 351 TAILQ_FOREACH(opt, oldopts, link) { 352 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 353 new->name = strdup(opt->name, M_MOUNT); 354 if (opt->len != 0) { 355 new->value = malloc(opt->len, M_MOUNT, M_WAITOK); 356 bcopy(opt->value, new->value, opt->len); 357 } else 358 new->value = NULL; 359 new->len = opt->len; 360 new->seen = opt->seen; 361 TAILQ_INSERT_HEAD(toopts, new, link); 362 } 363 vfs_sanitizeopts(toopts); 364 } 365 366 /* 367 * Mount a filesystem. 368 */ 369 int 370 sys_nmount(td, uap) 371 struct thread *td; 372 struct nmount_args /* { 373 struct iovec *iovp; 374 unsigned int iovcnt; 375 int flags; 376 } */ *uap; 377 { 378 struct uio *auio; 379 int error; 380 u_int iovcnt; 381 382 AUDIT_ARG_FFLAGS(uap->flags); 383 CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__, 384 uap->iovp, uap->iovcnt, uap->flags); 385 386 /* 387 * Filter out MNT_ROOTFS. We do not want clients of nmount() in 388 * userspace to set this flag, but we must filter it out if we want 389 * MNT_UPDATE on the root file system to work. 390 * MNT_ROOTFS should only be set by the kernel when mounting its 391 * root file system. 392 */ 393 uap->flags &= ~MNT_ROOTFS; 394 395 iovcnt = uap->iovcnt; 396 /* 397 * Check that we have an even number of iovec's 398 * and that we have at least two options. 399 */ 400 if ((iovcnt & 1) || (iovcnt < 4)) { 401 CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__, 402 uap->iovcnt); 403 return (EINVAL); 404 } 405 406 error = copyinuio(uap->iovp, iovcnt, &auio); 407 if (error) { 408 CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno", 409 __func__, error); 410 return (error); 411 } 412 error = vfs_donmount(td, uap->flags, auio); 413 414 free(auio, M_IOV); 415 return (error); 416 } 417 418 /* 419 * --------------------------------------------------------------------- 420 * Various utility functions 421 */ 422 423 void 424 vfs_ref(struct mount *mp) 425 { 426 427 CTR2(KTR_VFS, "%s: mp %p", __func__, mp); 428 MNT_ILOCK(mp); 429 MNT_REF(mp); 430 MNT_IUNLOCK(mp); 431 } 432 433 void 434 vfs_rel(struct mount *mp) 435 { 436 437 CTR2(KTR_VFS, "%s: mp %p", __func__, mp); 438 MNT_ILOCK(mp); 439 MNT_REL(mp); 440 MNT_IUNLOCK(mp); 441 } 442 443 /* 444 * Allocate and initialize the mount point struct. 445 */ 446 struct mount * 447 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath, 448 struct ucred *cred) 449 { 450 struct mount *mp; 451 452 mp = uma_zalloc(mount_zone, M_WAITOK); 453 bzero(&mp->mnt_startzero, 454 __rangeof(struct mount, mnt_startzero, mnt_endzero)); 455 TAILQ_INIT(&mp->mnt_nvnodelist); 456 mp->mnt_nvnodelistsize = 0; 457 mp->mnt_ref = 0; 458 (void) vfs_busy(mp, MBF_NOWAIT); 459 mp->mnt_op = vfsp->vfc_vfsops; 460 mp->mnt_vfc = vfsp; 461 vfsp->vfc_refcount++; /* XXX Unlocked */ 462 mp->mnt_stat.f_type = vfsp->vfc_typenum; 463 mp->mnt_gen++; 464 strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 465 mp->mnt_vnodecovered = vp; 466 mp->mnt_cred = crdup(cred); 467 mp->mnt_stat.f_owner = cred->cr_uid; 468 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); 469 mp->mnt_iosize_max = DFLTPHYS; 470 #ifdef MAC 471 mac_mount_init(mp); 472 mac_mount_create(cred, mp); 473 #endif 474 arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); 475 return (mp); 476 } 477 478 /* 479 * Destroy the mount struct previously allocated by vfs_mount_alloc(). 480 */ 481 void 482 vfs_mount_destroy(struct mount *mp) 483 { 484 485 MNT_ILOCK(mp); 486 mp->mnt_kern_flag |= MNTK_REFEXPIRE; 487 if (mp->mnt_kern_flag & MNTK_MWAIT) { 488 mp->mnt_kern_flag &= ~MNTK_MWAIT; 489 wakeup(mp); 490 } 491 while (mp->mnt_ref) 492 msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0); 493 KASSERT(mp->mnt_ref == 0, 494 ("%s: invalid refcount in the drain path @ %s:%d", __func__, 495 __FILE__, __LINE__)); 496 if (mp->mnt_writeopcount != 0) 497 panic("vfs_mount_destroy: nonzero writeopcount"); 498 if (mp->mnt_secondary_writes != 0) 499 panic("vfs_mount_destroy: nonzero secondary_writes"); 500 mp->mnt_vfc->vfc_refcount--; 501 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) { 502 struct vnode *vp; 503 504 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) 505 vprint("", vp); 506 panic("unmount: dangling vnode"); 507 } 508 if (mp->mnt_nvnodelistsize != 0) 509 panic("vfs_mount_destroy: nonzero nvnodelistsize"); 510 if (mp->mnt_lockref != 0) 511 panic("vfs_mount_destroy: nonzero lock refcount"); 512 MNT_IUNLOCK(mp); 513 #ifdef MAC 514 mac_mount_destroy(mp); 515 #endif 516 if (mp->mnt_opt != NULL) 517 vfs_freeopts(mp->mnt_opt); 518 crfree(mp->mnt_cred); 519 uma_zfree(mount_zone, mp); 520 } 521 522 int 523 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions) 524 { 525 struct vfsoptlist *optlist; 526 struct vfsopt *opt, *tmp_opt; 527 char *fstype, *fspath, *errmsg; 528 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos; 529 530 errmsg = fspath = NULL; 531 errmsg_len = fspathlen = 0; 532 errmsg_pos = -1; 533 534 error = vfs_buildopts(fsoptions, &optlist); 535 if (error) 536 return (error); 537 538 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0) 539 errmsg_pos = vfs_getopt_pos(optlist, "errmsg"); 540 541 /* 542 * We need these two options before the others, 543 * and they are mandatory for any filesystem. 544 * Ensure they are NUL terminated as well. 545 */ 546 fstypelen = 0; 547 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); 548 if (error || fstype[fstypelen - 1] != '\0') { 549 error = EINVAL; 550 if (errmsg != NULL) 551 strncpy(errmsg, "Invalid fstype", errmsg_len); 552 goto bail; 553 } 554 fspathlen = 0; 555 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); 556 if (error || fspath[fspathlen - 1] != '\0') { 557 error = EINVAL; 558 if (errmsg != NULL) 559 strncpy(errmsg, "Invalid fspath", errmsg_len); 560 goto bail; 561 } 562 563 /* 564 * We need to see if we have the "update" option 565 * before we call vfs_domount(), since vfs_domount() has special 566 * logic based on MNT_UPDATE. This is very important 567 * when we want to update the root filesystem. 568 */ 569 TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) { 570 if (strcmp(opt->name, "update") == 0) { 571 fsflags |= MNT_UPDATE; 572 vfs_freeopt(optlist, opt); 573 } 574 else if (strcmp(opt->name, "async") == 0) 575 fsflags |= MNT_ASYNC; 576 else if (strcmp(opt->name, "force") == 0) { 577 fsflags |= MNT_FORCE; 578 vfs_freeopt(optlist, opt); 579 } 580 else if (strcmp(opt->name, "reload") == 0) { 581 fsflags |= MNT_RELOAD; 582 vfs_freeopt(optlist, opt); 583 } 584 else if (strcmp(opt->name, "multilabel") == 0) 585 fsflags |= MNT_MULTILABEL; 586 else if (strcmp(opt->name, "noasync") == 0) 587 fsflags &= ~MNT_ASYNC; 588 else if (strcmp(opt->name, "noatime") == 0) 589 fsflags |= MNT_NOATIME; 590 else if (strcmp(opt->name, "atime") == 0) { 591 free(opt->name, M_MOUNT); 592 opt->name = strdup("nonoatime", M_MOUNT); 593 } 594 else if (strcmp(opt->name, "noclusterr") == 0) 595 fsflags |= MNT_NOCLUSTERR; 596 else if (strcmp(opt->name, "clusterr") == 0) { 597 free(opt->name, M_MOUNT); 598 opt->name = strdup("nonoclusterr", M_MOUNT); 599 } 600 else if (strcmp(opt->name, "noclusterw") == 0) 601 fsflags |= MNT_NOCLUSTERW; 602 else if (strcmp(opt->name, "clusterw") == 0) { 603 free(opt->name, M_MOUNT); 604 opt->name = strdup("nonoclusterw", M_MOUNT); 605 } 606 else if (strcmp(opt->name, "noexec") == 0) 607 fsflags |= MNT_NOEXEC; 608 else if (strcmp(opt->name, "exec") == 0) { 609 free(opt->name, M_MOUNT); 610 opt->name = strdup("nonoexec", M_MOUNT); 611 } 612 else if (strcmp(opt->name, "nosuid") == 0) 613 fsflags |= MNT_NOSUID; 614 else if (strcmp(opt->name, "suid") == 0) { 615 free(opt->name, M_MOUNT); 616 opt->name = strdup("nonosuid", M_MOUNT); 617 } 618 else if (strcmp(opt->name, "nosymfollow") == 0) 619 fsflags |= MNT_NOSYMFOLLOW; 620 else if (strcmp(opt->name, "symfollow") == 0) { 621 free(opt->name, M_MOUNT); 622 opt->name = strdup("nonosymfollow", M_MOUNT); 623 } 624 else if (strcmp(opt->name, "noro") == 0) 625 fsflags &= ~MNT_RDONLY; 626 else if (strcmp(opt->name, "rw") == 0) 627 fsflags &= ~MNT_RDONLY; 628 else if (strcmp(opt->name, "ro") == 0) 629 fsflags |= MNT_RDONLY; 630 else if (strcmp(opt->name, "rdonly") == 0) { 631 free(opt->name, M_MOUNT); 632 opt->name = strdup("ro", M_MOUNT); 633 fsflags |= MNT_RDONLY; 634 } 635 else if (strcmp(opt->name, "suiddir") == 0) 636 fsflags |= MNT_SUIDDIR; 637 else if (strcmp(opt->name, "sync") == 0) 638 fsflags |= MNT_SYNCHRONOUS; 639 else if (strcmp(opt->name, "union") == 0) 640 fsflags |= MNT_UNION; 641 } 642 643 /* 644 * Be ultra-paranoid about making sure the type and fspath 645 * variables will fit in our mp buffers, including the 646 * terminating NUL. 647 */ 648 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { 649 error = ENAMETOOLONG; 650 goto bail; 651 } 652 653 error = vfs_domount(td, fstype, fspath, fsflags, &optlist); 654 bail: 655 /* copyout the errmsg */ 656 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt) 657 && errmsg_len > 0 && errmsg != NULL) { 658 if (fsoptions->uio_segflg == UIO_SYSSPACE) { 659 bcopy(errmsg, 660 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 661 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 662 } else { 663 copyout(errmsg, 664 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 665 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 666 } 667 } 668 669 if (optlist != NULL) 670 vfs_freeopts(optlist); 671 return (error); 672 } 673 674 /* 675 * Old mount API. 676 */ 677 #ifndef _SYS_SYSPROTO_H_ 678 struct mount_args { 679 char *type; 680 char *path; 681 int flags; 682 caddr_t data; 683 }; 684 #endif 685 /* ARGSUSED */ 686 int 687 sys_mount(td, uap) 688 struct thread *td; 689 struct mount_args /* { 690 char *type; 691 char *path; 692 int flags; 693 caddr_t data; 694 } */ *uap; 695 { 696 char *fstype; 697 struct vfsconf *vfsp = NULL; 698 struct mntarg *ma = NULL; 699 int error; 700 701 AUDIT_ARG_FFLAGS(uap->flags); 702 703 /* 704 * Filter out MNT_ROOTFS. We do not want clients of mount() in 705 * userspace to set this flag, but we must filter it out if we want 706 * MNT_UPDATE on the root file system to work. 707 * MNT_ROOTFS should only be set by the kernel when mounting its 708 * root file system. 709 */ 710 uap->flags &= ~MNT_ROOTFS; 711 712 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); 713 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); 714 if (error) { 715 free(fstype, M_TEMP); 716 return (error); 717 } 718 719 AUDIT_ARG_TEXT(fstype); 720 mtx_lock(&Giant); 721 vfsp = vfs_byname_kld(fstype, td, &error); 722 free(fstype, M_TEMP); 723 if (vfsp == NULL) { 724 mtx_unlock(&Giant); 725 return (ENOENT); 726 } 727 if (vfsp->vfc_vfsops->vfs_cmount == NULL) { 728 mtx_unlock(&Giant); 729 return (EOPNOTSUPP); 730 } 731 732 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN); 733 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN); 734 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro"); 735 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid"); 736 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec"); 737 738 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags); 739 mtx_unlock(&Giant); 740 return (error); 741 } 742 743 /* 744 * vfs_domount_first(): first file system mount (not update) 745 */ 746 static int 747 vfs_domount_first( 748 struct thread *td, /* Calling thread. */ 749 struct vfsconf *vfsp, /* File system type. */ 750 char *fspath, /* Mount path. */ 751 struct vnode *vp, /* Vnode to be covered. */ 752 int fsflags, /* Flags common to all filesystems. */ 753 struct vfsoptlist **optlist /* Options local to the filesystem. */ 754 ) 755 { 756 struct vattr va; 757 struct mount *mp; 758 struct vnode *newdp; 759 int error; 760 761 mtx_assert(&Giant, MA_OWNED); 762 ASSERT_VOP_ELOCKED(vp, __func__); 763 KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here")); 764 765 /* 766 * If the user is not root, ensure that they own the directory 767 * onto which we are attempting to mount. 768 */ 769 error = VOP_GETATTR(vp, &va, td->td_ucred); 770 if (error == 0 && va.va_uid != td->td_ucred->cr_uid) 771 error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN, 0); 772 if (error == 0) 773 error = vinvalbuf(vp, V_SAVE, 0, 0); 774 if (error == 0 && vp->v_type != VDIR) 775 error = ENOTDIR; 776 if (error == 0) { 777 VI_LOCK(vp); 778 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL) 779 vp->v_iflag |= VI_MOUNT; 780 else 781 error = EBUSY; 782 VI_UNLOCK(vp); 783 } 784 if (error != 0) { 785 vput(vp); 786 return (error); 787 } 788 VOP_UNLOCK(vp, 0); 789 790 /* Allocate and initialize the filesystem. */ 791 mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred); 792 /* XXXMAC: pass to vfs_mount_alloc? */ 793 mp->mnt_optnew = *optlist; 794 /* Set the mount level flags. */ 795 mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY)); 796 797 /* 798 * Mount the filesystem. 799 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 800 * get. No freeing of cn_pnbuf. 801 */ 802 error = VFS_MOUNT(mp); 803 #ifndef VFS_ALLOW_NONMPSAFE 804 if (error == 0 && VFS_NEEDSGIANT(mp)) { 805 (void)VFS_UNMOUNT(mp, fsflags); 806 error = ENXIO; 807 printf("%s: Mounting non-MPSAFE fs (%s) is disabled\n", 808 __func__, mp->mnt_vfc->vfc_name); 809 } 810 #endif 811 if (error != 0) { 812 vfs_unbusy(mp); 813 vfs_mount_destroy(mp); 814 VI_LOCK(vp); 815 vp->v_iflag &= ~VI_MOUNT; 816 VI_UNLOCK(vp); 817 vrele(vp); 818 return (error); 819 } 820 #ifdef VFS_ALLOW_NONMPSAFE 821 if (VFS_NEEDSGIANT(mp)) 822 printf("%s: Mounting non-MPSAFE fs (%s) is deprecated\n", 823 __func__, mp->mnt_vfc->vfc_name); 824 #endif 825 826 if (mp->mnt_opt != NULL) 827 vfs_freeopts(mp->mnt_opt); 828 mp->mnt_opt = mp->mnt_optnew; 829 *optlist = NULL; 830 (void)VFS_STATFS(mp, &mp->mnt_stat); 831 832 /* 833 * Prevent external consumers of mount options from reading mnt_optnew. 834 */ 835 mp->mnt_optnew = NULL; 836 837 MNT_ILOCK(mp); 838 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 839 mp->mnt_kern_flag |= MNTK_ASYNC; 840 else 841 mp->mnt_kern_flag &= ~MNTK_ASYNC; 842 MNT_IUNLOCK(mp); 843 844 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 845 cache_purge(vp); 846 VI_LOCK(vp); 847 vp->v_iflag &= ~VI_MOUNT; 848 VI_UNLOCK(vp); 849 vp->v_mountedhere = mp; 850 /* Place the new filesystem at the end of the mount list. */ 851 mtx_lock(&mountlist_mtx); 852 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 853 mtx_unlock(&mountlist_mtx); 854 vfs_event_signal(NULL, VQ_MOUNT, 0); 855 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) 856 panic("mount: lost mount"); 857 VOP_UNLOCK(newdp, 0); 858 VOP_UNLOCK(vp, 0); 859 mountcheckdirs(vp, newdp); 860 vrele(newdp); 861 if ((mp->mnt_flag & MNT_RDONLY) == 0) 862 vfs_allocate_syncvnode(mp); 863 vfs_unbusy(mp); 864 return (0); 865 } 866 867 /* 868 * vfs_domount_update(): update of mounted file system 869 */ 870 static int 871 vfs_domount_update( 872 struct thread *td, /* Calling thread. */ 873 struct vnode *vp, /* Mount point vnode. */ 874 int fsflags, /* Flags common to all filesystems. */ 875 struct vfsoptlist **optlist /* Options local to the filesystem. */ 876 ) 877 { 878 struct oexport_args oexport; 879 struct export_args export; 880 struct mount *mp; 881 int error, export_error, flag; 882 883 mtx_assert(&Giant, MA_OWNED); 884 ASSERT_VOP_ELOCKED(vp, __func__); 885 KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here")); 886 887 if ((vp->v_vflag & VV_ROOT) == 0) { 888 vput(vp); 889 return (EINVAL); 890 } 891 mp = vp->v_mount; 892 /* 893 * We only allow the filesystem to be reloaded if it 894 * is currently mounted read-only. 895 */ 896 flag = mp->mnt_flag; 897 if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) { 898 vput(vp); 899 return (EOPNOTSUPP); /* Needs translation */ 900 } 901 /* 902 * Only privileged root, or (if MNT_USER is set) the user that 903 * did the original mount is permitted to update it. 904 */ 905 error = vfs_suser(mp, td); 906 if (error != 0) { 907 vput(vp); 908 return (error); 909 } 910 if (vfs_busy(mp, MBF_NOWAIT)) { 911 vput(vp); 912 return (EBUSY); 913 } 914 VI_LOCK(vp); 915 if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) { 916 VI_UNLOCK(vp); 917 vfs_unbusy(mp); 918 vput(vp); 919 return (EBUSY); 920 } 921 vp->v_iflag |= VI_MOUNT; 922 VI_UNLOCK(vp); 923 VOP_UNLOCK(vp, 0); 924 925 MNT_ILOCK(mp); 926 mp->mnt_flag &= ~MNT_UPDATEMASK; 927 mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | 928 MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY); 929 if ((mp->mnt_flag & MNT_ASYNC) == 0) 930 mp->mnt_kern_flag &= ~MNTK_ASYNC; 931 MNT_IUNLOCK(mp); 932 mp->mnt_optnew = *optlist; 933 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); 934 935 /* 936 * Mount the filesystem. 937 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 938 * get. No freeing of cn_pnbuf. 939 */ 940 error = VFS_MOUNT(mp); 941 942 export_error = 0; 943 if (error == 0) { 944 /* Process the export option. */ 945 if (vfs_copyopt(mp->mnt_optnew, "export", &export, 946 sizeof(export)) == 0) { 947 export_error = vfs_export(mp, &export); 948 } else if (vfs_copyopt(mp->mnt_optnew, "export", &oexport, 949 sizeof(oexport)) == 0) { 950 export.ex_flags = oexport.ex_flags; 951 export.ex_root = oexport.ex_root; 952 export.ex_anon = oexport.ex_anon; 953 export.ex_addr = oexport.ex_addr; 954 export.ex_addrlen = oexport.ex_addrlen; 955 export.ex_mask = oexport.ex_mask; 956 export.ex_masklen = oexport.ex_masklen; 957 export.ex_indexfile = oexport.ex_indexfile; 958 export.ex_numsecflavors = 0; 959 export_error = vfs_export(mp, &export); 960 } 961 } 962 963 MNT_ILOCK(mp); 964 if (error == 0) { 965 mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | 966 MNT_SNAPSHOT); 967 } else { 968 /* 969 * If we fail, restore old mount flags. MNT_QUOTA is special, 970 * because it is not part of MNT_UPDATEMASK, but it could have 971 * changed in the meantime if quotactl(2) was called. 972 * All in all we want current value of MNT_QUOTA, not the old 973 * one. 974 */ 975 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA); 976 } 977 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 978 mp->mnt_kern_flag |= MNTK_ASYNC; 979 else 980 mp->mnt_kern_flag &= ~MNTK_ASYNC; 981 MNT_IUNLOCK(mp); 982 983 if (error != 0) 984 goto end; 985 986 if (mp->mnt_opt != NULL) 987 vfs_freeopts(mp->mnt_opt); 988 mp->mnt_opt = mp->mnt_optnew; 989 *optlist = NULL; 990 (void)VFS_STATFS(mp, &mp->mnt_stat); 991 /* 992 * Prevent external consumers of mount options from reading 993 * mnt_optnew. 994 */ 995 mp->mnt_optnew = NULL; 996 997 if ((mp->mnt_flag & MNT_RDONLY) == 0) 998 vfs_allocate_syncvnode(mp); 999 else 1000 vfs_deallocate_syncvnode(mp); 1001 end: 1002 vfs_unbusy(mp); 1003 VI_LOCK(vp); 1004 vp->v_iflag &= ~VI_MOUNT; 1005 VI_UNLOCK(vp); 1006 vrele(vp); 1007 return (error != 0 ? error : export_error); 1008 } 1009 1010 /* 1011 * vfs_domount(): actually attempt a filesystem mount. 1012 */ 1013 static int 1014 vfs_domount( 1015 struct thread *td, /* Calling thread. */ 1016 const char *fstype, /* Filesystem type. */ 1017 char *fspath, /* Mount path. */ 1018 int fsflags, /* Flags common to all filesystems. */ 1019 struct vfsoptlist **optlist /* Options local to the filesystem. */ 1020 ) 1021 { 1022 struct vfsconf *vfsp; 1023 struct nameidata nd; 1024 struct vnode *vp; 1025 int error; 1026 1027 /* 1028 * Be ultra-paranoid about making sure the type and fspath 1029 * variables will fit in our mp buffers, including the 1030 * terminating NUL. 1031 */ 1032 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) 1033 return (ENAMETOOLONG); 1034 1035 if (jailed(td->td_ucred) || usermount == 0) { 1036 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) 1037 return (error); 1038 } 1039 1040 /* 1041 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. 1042 */ 1043 if (fsflags & MNT_EXPORTED) { 1044 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED); 1045 if (error) 1046 return (error); 1047 } 1048 if (fsflags & MNT_SUIDDIR) { 1049 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR); 1050 if (error) 1051 return (error); 1052 } 1053 /* 1054 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users. 1055 */ 1056 if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) { 1057 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0) 1058 fsflags |= MNT_NOSUID | MNT_USER; 1059 } 1060 1061 /* Load KLDs before we lock the covered vnode to avoid reversals. */ 1062 vfsp = NULL; 1063 if ((fsflags & MNT_UPDATE) == 0) { 1064 /* Don't try to load KLDs if we're mounting the root. */ 1065 if (fsflags & MNT_ROOTFS) 1066 vfsp = vfs_byname(fstype); 1067 else 1068 vfsp = vfs_byname_kld(fstype, td, &error); 1069 if (vfsp == NULL) 1070 return (ENODEV); 1071 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL)) 1072 return (EPERM); 1073 } 1074 1075 /* 1076 * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE. 1077 */ 1078 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, 1079 UIO_SYSSPACE, fspath, td); 1080 error = namei(&nd); 1081 if (error != 0) 1082 return (error); 1083 if (!NDHASGIANT(&nd)) 1084 mtx_lock(&Giant); 1085 NDFREE(&nd, NDF_ONLY_PNBUF); 1086 vp = nd.ni_vp; 1087 if ((fsflags & MNT_UPDATE) == 0) { 1088 error = vfs_domount_first(td, vfsp, fspath, vp, fsflags, 1089 optlist); 1090 } else { 1091 error = vfs_domount_update(td, vp, fsflags, optlist); 1092 } 1093 mtx_unlock(&Giant); 1094 1095 ASSERT_VI_UNLOCKED(vp, __func__); 1096 ASSERT_VOP_UNLOCKED(vp, __func__); 1097 1098 return (error); 1099 } 1100 1101 /* 1102 * Unmount a filesystem. 1103 * 1104 * Note: unmount takes a path to the vnode mounted on as argument, not 1105 * special file (as before). 1106 */ 1107 #ifndef _SYS_SYSPROTO_H_ 1108 struct unmount_args { 1109 char *path; 1110 int flags; 1111 }; 1112 #endif 1113 /* ARGSUSED */ 1114 int 1115 sys_unmount(td, uap) 1116 struct thread *td; 1117 register struct unmount_args /* { 1118 char *path; 1119 int flags; 1120 } */ *uap; 1121 { 1122 struct mount *mp; 1123 char *pathbuf; 1124 int error, id0, id1; 1125 1126 AUDIT_ARG_VALUE(uap->flags); 1127 if (jailed(td->td_ucred) || usermount == 0) { 1128 error = priv_check(td, PRIV_VFS_UNMOUNT); 1129 if (error) 1130 return (error); 1131 } 1132 1133 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); 1134 error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL); 1135 if (error) { 1136 free(pathbuf, M_TEMP); 1137 return (error); 1138 } 1139 mtx_lock(&Giant); 1140 if (uap->flags & MNT_BYFSID) { 1141 AUDIT_ARG_TEXT(pathbuf); 1142 /* Decode the filesystem ID. */ 1143 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) { 1144 mtx_unlock(&Giant); 1145 free(pathbuf, M_TEMP); 1146 return (EINVAL); 1147 } 1148 1149 mtx_lock(&mountlist_mtx); 1150 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 1151 if (mp->mnt_stat.f_fsid.val[0] == id0 && 1152 mp->mnt_stat.f_fsid.val[1] == id1) 1153 break; 1154 } 1155 mtx_unlock(&mountlist_mtx); 1156 } else { 1157 AUDIT_ARG_UPATH1(td, pathbuf); 1158 mtx_lock(&mountlist_mtx); 1159 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 1160 if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) 1161 break; 1162 } 1163 mtx_unlock(&mountlist_mtx); 1164 } 1165 free(pathbuf, M_TEMP); 1166 if (mp == NULL) { 1167 /* 1168 * Previously we returned ENOENT for a nonexistent path and 1169 * EINVAL for a non-mountpoint. We cannot tell these apart 1170 * now, so in the !MNT_BYFSID case return the more likely 1171 * EINVAL for compatibility. 1172 */ 1173 mtx_unlock(&Giant); 1174 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL); 1175 } 1176 1177 /* 1178 * Don't allow unmounting the root filesystem. 1179 */ 1180 if (mp->mnt_flag & MNT_ROOTFS) { 1181 mtx_unlock(&Giant); 1182 return (EINVAL); 1183 } 1184 error = dounmount(mp, uap->flags, td); 1185 mtx_unlock(&Giant); 1186 return (error); 1187 } 1188 1189 /* 1190 * Do the actual filesystem unmount. 1191 */ 1192 int 1193 dounmount(mp, flags, td) 1194 struct mount *mp; 1195 int flags; 1196 struct thread *td; 1197 { 1198 struct vnode *coveredvp, *fsrootvp; 1199 int error; 1200 int async_flag; 1201 int mnt_gen_r; 1202 1203 mtx_assert(&Giant, MA_OWNED); 1204 1205 if ((coveredvp = mp->mnt_vnodecovered) != NULL) { 1206 mnt_gen_r = mp->mnt_gen; 1207 VI_LOCK(coveredvp); 1208 vholdl(coveredvp); 1209 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); 1210 vdrop(coveredvp); 1211 /* 1212 * Check for mp being unmounted while waiting for the 1213 * covered vnode lock. 1214 */ 1215 if (coveredvp->v_mountedhere != mp || 1216 coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) { 1217 VOP_UNLOCK(coveredvp, 0); 1218 return (EBUSY); 1219 } 1220 } 1221 /* 1222 * Only privileged root, or (if MNT_USER is set) the user that did the 1223 * original mount is permitted to unmount this filesystem. 1224 */ 1225 error = vfs_suser(mp, td); 1226 if (error) { 1227 if (coveredvp) 1228 VOP_UNLOCK(coveredvp, 0); 1229 return (error); 1230 } 1231 1232 MNT_ILOCK(mp); 1233 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 1234 MNT_IUNLOCK(mp); 1235 if (coveredvp) 1236 VOP_UNLOCK(coveredvp, 0); 1237 return (EBUSY); 1238 } 1239 mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ; 1240 /* Allow filesystems to detect that a forced unmount is in progress. */ 1241 if (flags & MNT_FORCE) 1242 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 1243 error = 0; 1244 if (mp->mnt_lockref) { 1245 mp->mnt_kern_flag |= MNTK_DRAINING; 1246 error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS, 1247 "mount drain", 0); 1248 } 1249 MNT_IUNLOCK(mp); 1250 KASSERT(mp->mnt_lockref == 0, 1251 ("%s: invalid lock refcount in the drain path @ %s:%d", 1252 __func__, __FILE__, __LINE__)); 1253 KASSERT(error == 0, 1254 ("%s: invalid return value for msleep in the drain path @ %s:%d", 1255 __func__, __FILE__, __LINE__)); 1256 vn_start_write(NULL, &mp, V_WAIT); 1257 1258 if (mp->mnt_flag & MNT_EXPUBLIC) 1259 vfs_setpublicfs(NULL, NULL, NULL); 1260 1261 vfs_msync(mp, MNT_WAIT); 1262 MNT_ILOCK(mp); 1263 async_flag = mp->mnt_flag & MNT_ASYNC; 1264 mp->mnt_flag &= ~MNT_ASYNC; 1265 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1266 MNT_IUNLOCK(mp); 1267 cache_purgevfs(mp); /* remove cache entries for this file sys */ 1268 vfs_deallocate_syncvnode(mp); 1269 /* 1270 * For forced unmounts, move process cdir/rdir refs on the fs root 1271 * vnode to the covered vnode. For non-forced unmounts we want 1272 * such references to cause an EBUSY error. 1273 */ 1274 if ((flags & MNT_FORCE) && 1275 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) { 1276 if (mp->mnt_vnodecovered != NULL) 1277 mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); 1278 if (fsrootvp == rootvnode) { 1279 vrele(rootvnode); 1280 rootvnode = NULL; 1281 } 1282 vput(fsrootvp); 1283 } 1284 if (((mp->mnt_flag & MNT_RDONLY) || 1285 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || (flags & MNT_FORCE) != 0) 1286 error = VFS_UNMOUNT(mp, flags); 1287 vn_finished_write(mp); 1288 /* 1289 * If we failed to flush the dirty blocks for this mount point, 1290 * undo all the cdir/rdir and rootvnode changes we made above. 1291 * Unless we failed to do so because the device is reporting that 1292 * it doesn't exist anymore. 1293 */ 1294 if (error && error != ENXIO) { 1295 if ((flags & MNT_FORCE) && 1296 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) { 1297 if (mp->mnt_vnodecovered != NULL) 1298 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); 1299 if (rootvnode == NULL) { 1300 rootvnode = fsrootvp; 1301 vref(rootvnode); 1302 } 1303 vput(fsrootvp); 1304 } 1305 MNT_ILOCK(mp); 1306 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ; 1307 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1308 MNT_IUNLOCK(mp); 1309 vfs_allocate_syncvnode(mp); 1310 MNT_ILOCK(mp); 1311 } 1312 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1313 mp->mnt_flag |= async_flag; 1314 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1315 mp->mnt_kern_flag |= MNTK_ASYNC; 1316 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1317 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1318 wakeup(mp); 1319 } 1320 MNT_IUNLOCK(mp); 1321 if (coveredvp) 1322 VOP_UNLOCK(coveredvp, 0); 1323 return (error); 1324 } 1325 mtx_lock(&mountlist_mtx); 1326 TAILQ_REMOVE(&mountlist, mp, mnt_list); 1327 mtx_unlock(&mountlist_mtx); 1328 if (coveredvp != NULL) { 1329 coveredvp->v_mountedhere = NULL; 1330 vput(coveredvp); 1331 } 1332 vfs_event_signal(NULL, VQ_UNMOUNT, 0); 1333 vfs_mount_destroy(mp); 1334 return (0); 1335 } 1336 1337 /* 1338 * Report errors during filesystem mounting. 1339 */ 1340 void 1341 vfs_mount_error(struct mount *mp, const char *fmt, ...) 1342 { 1343 struct vfsoptlist *moptlist = mp->mnt_optnew; 1344 va_list ap; 1345 int error, len; 1346 char *errmsg; 1347 1348 error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len); 1349 if (error || errmsg == NULL || len <= 0) 1350 return; 1351 1352 va_start(ap, fmt); 1353 vsnprintf(errmsg, (size_t)len, fmt, ap); 1354 va_end(ap); 1355 } 1356 1357 void 1358 vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...) 1359 { 1360 va_list ap; 1361 int error, len; 1362 char *errmsg; 1363 1364 error = vfs_getopt(opts, "errmsg", (void **)&errmsg, &len); 1365 if (error || errmsg == NULL || len <= 0) 1366 return; 1367 1368 va_start(ap, fmt); 1369 vsnprintf(errmsg, (size_t)len, fmt, ap); 1370 va_end(ap); 1371 } 1372 1373 /* 1374 * --------------------------------------------------------------------- 1375 * Functions for querying mount options/arguments from filesystems. 1376 */ 1377 1378 /* 1379 * Check that no unknown options are given 1380 */ 1381 int 1382 vfs_filteropt(struct vfsoptlist *opts, const char **legal) 1383 { 1384 struct vfsopt *opt; 1385 char errmsg[255]; 1386 const char **t, *p, *q; 1387 int ret = 0; 1388 1389 TAILQ_FOREACH(opt, opts, link) { 1390 p = opt->name; 1391 q = NULL; 1392 if (p[0] == 'n' && p[1] == 'o') 1393 q = p + 2; 1394 for(t = global_opts; *t != NULL; t++) { 1395 if (strcmp(*t, p) == 0) 1396 break; 1397 if (q != NULL) { 1398 if (strcmp(*t, q) == 0) 1399 break; 1400 } 1401 } 1402 if (*t != NULL) 1403 continue; 1404 for(t = legal; *t != NULL; t++) { 1405 if (strcmp(*t, p) == 0) 1406 break; 1407 if (q != NULL) { 1408 if (strcmp(*t, q) == 0) 1409 break; 1410 } 1411 } 1412 if (*t != NULL) 1413 continue; 1414 snprintf(errmsg, sizeof(errmsg), 1415 "mount option <%s> is unknown", p); 1416 ret = EINVAL; 1417 } 1418 if (ret != 0) { 1419 TAILQ_FOREACH(opt, opts, link) { 1420 if (strcmp(opt->name, "errmsg") == 0) { 1421 strncpy((char *)opt->value, errmsg, opt->len); 1422 break; 1423 } 1424 } 1425 if (opt == NULL) 1426 printf("%s\n", errmsg); 1427 } 1428 return (ret); 1429 } 1430 1431 /* 1432 * Get a mount option by its name. 1433 * 1434 * Return 0 if the option was found, ENOENT otherwise. 1435 * If len is non-NULL it will be filled with the length 1436 * of the option. If buf is non-NULL, it will be filled 1437 * with the address of the option. 1438 */ 1439 int 1440 vfs_getopt(opts, name, buf, len) 1441 struct vfsoptlist *opts; 1442 const char *name; 1443 void **buf; 1444 int *len; 1445 { 1446 struct vfsopt *opt; 1447 1448 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1449 1450 TAILQ_FOREACH(opt, opts, link) { 1451 if (strcmp(name, opt->name) == 0) { 1452 opt->seen = 1; 1453 if (len != NULL) 1454 *len = opt->len; 1455 if (buf != NULL) 1456 *buf = opt->value; 1457 return (0); 1458 } 1459 } 1460 return (ENOENT); 1461 } 1462 1463 int 1464 vfs_getopt_pos(struct vfsoptlist *opts, const char *name) 1465 { 1466 struct vfsopt *opt; 1467 1468 if (opts == NULL) 1469 return (-1); 1470 1471 TAILQ_FOREACH(opt, opts, link) { 1472 if (strcmp(name, opt->name) == 0) { 1473 opt->seen = 1; 1474 return (opt->pos); 1475 } 1476 } 1477 return (-1); 1478 } 1479 1480 char * 1481 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error) 1482 { 1483 struct vfsopt *opt; 1484 1485 *error = 0; 1486 TAILQ_FOREACH(opt, opts, link) { 1487 if (strcmp(name, opt->name) != 0) 1488 continue; 1489 opt->seen = 1; 1490 if (opt->len == 0 || 1491 ((char *)opt->value)[opt->len - 1] != '\0') { 1492 *error = EINVAL; 1493 return (NULL); 1494 } 1495 return (opt->value); 1496 } 1497 *error = ENOENT; 1498 return (NULL); 1499 } 1500 1501 int 1502 vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w, 1503 uint64_t val) 1504 { 1505 struct vfsopt *opt; 1506 1507 TAILQ_FOREACH(opt, opts, link) { 1508 if (strcmp(name, opt->name) == 0) { 1509 opt->seen = 1; 1510 if (w != NULL) 1511 *w |= val; 1512 return (1); 1513 } 1514 } 1515 if (w != NULL) 1516 *w &= ~val; 1517 return (0); 1518 } 1519 1520 int 1521 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...) 1522 { 1523 va_list ap; 1524 struct vfsopt *opt; 1525 int ret; 1526 1527 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1528 1529 TAILQ_FOREACH(opt, opts, link) { 1530 if (strcmp(name, opt->name) != 0) 1531 continue; 1532 opt->seen = 1; 1533 if (opt->len == 0 || opt->value == NULL) 1534 return (0); 1535 if (((char *)opt->value)[opt->len - 1] != '\0') 1536 return (0); 1537 va_start(ap, fmt); 1538 ret = vsscanf(opt->value, fmt, ap); 1539 va_end(ap); 1540 return (ret); 1541 } 1542 return (0); 1543 } 1544 1545 int 1546 vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len) 1547 { 1548 struct vfsopt *opt; 1549 1550 TAILQ_FOREACH(opt, opts, link) { 1551 if (strcmp(name, opt->name) != 0) 1552 continue; 1553 opt->seen = 1; 1554 if (opt->value == NULL) 1555 opt->len = len; 1556 else { 1557 if (opt->len != len) 1558 return (EINVAL); 1559 bcopy(value, opt->value, len); 1560 } 1561 return (0); 1562 } 1563 return (ENOENT); 1564 } 1565 1566 int 1567 vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len) 1568 { 1569 struct vfsopt *opt; 1570 1571 TAILQ_FOREACH(opt, opts, link) { 1572 if (strcmp(name, opt->name) != 0) 1573 continue; 1574 opt->seen = 1; 1575 if (opt->value == NULL) 1576 opt->len = len; 1577 else { 1578 if (opt->len < len) 1579 return (EINVAL); 1580 opt->len = len; 1581 bcopy(value, opt->value, len); 1582 } 1583 return (0); 1584 } 1585 return (ENOENT); 1586 } 1587 1588 int 1589 vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value) 1590 { 1591 struct vfsopt *opt; 1592 1593 TAILQ_FOREACH(opt, opts, link) { 1594 if (strcmp(name, opt->name) != 0) 1595 continue; 1596 opt->seen = 1; 1597 if (opt->value == NULL) 1598 opt->len = strlen(value) + 1; 1599 else if (strlcpy(opt->value, value, opt->len) >= opt->len) 1600 return (EINVAL); 1601 return (0); 1602 } 1603 return (ENOENT); 1604 } 1605 1606 /* 1607 * Find and copy a mount option. 1608 * 1609 * The size of the buffer has to be specified 1610 * in len, if it is not the same length as the 1611 * mount option, EINVAL is returned. 1612 * Returns ENOENT if the option is not found. 1613 */ 1614 int 1615 vfs_copyopt(opts, name, dest, len) 1616 struct vfsoptlist *opts; 1617 const char *name; 1618 void *dest; 1619 int len; 1620 { 1621 struct vfsopt *opt; 1622 1623 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); 1624 1625 TAILQ_FOREACH(opt, opts, link) { 1626 if (strcmp(name, opt->name) == 0) { 1627 opt->seen = 1; 1628 if (len != opt->len) 1629 return (EINVAL); 1630 bcopy(opt->value, dest, opt->len); 1631 return (0); 1632 } 1633 } 1634 return (ENOENT); 1635 } 1636 1637 /* 1638 * This is a helper function for filesystems to traverse their 1639 * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h 1640 */ 1641 1642 struct vnode * 1643 __mnt_vnode_next(struct vnode **mvp, struct mount *mp) 1644 { 1645 struct vnode *vp; 1646 1647 mtx_assert(MNT_MTX(mp), MA_OWNED); 1648 1649 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); 1650 if (should_yield()) { 1651 MNT_IUNLOCK(mp); 1652 kern_yield(PRI_UNCHANGED); 1653 MNT_ILOCK(mp); 1654 } 1655 vp = TAILQ_NEXT(*mvp, v_nmntvnodes); 1656 while (vp != NULL && vp->v_type == VMARKER) 1657 vp = TAILQ_NEXT(vp, v_nmntvnodes); 1658 1659 /* Check if we are done */ 1660 if (vp == NULL) { 1661 __mnt_vnode_markerfree(mvp, mp); 1662 return (NULL); 1663 } 1664 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); 1665 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); 1666 return (vp); 1667 } 1668 1669 struct vnode * 1670 __mnt_vnode_first(struct vnode **mvp, struct mount *mp) 1671 { 1672 struct vnode *vp; 1673 1674 mtx_assert(MNT_MTX(mp), MA_OWNED); 1675 1676 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 1677 while (vp != NULL && vp->v_type == VMARKER) 1678 vp = TAILQ_NEXT(vp, v_nmntvnodes); 1679 1680 /* Check if we are done */ 1681 if (vp == NULL) { 1682 *mvp = NULL; 1683 return (NULL); 1684 } 1685 MNT_REF(mp); 1686 MNT_IUNLOCK(mp); 1687 *mvp = (struct vnode *) malloc(sizeof(struct vnode), 1688 M_VNODE_MARKER, 1689 M_WAITOK | M_ZERO); 1690 MNT_ILOCK(mp); 1691 (*mvp)->v_type = VMARKER; 1692 1693 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 1694 while (vp != NULL && vp->v_type == VMARKER) 1695 vp = TAILQ_NEXT(vp, v_nmntvnodes); 1696 1697 /* Check if we are done */ 1698 if (vp == NULL) { 1699 MNT_IUNLOCK(mp); 1700 free(*mvp, M_VNODE_MARKER); 1701 MNT_ILOCK(mp); 1702 *mvp = NULL; 1703 MNT_REL(mp); 1704 return (NULL); 1705 } 1706 (*mvp)->v_mount = mp; 1707 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); 1708 return (vp); 1709 } 1710 1711 1712 void 1713 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp) 1714 { 1715 1716 if (*mvp == NULL) 1717 return; 1718 1719 mtx_assert(MNT_MTX(mp), MA_OWNED); 1720 1721 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); 1722 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); 1723 MNT_IUNLOCK(mp); 1724 free(*mvp, M_VNODE_MARKER); 1725 MNT_ILOCK(mp); 1726 *mvp = NULL; 1727 MNT_REL(mp); 1728 } 1729 1730 1731 int 1732 __vfs_statfs(struct mount *mp, struct statfs *sbp) 1733 { 1734 int error; 1735 1736 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat); 1737 if (sbp != &mp->mnt_stat) 1738 *sbp = mp->mnt_stat; 1739 return (error); 1740 } 1741 1742 void 1743 vfs_mountedfrom(struct mount *mp, const char *from) 1744 { 1745 1746 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname); 1747 strlcpy(mp->mnt_stat.f_mntfromname, from, 1748 sizeof mp->mnt_stat.f_mntfromname); 1749 } 1750 1751 /* 1752 * --------------------------------------------------------------------- 1753 * This is the api for building mount args and mounting filesystems from 1754 * inside the kernel. 1755 * 1756 * The API works by accumulation of individual args. First error is 1757 * latched. 1758 * 1759 * XXX: should be documented in new manpage kernel_mount(9) 1760 */ 1761 1762 /* A memory allocation which must be freed when we are done */ 1763 struct mntaarg { 1764 SLIST_ENTRY(mntaarg) next; 1765 }; 1766 1767 /* The header for the mount arguments */ 1768 struct mntarg { 1769 struct iovec *v; 1770 int len; 1771 int error; 1772 SLIST_HEAD(, mntaarg) list; 1773 }; 1774 1775 /* 1776 * Add a boolean argument. 1777 * 1778 * flag is the boolean value. 1779 * name must start with "no". 1780 */ 1781 struct mntarg * 1782 mount_argb(struct mntarg *ma, int flag, const char *name) 1783 { 1784 1785 KASSERT(name[0] == 'n' && name[1] == 'o', 1786 ("mount_argb(...,%s): name must start with 'no'", name)); 1787 1788 return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0)); 1789 } 1790 1791 /* 1792 * Add an argument printf style 1793 */ 1794 struct mntarg * 1795 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...) 1796 { 1797 va_list ap; 1798 struct mntaarg *maa; 1799 struct sbuf *sb; 1800 int len; 1801 1802 if (ma == NULL) { 1803 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1804 SLIST_INIT(&ma->list); 1805 } 1806 if (ma->error) 1807 return (ma); 1808 1809 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 1810 M_MOUNT, M_WAITOK); 1811 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 1812 ma->v[ma->len].iov_len = strlen(name) + 1; 1813 ma->len++; 1814 1815 sb = sbuf_new_auto(); 1816 va_start(ap, fmt); 1817 sbuf_vprintf(sb, fmt, ap); 1818 va_end(ap); 1819 sbuf_finish(sb); 1820 len = sbuf_len(sb) + 1; 1821 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 1822 SLIST_INSERT_HEAD(&ma->list, maa, next); 1823 bcopy(sbuf_data(sb), maa + 1, len); 1824 sbuf_delete(sb); 1825 1826 ma->v[ma->len].iov_base = maa + 1; 1827 ma->v[ma->len].iov_len = len; 1828 ma->len++; 1829 1830 return (ma); 1831 } 1832 1833 /* 1834 * Add an argument which is a userland string. 1835 */ 1836 struct mntarg * 1837 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len) 1838 { 1839 struct mntaarg *maa; 1840 char *tbuf; 1841 1842 if (val == NULL) 1843 return (ma); 1844 if (ma == NULL) { 1845 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1846 SLIST_INIT(&ma->list); 1847 } 1848 if (ma->error) 1849 return (ma); 1850 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 1851 SLIST_INSERT_HEAD(&ma->list, maa, next); 1852 tbuf = (void *)(maa + 1); 1853 ma->error = copyinstr(val, tbuf, len, NULL); 1854 return (mount_arg(ma, name, tbuf, -1)); 1855 } 1856 1857 /* 1858 * Plain argument. 1859 * 1860 * If length is -1, treat value as a C string. 1861 */ 1862 struct mntarg * 1863 mount_arg(struct mntarg *ma, const char *name, const void *val, int len) 1864 { 1865 1866 if (ma == NULL) { 1867 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1868 SLIST_INIT(&ma->list); 1869 } 1870 if (ma->error) 1871 return (ma); 1872 1873 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 1874 M_MOUNT, M_WAITOK); 1875 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 1876 ma->v[ma->len].iov_len = strlen(name) + 1; 1877 ma->len++; 1878 1879 ma->v[ma->len].iov_base = (void *)(uintptr_t)val; 1880 if (len < 0) 1881 ma->v[ma->len].iov_len = strlen(val) + 1; 1882 else 1883 ma->v[ma->len].iov_len = len; 1884 ma->len++; 1885 return (ma); 1886 } 1887 1888 /* 1889 * Free a mntarg structure 1890 */ 1891 static void 1892 free_mntarg(struct mntarg *ma) 1893 { 1894 struct mntaarg *maa; 1895 1896 while (!SLIST_EMPTY(&ma->list)) { 1897 maa = SLIST_FIRST(&ma->list); 1898 SLIST_REMOVE_HEAD(&ma->list, next); 1899 free(maa, M_MOUNT); 1900 } 1901 free(ma->v, M_MOUNT); 1902 free(ma, M_MOUNT); 1903 } 1904 1905 /* 1906 * Mount a filesystem 1907 */ 1908 int 1909 kernel_mount(struct mntarg *ma, int flags) 1910 { 1911 struct uio auio; 1912 int error; 1913 1914 KASSERT(ma != NULL, ("kernel_mount NULL ma")); 1915 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v")); 1916 KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len)); 1917 1918 auio.uio_iov = ma->v; 1919 auio.uio_iovcnt = ma->len; 1920 auio.uio_segflg = UIO_SYSSPACE; 1921 1922 error = ma->error; 1923 if (!error) 1924 error = vfs_donmount(curthread, flags, &auio); 1925 free_mntarg(ma); 1926 return (error); 1927 } 1928 1929 /* 1930 * A printflike function to mount a filesystem. 1931 */ 1932 int 1933 kernel_vmount(int flags, ...) 1934 { 1935 struct mntarg *ma = NULL; 1936 va_list ap; 1937 const char *cp; 1938 const void *vp; 1939 int error; 1940 1941 va_start(ap, flags); 1942 for (;;) { 1943 cp = va_arg(ap, const char *); 1944 if (cp == NULL) 1945 break; 1946 vp = va_arg(ap, const void *); 1947 ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0)); 1948 } 1949 va_end(ap); 1950 1951 error = kernel_mount(ma, flags); 1952 return (error); 1953 } 1954 1955 void 1956 vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp) 1957 { 1958 1959 bcopy(oexp, exp, sizeof(*oexp)); 1960 exp->ex_numsecflavors = 0; 1961 } 1962