1 /*- 2 * Copyright (c) 1999-2004 Poul-Henning Kamp 3 * Copyright (c) 1999 Michael Smith 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/conf.h> 42 #include <sys/fcntl.h> 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/libkern.h> 46 #include <sys/malloc.h> 47 #include <sys/mount.h> 48 #include <sys/mutex.h> 49 #include <sys/namei.h> 50 #include <sys/priv.h> 51 #include <sys/proc.h> 52 #include <sys/filedesc.h> 53 #include <sys/reboot.h> 54 #include <sys/syscallsubr.h> 55 #include <sys/sysproto.h> 56 #include <sys/sx.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/systm.h> 60 #include <sys/vnode.h> 61 #include <vm/uma.h> 62 63 #include <geom/geom.h> 64 65 #include <machine/stdarg.h> 66 67 #include <security/audit/audit.h> 68 #include <security/mac/mac_framework.h> 69 70 #include "opt_rootdevname.h" 71 #include "opt_mac.h" 72 73 #define ROOTNAME "root_device" 74 #define VFS_MOUNTARG_SIZE_MAX (1024 * 64) 75 76 static int vfs_domount(struct thread *td, const char *fstype, 77 char *fspath, int fsflags, void *fsdata); 78 static int vfs_mountroot_ask(void); 79 static int vfs_mountroot_try(const char *mountfrom); 80 static void free_mntarg(struct mntarg *ma); 81 static int vfs_getopt_pos(struct vfsoptlist *opts, const char *name); 82 83 static int usermount = 0; 84 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 85 "Unprivileged users may mount and unmount file systems"); 86 87 MALLOC_DEFINE(M_MOUNT, "mount", 
"vfs mount structure");
MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
static uma_zone_t mount_zone;

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;
MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);

/*
 * A mount option list is a tail queue of name/value pairs.
 */
TAILQ_HEAD(vfsoptlist, vfsopt);
struct vfsopt {
	TAILQ_ENTRY(vfsopt) link;	/* linkage on the owning vfsoptlist */
	char	*name;			/* option name, M_MOUNT allocated */
	void	*value;			/* option value, NULL when len is 0 */
	int	len;			/* size of value in bytes */
};

/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode	*rootvnode;

/*
 * The root filesystem is detailed in the kernel environment variable
 * vfs.root.mountfrom, which is expected to be in the general format
 *
 * <vfsname>:[<path>]
 * vfsname   := the name of a VFS known to the kernel and capable
 *              of being mounted as root
 * path      := disk device name or other data used by the filesystem
 *              to locate its physical store
 */

/*
 * Global opts, taken by all filesystems
 */
static const char *global_opts[] = {
	"errmsg",
	"fstype",
	"fspath",
	"ro",
	"rw",
	"nosuid",
	"noexec",
	NULL
};

/*
 * The root specifiers we will try if RB_CDROM is specified.
 */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0",
	"cd9660:acd0",
	NULL
};

/* legacy find-root code */
char		*rootdevnames[2] = {NULL, NULL};
#ifndef ROOTDEVNAME
# define ROOTDEVNAME NULL
#endif
static const char	*ctrootdevname = ROOTDEVNAME;

/*
 * ---------------------------------------------------------------------
 * Functions for building and sanitizing the mount options
 */

/* Remove one mount option.
*/ 159 static void 160 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) 161 { 162 163 TAILQ_REMOVE(opts, opt, link); 164 free(opt->name, M_MOUNT); 165 if (opt->value != NULL) 166 free(opt->value, M_MOUNT); 167 #ifdef INVARIANTS 168 else if (opt->len != 0) 169 panic("%s: mount option with NULL value but length != 0", 170 __func__); 171 #endif 172 free(opt, M_MOUNT); 173 } 174 175 /* Release all resources related to the mount options. */ 176 void 177 vfs_freeopts(struct vfsoptlist *opts) 178 { 179 struct vfsopt *opt; 180 181 while (!TAILQ_EMPTY(opts)) { 182 opt = TAILQ_FIRST(opts); 183 vfs_freeopt(opts, opt); 184 } 185 free(opts, M_MOUNT); 186 } 187 188 void 189 vfs_deleteopt(struct vfsoptlist *opts, const char *name) 190 { 191 struct vfsopt *opt, *temp; 192 193 if (opts == NULL) 194 return; 195 TAILQ_FOREACH_SAFE(opt, opts, link, temp) { 196 if (strcmp(opt->name, name) == 0) 197 vfs_freeopt(opts, opt); 198 } 199 } 200 201 /* 202 * Check if options are equal (with or without the "no" prefix). 203 */ 204 static int 205 vfs_equalopts(const char *opt1, const char *opt2) 206 { 207 208 /* "opt" vs. "opt" or "noopt" vs. "noopt" */ 209 if (strcmp(opt1, opt2) == 0) 210 return (1); 211 /* "noopt" vs. "opt" */ 212 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) 213 return (1); 214 /* "opt" vs. "noopt" */ 215 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) 216 return (1); 217 return (0); 218 } 219 220 /* 221 * If a mount option is specified several times, 222 * (with or without the "no" prefix) only keep 223 * the last occurence of it. 
224 */ 225 static void 226 vfs_sanitizeopts(struct vfsoptlist *opts) 227 { 228 struct vfsopt *opt, *opt2, *tmp; 229 230 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { 231 opt2 = TAILQ_PREV(opt, vfsoptlist, link); 232 while (opt2 != NULL) { 233 if (vfs_equalopts(opt->name, opt2->name)) { 234 tmp = TAILQ_PREV(opt2, vfsoptlist, link); 235 vfs_freeopt(opts, opt2); 236 opt2 = tmp; 237 } else { 238 opt2 = TAILQ_PREV(opt2, vfsoptlist, link); 239 } 240 } 241 } 242 } 243 244 /* 245 * Build a linked list of mount options from a struct uio. 246 */ 247 static int 248 vfs_buildopts(struct uio *auio, struct vfsoptlist **options) 249 { 250 struct vfsoptlist *opts; 251 struct vfsopt *opt; 252 size_t memused; 253 unsigned int i, iovcnt; 254 int error, namelen, optlen; 255 256 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 257 TAILQ_INIT(opts); 258 memused = 0; 259 iovcnt = auio->uio_iovcnt; 260 for (i = 0; i < iovcnt; i += 2) { 261 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 262 namelen = auio->uio_iov[i].iov_len; 263 optlen = auio->uio_iov[i + 1].iov_len; 264 opt->name = malloc(namelen, M_MOUNT, M_WAITOK); 265 opt->value = NULL; 266 opt->len = 0; 267 268 /* 269 * Do this early, so jumps to "bad" will free the current 270 * option. 271 */ 272 TAILQ_INSERT_TAIL(opts, opt, link); 273 memused += sizeof(struct vfsopt) + optlen + namelen; 274 275 /* 276 * Avoid consuming too much memory, and attempts to overflow 277 * memused. 278 */ 279 if (memused > VFS_MOUNTARG_SIZE_MAX || 280 optlen > VFS_MOUNTARG_SIZE_MAX || 281 namelen > VFS_MOUNTARG_SIZE_MAX) { 282 error = EINVAL; 283 goto bad; 284 } 285 286 if (auio->uio_segflg == UIO_SYSSPACE) { 287 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); 288 } else { 289 error = copyin(auio->uio_iov[i].iov_base, opt->name, 290 namelen); 291 if (error) 292 goto bad; 293 } 294 /* Ensure names are null-terminated strings. 
*/ 295 if (opt->name[namelen - 1] != '\0') { 296 error = EINVAL; 297 goto bad; 298 } 299 if (optlen != 0) { 300 opt->len = optlen; 301 opt->value = malloc(optlen, M_MOUNT, M_WAITOK); 302 if (auio->uio_segflg == UIO_SYSSPACE) { 303 bcopy(auio->uio_iov[i + 1].iov_base, opt->value, 304 optlen); 305 } else { 306 error = copyin(auio->uio_iov[i + 1].iov_base, 307 opt->value, optlen); 308 if (error) 309 goto bad; 310 } 311 } 312 } 313 vfs_sanitizeopts(opts); 314 *options = opts; 315 return (0); 316 bad: 317 vfs_freeopts(opts); 318 return (error); 319 } 320 321 /* 322 * Merge the old mount options with the new ones passed 323 * in the MNT_UPDATE case. 324 * 325 * XXX This function will keep a "nofoo" option in the 326 * new options if there is no matching "foo" option 327 * to be cancelled in the old options. This is a bug 328 * if the option's canonical name is "foo". E.g., "noro" 329 * shouldn't end up in the mount point's active options, 330 * but it can. 331 */ 332 static void 333 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts) 334 { 335 struct vfsopt *opt, *opt2, *new; 336 337 TAILQ_FOREACH(opt, opts, link) { 338 /* 339 * Check that this option hasn't been redefined 340 * nor cancelled with a "no" mount option. 341 */ 342 opt2 = TAILQ_FIRST(toopts); 343 while (opt2 != NULL) { 344 if (strcmp(opt2->name, opt->name) == 0) 345 goto next; 346 if (strncmp(opt2->name, "no", 2) == 0 && 347 strcmp(opt2->name + 2, opt->name) == 0) { 348 vfs_freeopt(toopts, opt2); 349 goto next; 350 } 351 opt2 = TAILQ_NEXT(opt2, link); 352 } 353 /* We want this option, duplicate it. 
*/ 354 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 355 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK); 356 strcpy(new->name, opt->name); 357 if (opt->len != 0) { 358 new->value = malloc(opt->len, M_MOUNT, M_WAITOK); 359 bcopy(opt->value, new->value, opt->len); 360 } else { 361 new->value = NULL; 362 } 363 new->len = opt->len; 364 TAILQ_INSERT_TAIL(toopts, new, link); 365 next: 366 continue; 367 } 368 } 369 370 /* 371 * Mount a filesystem. 372 */ 373 int 374 nmount(td, uap) 375 struct thread *td; 376 struct nmount_args /* { 377 struct iovec *iovp; 378 unsigned int iovcnt; 379 int flags; 380 } */ *uap; 381 { 382 struct uio *auio; 383 struct iovec *iov; 384 unsigned int i; 385 int error; 386 u_int iovcnt; 387 388 AUDIT_ARG(fflags, uap->flags); 389 390 /* 391 * Filter out MNT_ROOTFS. We do not want clients of nmount() in 392 * userspace to set this flag, but we must filter it out if we want 393 * MNT_UPDATE on the root file system to work. 394 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). 395 */ 396 uap->flags &= ~MNT_ROOTFS; 397 398 iovcnt = uap->iovcnt; 399 /* 400 * Check that we have an even number of iovec's 401 * and that we have at least two options. 
402 */ 403 if ((iovcnt & 1) || (iovcnt < 4)) 404 return (EINVAL); 405 406 error = copyinuio(uap->iovp, iovcnt, &auio); 407 if (error) 408 return (error); 409 iov = auio->uio_iov; 410 for (i = 0; i < iovcnt; i++) { 411 if (iov->iov_len > MMAXOPTIONLEN) { 412 free(auio, M_IOV); 413 return (EINVAL); 414 } 415 iov++; 416 } 417 error = vfs_donmount(td, uap->flags, auio); 418 419 free(auio, M_IOV); 420 return (error); 421 } 422 423 /* 424 * --------------------------------------------------------------------- 425 * Various utility functions 426 */ 427 428 void 429 vfs_ref(struct mount *mp) 430 { 431 432 MNT_ILOCK(mp); 433 MNT_REF(mp); 434 MNT_IUNLOCK(mp); 435 } 436 437 void 438 vfs_rel(struct mount *mp) 439 { 440 441 MNT_ILOCK(mp); 442 MNT_REL(mp); 443 MNT_IUNLOCK(mp); 444 } 445 446 static int 447 mount_init(void *mem, int size, int flags) 448 { 449 struct mount *mp; 450 451 mp = (struct mount *)mem; 452 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); 453 lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); 454 return (0); 455 } 456 457 static void 458 mount_fini(void *mem, int size) 459 { 460 struct mount *mp; 461 462 mp = (struct mount *)mem; 463 lockdestroy(&mp->mnt_explock); 464 mtx_destroy(&mp->mnt_mtx); 465 } 466 467 /* 468 * Allocate and initialize the mount point struct. 
*/
/*
 * vp is recorded as the covered vnode, vfsp supplies the filesystem
 * operations and type name, fspath becomes f_mntonname and cred is
 * duplicated as the mount's credential.  The returned mount has been
 * passed through vfs_busy() and holds one count on vfsp->vfc_refcount.
 */
struct mount *
vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
    struct ucred *cred)
{
	struct mount *mp;

	mp = uma_zalloc(mount_zone, M_WAITOK);
	/* Clear only the mnt_startzero..mnt_endzero range of the struct. */
	bzero(&mp->mnt_startzero,
	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
	TAILQ_INIT(&mp->mnt_nvnodelist);
	mp->mnt_nvnodelistsize = 0;
	mp->mnt_ref = 0;
	(void) vfs_busy(mp, MBF_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;	/* XXX Unlocked */
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	/* Bumped on every allocation; dounmount() compares it. */
	mp->mnt_gen++;
	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_vnodecovered = vp;
	mp->mnt_cred = crdup(cred);
	mp->mnt_stat.f_owner = cred->cr_uid;
	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
	mp->mnt_iosize_max = DFLTPHYS;
#ifdef MAC
	mac_mount_init(mp);
	mac_mount_create(cred, mp);
#endif
	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
	return (mp);
}

/*
 * Destroy the mount struct previously allocated by vfs_mount_alloc().
504 */ 505 void 506 vfs_mount_destroy(struct mount *mp) 507 { 508 509 MNT_ILOCK(mp); 510 mp->mnt_kern_flag |= MNTK_REFEXPIRE; 511 if (mp->mnt_kern_flag & MNTK_MWAIT) { 512 mp->mnt_kern_flag &= ~MNTK_MWAIT; 513 wakeup(mp); 514 } 515 while (mp->mnt_ref) 516 msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0); 517 KASSERT(mp->mnt_ref == 0, 518 ("%s: invalid refcount in the drain path @ %s:%d", __func__, 519 __FILE__, __LINE__)); 520 if (mp->mnt_writeopcount != 0) 521 panic("vfs_mount_destroy: nonzero writeopcount"); 522 if (mp->mnt_secondary_writes != 0) 523 panic("vfs_mount_destroy: nonzero secondary_writes"); 524 mp->mnt_vfc->vfc_refcount--; 525 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) { 526 struct vnode *vp; 527 528 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) 529 vprint("", vp); 530 panic("unmount: dangling vnode"); 531 } 532 if (mp->mnt_nvnodelistsize != 0) 533 panic("vfs_mount_destroy: nonzero nvnodelistsize"); 534 if (mp->mnt_lockref != 0) 535 panic("vfs_mount_destroy: nonzero lock refcount"); 536 MNT_IUNLOCK(mp); 537 #ifdef MAC 538 mac_mount_destroy(mp); 539 #endif 540 if (mp->mnt_opt != NULL) 541 vfs_freeopts(mp->mnt_opt); 542 crfree(mp->mnt_cred); 543 uma_zfree(mount_zone, mp); 544 } 545 546 int 547 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions) 548 { 549 struct vfsoptlist *optlist; 550 struct vfsopt *opt, *noro_opt, *tmp_opt; 551 char *fstype, *fspath, *errmsg; 552 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos; 553 int has_rw, has_noro; 554 555 errmsg = fspath = NULL; 556 errmsg_len = has_noro = has_rw = fspathlen = 0; 557 errmsg_pos = -1; 558 559 error = vfs_buildopts(fsoptions, &optlist); 560 if (error) 561 return (error); 562 563 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0) 564 errmsg_pos = vfs_getopt_pos(optlist, "errmsg"); 565 566 /* 567 * We need these two options before the others, 568 * and they are mandatory for any filesystem. 569 * Ensure they are NUL terminated as well. 
570 */ 571 fstypelen = 0; 572 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); 573 if (error || fstype[fstypelen - 1] != '\0') { 574 error = EINVAL; 575 if (errmsg != NULL) 576 strncpy(errmsg, "Invalid fstype", errmsg_len); 577 goto bail; 578 } 579 fspathlen = 0; 580 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); 581 if (error || fspath[fspathlen - 1] != '\0') { 582 error = EINVAL; 583 if (errmsg != NULL) 584 strncpy(errmsg, "Invalid fspath", errmsg_len); 585 goto bail; 586 } 587 588 /* 589 * We need to see if we have the "update" option 590 * before we call vfs_domount(), since vfs_domount() has special 591 * logic based on MNT_UPDATE. This is very important 592 * when we want to update the root filesystem. 593 */ 594 TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) { 595 if (strcmp(opt->name, "update") == 0) { 596 fsflags |= MNT_UPDATE; 597 vfs_freeopt(optlist, opt); 598 } 599 else if (strcmp(opt->name, "async") == 0) 600 fsflags |= MNT_ASYNC; 601 else if (strcmp(opt->name, "force") == 0) { 602 fsflags |= MNT_FORCE; 603 vfs_freeopt(optlist, opt); 604 } 605 else if (strcmp(opt->name, "reload") == 0) { 606 fsflags |= MNT_RELOAD; 607 vfs_freeopt(optlist, opt); 608 } 609 else if (strcmp(opt->name, "multilabel") == 0) 610 fsflags |= MNT_MULTILABEL; 611 else if (strcmp(opt->name, "noasync") == 0) 612 fsflags &= ~MNT_ASYNC; 613 else if (strcmp(opt->name, "noatime") == 0) 614 fsflags |= MNT_NOATIME; 615 else if (strcmp(opt->name, "atime") == 0) { 616 free(opt->name, M_MOUNT); 617 opt->name = strdup("nonoatime", M_MOUNT); 618 } 619 else if (strcmp(opt->name, "noclusterr") == 0) 620 fsflags |= MNT_NOCLUSTERR; 621 else if (strcmp(opt->name, "clusterr") == 0) { 622 free(opt->name, M_MOUNT); 623 opt->name = strdup("nonoclusterr", M_MOUNT); 624 } 625 else if (strcmp(opt->name, "noclusterw") == 0) 626 fsflags |= MNT_NOCLUSTERW; 627 else if (strcmp(opt->name, "clusterw") == 0) { 628 free(opt->name, M_MOUNT); 629 opt->name = 
strdup("nonoclusterw", M_MOUNT); 630 } 631 else if (strcmp(opt->name, "noexec") == 0) 632 fsflags |= MNT_NOEXEC; 633 else if (strcmp(opt->name, "exec") == 0) { 634 free(opt->name, M_MOUNT); 635 opt->name = strdup("nonoexec", M_MOUNT); 636 } 637 else if (strcmp(opt->name, "nosuid") == 0) 638 fsflags |= MNT_NOSUID; 639 else if (strcmp(opt->name, "suid") == 0) { 640 free(opt->name, M_MOUNT); 641 opt->name = strdup("nonosuid", M_MOUNT); 642 } 643 else if (strcmp(opt->name, "nosymfollow") == 0) 644 fsflags |= MNT_NOSYMFOLLOW; 645 else if (strcmp(opt->name, "symfollow") == 0) { 646 free(opt->name, M_MOUNT); 647 opt->name = strdup("nonosymfollow", M_MOUNT); 648 } 649 else if (strcmp(opt->name, "noro") == 0) { 650 fsflags &= ~MNT_RDONLY; 651 has_noro = 1; 652 } 653 else if (strcmp(opt->name, "rw") == 0) { 654 fsflags &= ~MNT_RDONLY; 655 has_rw = 1; 656 } 657 else if (strcmp(opt->name, "ro") == 0) 658 fsflags |= MNT_RDONLY; 659 else if (strcmp(opt->name, "rdonly") == 0) { 660 free(opt->name, M_MOUNT); 661 opt->name = strdup("ro", M_MOUNT); 662 fsflags |= MNT_RDONLY; 663 } 664 else if (strcmp(opt->name, "suiddir") == 0) 665 fsflags |= MNT_SUIDDIR; 666 else if (strcmp(opt->name, "sync") == 0) 667 fsflags |= MNT_SYNCHRONOUS; 668 else if (strcmp(opt->name, "union") == 0) 669 fsflags |= MNT_UNION; 670 } 671 672 /* 673 * If "rw" was specified as a mount option, and we 674 * are trying to update a mount-point from "ro" to "rw", 675 * we need a mount option "noro", since in vfs_mergeopts(), 676 * "noro" will cancel "ro", but "rw" will not do anything. 677 */ 678 if (has_rw && !has_noro) { 679 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 680 noro_opt->name = strdup("noro", M_MOUNT); 681 noro_opt->value = NULL; 682 noro_opt->len = 0; 683 TAILQ_INSERT_TAIL(optlist, noro_opt, link); 684 } 685 686 /* 687 * Be ultra-paranoid about making sure the type and fspath 688 * variables will fit in our mp buffers, including the 689 * terminating NUL. 
690 */ 691 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { 692 error = ENAMETOOLONG; 693 goto bail; 694 } 695 696 mtx_lock(&Giant); 697 error = vfs_domount(td, fstype, fspath, fsflags, optlist); 698 mtx_unlock(&Giant); 699 bail: 700 /* copyout the errmsg */ 701 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt) 702 && errmsg_len > 0 && errmsg != NULL) { 703 if (fsoptions->uio_segflg == UIO_SYSSPACE) { 704 bcopy(errmsg, 705 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 706 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 707 } else { 708 copyout(errmsg, 709 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 710 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 711 } 712 } 713 714 if (error != 0) 715 vfs_freeopts(optlist); 716 return (error); 717 } 718 719 /* 720 * Old mount API. 721 */ 722 #ifndef _SYS_SYSPROTO_H_ 723 struct mount_args { 724 char *type; 725 char *path; 726 int flags; 727 caddr_t data; 728 }; 729 #endif 730 /* ARGSUSED */ 731 int 732 mount(td, uap) 733 struct thread *td; 734 struct mount_args /* { 735 char *type; 736 char *path; 737 int flags; 738 caddr_t data; 739 } */ *uap; 740 { 741 char *fstype; 742 struct vfsconf *vfsp = NULL; 743 struct mntarg *ma = NULL; 744 int error; 745 746 AUDIT_ARG(fflags, uap->flags); 747 748 /* 749 * Filter out MNT_ROOTFS. We do not want clients of mount() in 750 * userspace to set this flag, but we must filter it out if we want 751 * MNT_UPDATE on the root file system to work. 752 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). 
753 */ 754 uap->flags &= ~MNT_ROOTFS; 755 756 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); 757 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); 758 if (error) { 759 free(fstype, M_TEMP); 760 return (error); 761 } 762 763 AUDIT_ARG(text, fstype); 764 mtx_lock(&Giant); 765 vfsp = vfs_byname_kld(fstype, td, &error); 766 free(fstype, M_TEMP); 767 if (vfsp == NULL) { 768 mtx_unlock(&Giant); 769 return (ENOENT); 770 } 771 if (vfsp->vfc_vfsops->vfs_cmount == NULL) { 772 mtx_unlock(&Giant); 773 return (EOPNOTSUPP); 774 } 775 776 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN); 777 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN); 778 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro"); 779 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid"); 780 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec"); 781 782 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td); 783 mtx_unlock(&Giant); 784 return (error); 785 } 786 787 788 /* 789 * vfs_domount(): actually attempt a filesystem mount. 790 */ 791 static int 792 vfs_domount( 793 struct thread *td, /* Calling thread. */ 794 const char *fstype, /* Filesystem type. */ 795 char *fspath, /* Mount path. */ 796 int fsflags, /* Flags common to all filesystems. */ 797 void *fsdata /* Options local to the filesystem. */ 798 ) 799 { 800 struct vnode *vp; 801 struct mount *mp; 802 struct vfsconf *vfsp; 803 struct oexport_args oexport; 804 struct export_args export; 805 int error, flag = 0; 806 struct vattr va; 807 struct nameidata nd; 808 809 mtx_assert(&Giant, MA_OWNED); 810 /* 811 * Be ultra-paranoid about making sure the type and fspath 812 * variables will fit in our mp buffers, including the 813 * terminating NUL. 
814 */ 815 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) 816 return (ENAMETOOLONG); 817 818 if (jailed(td->td_ucred) || usermount == 0) { 819 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) 820 return (error); 821 } 822 823 /* 824 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. 825 */ 826 if (fsflags & MNT_EXPORTED) { 827 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED); 828 if (error) 829 return (error); 830 } 831 if (fsflags & MNT_SUIDDIR) { 832 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR); 833 if (error) 834 return (error); 835 } 836 /* 837 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users. 838 */ 839 if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) { 840 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0) 841 fsflags |= MNT_NOSUID | MNT_USER; 842 } 843 844 /* Load KLDs before we lock the covered vnode to avoid reversals. */ 845 vfsp = NULL; 846 if ((fsflags & MNT_UPDATE) == 0) { 847 /* Don't try to load KLDs if we're mounting the root. */ 848 if (fsflags & MNT_ROOTFS) 849 vfsp = vfs_byname(fstype); 850 else 851 vfsp = vfs_byname_kld(fstype, td, &error); 852 if (vfsp == NULL) 853 return (ENODEV); 854 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL)) 855 return (EPERM); 856 } 857 /* 858 * Get vnode to be covered 859 */ 860 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, 861 fspath, td); 862 if ((error = namei(&nd)) != 0) 863 return (error); 864 NDFREE(&nd, NDF_ONLY_PNBUF); 865 vp = nd.ni_vp; 866 if (fsflags & MNT_UPDATE) { 867 if ((vp->v_vflag & VV_ROOT) == 0) { 868 vput(vp); 869 return (EINVAL); 870 } 871 mp = vp->v_mount; 872 MNT_ILOCK(mp); 873 flag = mp->mnt_flag; 874 /* 875 * We only allow the filesystem to be reloaded if it 876 * is currently mounted read-only. 
877 */ 878 if ((fsflags & MNT_RELOAD) && 879 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 880 MNT_IUNLOCK(mp); 881 vput(vp); 882 return (EOPNOTSUPP); /* Needs translation */ 883 } 884 MNT_IUNLOCK(mp); 885 /* 886 * Only privileged root, or (if MNT_USER is set) the user that 887 * did the original mount is permitted to update it. 888 */ 889 error = vfs_suser(mp, td); 890 if (error) { 891 vput(vp); 892 return (error); 893 } 894 if (vfs_busy(mp, MBF_NOWAIT)) { 895 vput(vp); 896 return (EBUSY); 897 } 898 VI_LOCK(vp); 899 if ((vp->v_iflag & VI_MOUNT) != 0 || 900 vp->v_mountedhere != NULL) { 901 VI_UNLOCK(vp); 902 vfs_unbusy(mp); 903 vput(vp); 904 return (EBUSY); 905 } 906 vp->v_iflag |= VI_MOUNT; 907 VI_UNLOCK(vp); 908 MNT_ILOCK(mp); 909 mp->mnt_flag |= fsflags & 910 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS); 911 MNT_IUNLOCK(mp); 912 VOP_UNLOCK(vp, 0); 913 mp->mnt_optnew = fsdata; 914 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); 915 } else { 916 /* 917 * If the user is not root, ensure that they own the directory 918 * onto which we are attempting to mount. 919 */ 920 error = VOP_GETATTR(vp, &va, td->td_ucred); 921 if (error) { 922 vput(vp); 923 return (error); 924 } 925 if (va.va_uid != td->td_ucred->cr_uid) { 926 error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN, 927 0); 928 if (error) { 929 vput(vp); 930 return (error); 931 } 932 } 933 error = vinvalbuf(vp, V_SAVE, 0, 0); 934 if (error != 0) { 935 vput(vp); 936 return (error); 937 } 938 if (vp->v_type != VDIR) { 939 vput(vp); 940 return (ENOTDIR); 941 } 942 VI_LOCK(vp); 943 if ((vp->v_iflag & VI_MOUNT) != 0 || 944 vp->v_mountedhere != NULL) { 945 VI_UNLOCK(vp); 946 vput(vp); 947 return (EBUSY); 948 } 949 vp->v_iflag |= VI_MOUNT; 950 VI_UNLOCK(vp); 951 952 /* 953 * Allocate and initialize the filesystem. 954 */ 955 mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred); 956 VOP_UNLOCK(vp, 0); 957 958 /* XXXMAC: pass to vfs_mount_alloc? 
*/ 959 mp->mnt_optnew = fsdata; 960 } 961 962 /* 963 * Set the mount level flags. 964 */ 965 MNT_ILOCK(mp); 966 mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) | 967 (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS | 968 MNT_RDONLY)); 969 if ((mp->mnt_flag & MNT_ASYNC) == 0) 970 mp->mnt_kern_flag &= ~MNTK_ASYNC; 971 MNT_IUNLOCK(mp); 972 /* 973 * Mount the filesystem. 974 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 975 * get. No freeing of cn_pnbuf. 976 */ 977 error = VFS_MOUNT(mp, td); 978 979 /* 980 * Process the export option only if we are 981 * updating mount options. 982 */ 983 if (!error && (fsflags & MNT_UPDATE)) { 984 if (vfs_copyopt(mp->mnt_optnew, "export", &export, 985 sizeof(export)) == 0) 986 error = vfs_export(mp, &export); 987 else if (vfs_copyopt(mp->mnt_optnew, "export", &oexport, 988 sizeof(oexport)) == 0) { 989 export.ex_flags = oexport.ex_flags; 990 export.ex_root = oexport.ex_root; 991 export.ex_anon = oexport.ex_anon; 992 export.ex_addr = oexport.ex_addr; 993 export.ex_addrlen = oexport.ex_addrlen; 994 export.ex_mask = oexport.ex_mask; 995 export.ex_masklen = oexport.ex_masklen; 996 export.ex_indexfile = oexport.ex_indexfile; 997 export.ex_numsecflavors = 0; 998 error = vfs_export(mp, &export); 999 } 1000 } 1001 1002 if (!error) { 1003 if (mp->mnt_opt != NULL) 1004 vfs_freeopts(mp->mnt_opt); 1005 mp->mnt_opt = mp->mnt_optnew; 1006 (void)VFS_STATFS(mp, &mp->mnt_stat, td); 1007 } 1008 /* 1009 * Prevent external consumers of mount options from reading 1010 * mnt_optnew. 
1011 */ 1012 mp->mnt_optnew = NULL; 1013 if (mp->mnt_flag & MNT_UPDATE) { 1014 MNT_ILOCK(mp); 1015 if (error) 1016 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | 1017 (flag & ~MNT_QUOTA); 1018 else 1019 mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | 1020 MNT_FORCE | MNT_SNAPSHOT); 1021 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1022 mp->mnt_kern_flag |= MNTK_ASYNC; 1023 else 1024 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1025 MNT_IUNLOCK(mp); 1026 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1027 if (mp->mnt_syncer == NULL) 1028 error = vfs_allocate_syncvnode(mp); 1029 } else { 1030 if (mp->mnt_syncer != NULL) 1031 vrele(mp->mnt_syncer); 1032 mp->mnt_syncer = NULL; 1033 } 1034 vfs_unbusy(mp); 1035 VI_LOCK(vp); 1036 vp->v_iflag &= ~VI_MOUNT; 1037 VI_UNLOCK(vp); 1038 vrele(vp); 1039 return (error); 1040 } 1041 MNT_ILOCK(mp); 1042 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1043 mp->mnt_kern_flag |= MNTK_ASYNC; 1044 else 1045 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1046 MNT_IUNLOCK(mp); 1047 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1048 /* 1049 * Put the new filesystem on the mount list after root. 1050 */ 1051 cache_purge(vp); 1052 if (!error) { 1053 struct vnode *newdp; 1054 1055 VI_LOCK(vp); 1056 vp->v_iflag &= ~VI_MOUNT; 1057 VI_UNLOCK(vp); 1058 vp->v_mountedhere = mp; 1059 mtx_lock(&mountlist_mtx); 1060 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1061 mtx_unlock(&mountlist_mtx); 1062 vfs_event_signal(NULL, VQ_MOUNT, 0); 1063 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td)) 1064 panic("mount: lost mount"); 1065 mountcheckdirs(vp, newdp); 1066 vput(newdp); 1067 VOP_UNLOCK(vp, 0); 1068 if ((mp->mnt_flag & MNT_RDONLY) == 0) 1069 error = vfs_allocate_syncvnode(mp); 1070 vfs_unbusy(mp); 1071 if (error) 1072 vrele(vp); 1073 } else { 1074 VI_LOCK(vp); 1075 vp->v_iflag &= ~VI_MOUNT; 1076 VI_UNLOCK(vp); 1077 vfs_unbusy(mp); 1078 vfs_mount_destroy(mp); 1079 vput(vp); 1080 } 1081 return (error); 1082 } 1083 1084 /* 1085 * Unmount a filesystem. 
*
 * Note: unmount takes a path to the vnode mounted on as argument, not
 * special file (as before).
 *
 * With MNT_BYFSID the path is instead a string of the form
 * "FSID:<val0>:<val1>" naming the filesystem ID to look up.
 */
#ifndef _SYS_SYSPROTO_H_
struct unmount_args {
	char	*path;
	int	flags;
};
#endif
/* ARGSUSED */
int
unmount(td, uap)
	struct thread *td;
	register struct unmount_args /* {
		char *path;
		int flags;
	} */ *uap;
{
	struct mount *mp;
	char *pathbuf;
	int error, id0, id1;

	if (jailed(td->td_ucred) || usermount == 0) {
		error = priv_check(td, PRIV_VFS_UNMOUNT);
		if (error)
			return (error);
	}

	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
	error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
	if (error) {
		free(pathbuf, M_TEMP);
		return (error);
	}
	AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1);
	mtx_lock(&Giant);
	if (uap->flags & MNT_BYFSID) {
		/* Decode the filesystem ID. */
		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
			mtx_unlock(&Giant);
			free(pathbuf, M_TEMP);
			return (EINVAL);
		}

		/* Search newest-first; mp is NULL if nothing matches. */
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
			    mp->mnt_stat.f_fsid.val[1] == id1)
				break;
		}
		mtx_unlock(&mountlist_mtx);
	} else {
		/* Match the path against each mount's f_mntonname. */
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
				break;
		}
		mtx_unlock(&mountlist_mtx);
	}
	free(pathbuf, M_TEMP);
	if (mp == NULL) {
		/*
		 * Previously we returned ENOENT for a nonexistent path and
		 * EINVAL for a non-mountpoint.  We cannot tell these apart
		 * now, so in the !MNT_BYFSID case return the more likely
		 * EINVAL for compatibility.
		 */
		mtx_unlock(&Giant);
		return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
	}

	/*
	 * Don't allow unmounting the root filesystem.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		mtx_unlock(&Giant);
		return (EINVAL);
	}
	error = dounmount(mp, uap->flags, td);
	mtx_unlock(&Giant);
	return (error);
}

/*
 * Do the actual filesystem unmount.
 */
int
dounmount(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct vnode *coveredvp, *fsrootvp;
	int error;
	int async_flag;
	int mnt_gen_r;

	mtx_assert(&Giant, MA_OWNED);

	if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
		/* Sample the generation before possibly sleeping below. */
		mnt_gen_r = mp->mnt_gen;
		VI_LOCK(coveredvp);
		vholdl(coveredvp);
		vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
		vdrop(coveredvp);
		/*
		 * Check for mp being unmounted while waiting for the
		 * covered vnode lock.
		 */
		if (coveredvp->v_mountedhere != mp ||
		    coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
			VOP_UNLOCK(coveredvp, 0);
			return (EBUSY);
		}
	}
	/*
	 * Only privileged root, or (if MNT_USER is set) the user that did the
	 * original mount is permitted to unmount this filesystem.
	 */
	error = vfs_suser(mp, td);
	if (error) {
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0);
		return (error);
	}

	MNT_ILOCK(mp);
	/* Somebody else is already unmounting this filesystem. */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		MNT_IUNLOCK(mp);
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0);
		return (EBUSY);
	}
	mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
	/* Allow filesystems to detect that a forced unmount is in progress.
*/ 1222 if (flags & MNT_FORCE) 1223 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 1224 error = 0; 1225 if (mp->mnt_lockref) { 1226 if ((flags & MNT_FORCE) == 0) { 1227 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ | 1228 MNTK_UNMOUNTF); 1229 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1230 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1231 wakeup(mp); 1232 } 1233 MNT_IUNLOCK(mp); 1234 if (coveredvp) 1235 VOP_UNLOCK(coveredvp, 0); 1236 return (EBUSY); 1237 } 1238 mp->mnt_kern_flag |= MNTK_DRAINING; 1239 error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS, 1240 "mount drain", 0); 1241 } 1242 MNT_IUNLOCK(mp); 1243 KASSERT(mp->mnt_lockref == 0, 1244 ("%s: invalid lock refcount in the drain path @ %s:%d", 1245 __func__, __FILE__, __LINE__)); 1246 KASSERT(error == 0, 1247 ("%s: invalid return value for msleep in the drain path @ %s:%d", 1248 __func__, __FILE__, __LINE__)); 1249 vn_start_write(NULL, &mp, V_WAIT); 1250 1251 if (mp->mnt_flag & MNT_EXPUBLIC) 1252 vfs_setpublicfs(NULL, NULL, NULL); 1253 1254 vfs_msync(mp, MNT_WAIT); 1255 MNT_ILOCK(mp); 1256 async_flag = mp->mnt_flag & MNT_ASYNC; 1257 mp->mnt_flag &= ~MNT_ASYNC; 1258 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1259 MNT_IUNLOCK(mp); 1260 cache_purgevfs(mp); /* remove cache entries for this file sys */ 1261 if (mp->mnt_syncer != NULL) 1262 vrele(mp->mnt_syncer); 1263 /* 1264 * For forced unmounts, move process cdir/rdir refs on the fs root 1265 * vnode to the covered vnode. For non-forced unmounts we want 1266 * such references to cause an EBUSY error. 
1267 */ 1268 if ((flags & MNT_FORCE) && 1269 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) { 1270 if (mp->mnt_vnodecovered != NULL) 1271 mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); 1272 if (fsrootvp == rootvnode) { 1273 vrele(rootvnode); 1274 rootvnode = NULL; 1275 } 1276 vput(fsrootvp); 1277 } 1278 if (((mp->mnt_flag & MNT_RDONLY) || 1279 (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) || 1280 (flags & MNT_FORCE)) { 1281 error = VFS_UNMOUNT(mp, flags, td); 1282 } 1283 vn_finished_write(mp); 1284 /* 1285 * If we failed to flush the dirty blocks for this mount point, 1286 * undo all the cdir/rdir and rootvnode changes we made above. 1287 * Unless we failed to do so because the device is reporting that 1288 * it doesn't exist anymore. 1289 */ 1290 if (error && error != ENXIO) { 1291 if ((flags & MNT_FORCE) && 1292 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) { 1293 if (mp->mnt_vnodecovered != NULL) 1294 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); 1295 if (rootvnode == NULL) { 1296 rootvnode = fsrootvp; 1297 vref(rootvnode); 1298 } 1299 vput(fsrootvp); 1300 } 1301 MNT_ILOCK(mp); 1302 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ; 1303 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) { 1304 MNT_IUNLOCK(mp); 1305 (void) vfs_allocate_syncvnode(mp); 1306 MNT_ILOCK(mp); 1307 } 1308 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1309 mp->mnt_flag |= async_flag; 1310 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1311 mp->mnt_kern_flag |= MNTK_ASYNC; 1312 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1313 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1314 wakeup(mp); 1315 } 1316 MNT_IUNLOCK(mp); 1317 if (coveredvp) 1318 VOP_UNLOCK(coveredvp, 0); 1319 return (error); 1320 } 1321 mtx_lock(&mountlist_mtx); 1322 TAILQ_REMOVE(&mountlist, mp, mnt_list); 1323 mtx_unlock(&mountlist_mtx); 1324 if (coveredvp != NULL) { 1325 coveredvp->v_mountedhere = NULL; 1326 vput(coveredvp); 1327 } 1328 vfs_event_signal(NULL, VQ_UNMOUNT, 0); 1329 vfs_mount_destroy(mp); 1330 return 
(0); 1331 } 1332 1333 /* 1334 * --------------------------------------------------------------------- 1335 * Mounting of root filesystem 1336 * 1337 */ 1338 1339 struct root_hold_token { 1340 const char *who; 1341 LIST_ENTRY(root_hold_token) list; 1342 }; 1343 1344 static LIST_HEAD(, root_hold_token) root_holds = 1345 LIST_HEAD_INITIALIZER(&root_holds); 1346 1347 static int root_mount_complete; 1348 1349 /* 1350 * Hold root mount. 1351 */ 1352 struct root_hold_token * 1353 root_mount_hold(const char *identifier) 1354 { 1355 struct root_hold_token *h; 1356 1357 h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK); 1358 h->who = identifier; 1359 mtx_lock(&mountlist_mtx); 1360 LIST_INSERT_HEAD(&root_holds, h, list); 1361 mtx_unlock(&mountlist_mtx); 1362 return (h); 1363 } 1364 1365 /* 1366 * Release root mount. 1367 */ 1368 void 1369 root_mount_rel(struct root_hold_token *h) 1370 { 1371 1372 mtx_lock(&mountlist_mtx); 1373 LIST_REMOVE(h, list); 1374 wakeup(&root_holds); 1375 mtx_unlock(&mountlist_mtx); 1376 free(h, M_DEVBUF); 1377 } 1378 1379 /* 1380 * Wait for all subsystems to release root mount. 1381 */ 1382 static void 1383 root_mount_prepare(void) 1384 { 1385 struct root_hold_token *h; 1386 1387 for (;;) { 1388 DROP_GIANT(); 1389 g_waitidle(); 1390 PICKUP_GIANT(); 1391 mtx_lock(&mountlist_mtx); 1392 if (LIST_EMPTY(&root_holds)) { 1393 mtx_unlock(&mountlist_mtx); 1394 break; 1395 } 1396 printf("Root mount waiting for:"); 1397 LIST_FOREACH(h, &root_holds, list) 1398 printf(" %s", h->who); 1399 printf("\n"); 1400 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold", 1401 hz); 1402 } 1403 } 1404 1405 /* 1406 * Root was mounted, share the good news. 1407 */ 1408 static void 1409 root_mount_done(void) 1410 { 1411 1412 /* 1413 * Use a mutex to prevent the wakeup being missed and waiting for 1414 * an extra 1 second sleep. 
1415 */ 1416 mtx_lock(&mountlist_mtx); 1417 root_mount_complete = 1; 1418 wakeup(&root_mount_complete); 1419 mtx_unlock(&mountlist_mtx); 1420 } 1421 1422 /* 1423 * Return true if root is already mounted. 1424 */ 1425 int 1426 root_mounted(void) 1427 { 1428 1429 /* No mutex is acquired here because int stores are atomic. */ 1430 return (root_mount_complete); 1431 } 1432 1433 /* 1434 * Wait until root is mounted. 1435 */ 1436 void 1437 root_mount_wait(void) 1438 { 1439 1440 /* 1441 * Panic on an obvious deadlock - the function can't be called from 1442 * a thread which is doing the whole SYSINIT stuff. 1443 */ 1444 KASSERT(curthread->td_proc->p_pid != 0, 1445 ("root_mount_wait: cannot be called from the swapper thread")); 1446 mtx_lock(&mountlist_mtx); 1447 while (!root_mount_complete) { 1448 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait", 1449 hz); 1450 } 1451 mtx_unlock(&mountlist_mtx); 1452 } 1453 1454 static void 1455 set_rootvnode(struct thread *td) 1456 { 1457 struct proc *p; 1458 1459 if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td)) 1460 panic("Cannot find root vnode"); 1461 1462 p = td->td_proc; 1463 FILEDESC_XLOCK(p->p_fd); 1464 1465 if (p->p_fd->fd_cdir != NULL) 1466 vrele(p->p_fd->fd_cdir); 1467 p->p_fd->fd_cdir = rootvnode; 1468 VREF(rootvnode); 1469 1470 if (p->p_fd->fd_rdir != NULL) 1471 vrele(p->p_fd->fd_rdir); 1472 p->p_fd->fd_rdir = rootvnode; 1473 VREF(rootvnode); 1474 1475 FILEDESC_XUNLOCK(p->p_fd); 1476 1477 VOP_UNLOCK(rootvnode, 0); 1478 1479 EVENTHANDLER_INVOKE(mountroot); 1480 } 1481 1482 /* 1483 * Mount /devfs as our root filesystem, but do not put it on the mountlist 1484 * yet. Create a /dev -> / symlink so that absolute pathnames will lookup. 
1485 */ 1486 1487 static void 1488 devfs_first(void) 1489 { 1490 struct thread *td = curthread; 1491 struct vfsoptlist *opts; 1492 struct vfsconf *vfsp; 1493 struct mount *mp = NULL; 1494 int error; 1495 1496 vfsp = vfs_byname("devfs"); 1497 KASSERT(vfsp != NULL, ("Could not find devfs by name")); 1498 if (vfsp == NULL) 1499 return; 1500 1501 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred); 1502 1503 error = VFS_MOUNT(mp, td); 1504 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error)); 1505 if (error) 1506 return; 1507 1508 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 1509 TAILQ_INIT(opts); 1510 mp->mnt_opt = opts; 1511 1512 mtx_lock(&mountlist_mtx); 1513 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 1514 mtx_unlock(&mountlist_mtx); 1515 1516 set_rootvnode(td); 1517 1518 error = kern_symlink(td, "/", "dev", UIO_SYSSPACE); 1519 if (error) 1520 printf("kern_symlink /dev -> / returns %d\n", error); 1521 } 1522 1523 /* 1524 * Surgically move our devfs to be mounted on /dev. 
1525 */ 1526 1527 static void 1528 devfs_fixup(struct thread *td) 1529 { 1530 struct nameidata nd; 1531 int error; 1532 struct vnode *vp, *dvp; 1533 struct mount *mp; 1534 1535 /* Remove our devfs mount from the mountlist and purge the cache */ 1536 mtx_lock(&mountlist_mtx); 1537 mp = TAILQ_FIRST(&mountlist); 1538 TAILQ_REMOVE(&mountlist, mp, mnt_list); 1539 mtx_unlock(&mountlist_mtx); 1540 cache_purgevfs(mp); 1541 1542 VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td); 1543 VI_LOCK(dvp); 1544 dvp->v_iflag &= ~VI_MOUNT; 1545 VI_UNLOCK(dvp); 1546 dvp->v_mountedhere = NULL; 1547 1548 /* Set up the real rootvnode, and purge the cache */ 1549 TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL; 1550 set_rootvnode(td); 1551 cache_purgevfs(rootvnode->v_mount); 1552 1553 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td); 1554 error = namei(&nd); 1555 if (error) { 1556 printf("Lookup of /dev for devfs, error: %d\n", error); 1557 return; 1558 } 1559 NDFREE(&nd, NDF_ONLY_PNBUF); 1560 vp = nd.ni_vp; 1561 if (vp->v_type != VDIR) { 1562 vput(vp); 1563 } 1564 error = vinvalbuf(vp, V_SAVE, 0, 0); 1565 if (error) { 1566 vput(vp); 1567 } 1568 cache_purge(vp); 1569 mp->mnt_vnodecovered = vp; 1570 vp->v_mountedhere = mp; 1571 mtx_lock(&mountlist_mtx); 1572 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1573 mtx_unlock(&mountlist_mtx); 1574 VOP_UNLOCK(vp, 0); 1575 vput(dvp); 1576 vfs_unbusy(mp); 1577 1578 /* Unlink the no longer needed /dev/dev -> / symlink */ 1579 kern_unlink(td, "/dev/dev", UIO_SYSSPACE); 1580 } 1581 1582 /* 1583 * Report errors during filesystem mounting. 1584 */ 1585 void 1586 vfs_mount_error(struct mount *mp, const char *fmt, ...) 
1587 { 1588 struct vfsoptlist *moptlist = mp->mnt_optnew; 1589 va_list ap; 1590 int error, len; 1591 char *errmsg; 1592 1593 error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len); 1594 if (error || errmsg == NULL || len <= 0) 1595 return; 1596 1597 va_start(ap, fmt); 1598 vsnprintf(errmsg, (size_t)len, fmt, ap); 1599 va_end(ap); 1600 } 1601 1602 /* 1603 * Find and mount the root filesystem 1604 */ 1605 void 1606 vfs_mountroot(void) 1607 { 1608 char *cp; 1609 int error, i, asked = 0; 1610 1611 root_mount_prepare(); 1612 1613 mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), 1614 NULL, NULL, mount_init, mount_fini, 1615 UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1616 devfs_first(); 1617 1618 /* 1619 * We are booted with instructions to prompt for the root filesystem. 1620 */ 1621 if (boothowto & RB_ASKNAME) { 1622 if (!vfs_mountroot_ask()) 1623 goto mounted; 1624 asked = 1; 1625 } 1626 1627 /* 1628 * The root filesystem information is compiled in, and we are 1629 * booted with instructions to use it. 1630 */ 1631 if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) { 1632 if (!vfs_mountroot_try(ctrootdevname)) 1633 goto mounted; 1634 ctrootdevname = NULL; 1635 } 1636 1637 /* 1638 * We've been given the generic "use CDROM as root" flag. This is 1639 * necessary because one media may be used in many different 1640 * devices, so we need to search for them. 1641 */ 1642 if (boothowto & RB_CDROM) { 1643 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) { 1644 if (!vfs_mountroot_try(cdrom_rootdevnames[i])) 1645 goto mounted; 1646 } 1647 } 1648 1649 /* 1650 * Try to use the value read by the loader from /etc/fstab, or 1651 * supplied via some other means. This is the preferred 1652 * mechanism. 
1653 */ 1654 cp = getenv("vfs.root.mountfrom"); 1655 if (cp != NULL) { 1656 error = vfs_mountroot_try(cp); 1657 freeenv(cp); 1658 if (!error) 1659 goto mounted; 1660 } 1661 1662 /* 1663 * Try values that may have been computed by code during boot 1664 */ 1665 if (!vfs_mountroot_try(rootdevnames[0])) 1666 goto mounted; 1667 if (!vfs_mountroot_try(rootdevnames[1])) 1668 goto mounted; 1669 1670 /* 1671 * If we (still) have a compiled-in default, try it. 1672 */ 1673 if (ctrootdevname != NULL) 1674 if (!vfs_mountroot_try(ctrootdevname)) 1675 goto mounted; 1676 /* 1677 * Everything so far has failed, prompt on the console if we haven't 1678 * already tried that. 1679 */ 1680 if (!asked) 1681 if (!vfs_mountroot_ask()) 1682 goto mounted; 1683 1684 panic("Root mount failed, startup aborted."); 1685 1686 mounted: 1687 root_mount_done(); 1688 } 1689 1690 /* 1691 * Mount (mountfrom) as the root filesystem. 1692 */ 1693 static int 1694 vfs_mountroot_try(const char *mountfrom) 1695 { 1696 struct mount *mp; 1697 char *vfsname, *path; 1698 time_t timebase; 1699 int error; 1700 char patt[32]; 1701 1702 vfsname = NULL; 1703 path = NULL; 1704 mp = NULL; 1705 error = EINVAL; 1706 1707 if (mountfrom == NULL) 1708 return (error); /* don't complain */ 1709 printf("Trying to mount root from %s\n", mountfrom); 1710 1711 /* parse vfs name and path */ 1712 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK); 1713 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK); 1714 vfsname[0] = path[0] = 0; 1715 sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN); 1716 if (sscanf(mountfrom, patt, vfsname, path) < 1) 1717 goto out; 1718 1719 if (path[0] == '\0') 1720 strcpy(path, ROOTNAME); 1721 1722 error = kernel_vmount( 1723 MNT_RDONLY | MNT_ROOTFS, 1724 "fstype", vfsname, 1725 "fspath", "/", 1726 "from", path, 1727 NULL); 1728 if (error == 0) { 1729 /* 1730 * We mount devfs prior to mounting the / FS, so the first 1731 * entry will typically be devfs. 
1732 */ 1733 mp = TAILQ_FIRST(&mountlist); 1734 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__)); 1735 1736 /* 1737 * Iterate over all currently mounted file systems and use 1738 * the time stamp found to check and/or initialize the RTC. 1739 * Typically devfs has no time stamp and the only other FS 1740 * is the actual / FS. 1741 * Call inittodr() only once and pass it the largest of the 1742 * timestamps we encounter. 1743 */ 1744 timebase = 0; 1745 do { 1746 if (mp->mnt_time > timebase) 1747 timebase = mp->mnt_time; 1748 mp = TAILQ_NEXT(mp, mnt_list); 1749 } while (mp != NULL); 1750 inittodr(timebase); 1751 1752 devfs_fixup(curthread); 1753 } 1754 out: 1755 free(path, M_MOUNT); 1756 free(vfsname, M_MOUNT); 1757 return (error); 1758 } 1759 1760 /* 1761 * --------------------------------------------------------------------- 1762 * Interactive root filesystem selection code. 1763 */ 1764 1765 static int 1766 vfs_mountroot_ask(void) 1767 { 1768 char name[128]; 1769 1770 for(;;) { 1771 printf("\nManual root filesystem specification:\n"); 1772 printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n"); 1773 #if defined(__amd64__) || defined(__i386__) || defined(__ia64__) 1774 printf(" eg. ufs:da0s1a\n"); 1775 #else 1776 printf(" eg. ufs:/dev/da0a\n"); 1777 #endif 1778 printf(" ? List valid disk boot devices\n"); 1779 printf(" <empty line> Abort manual input\n"); 1780 printf("\nmountroot> "); 1781 gets(name, sizeof(name), 1); 1782 if (name[0] == '\0') 1783 return (1); 1784 if (name[0] == '?') { 1785 printf("\nList of GEOM managed disk devices:\n "); 1786 g_dev_print(); 1787 continue; 1788 } 1789 if (!vfs_mountroot_try(name)) 1790 return (0); 1791 } 1792 } 1793 1794 /* 1795 * --------------------------------------------------------------------- 1796 * Functions for querying mount options/arguments from filesystems. 
1797 */ 1798 1799 /* 1800 * Check that no unknown options are given 1801 */ 1802 int 1803 vfs_filteropt(struct vfsoptlist *opts, const char **legal) 1804 { 1805 struct vfsopt *opt; 1806 char errmsg[255]; 1807 const char **t, *p, *q; 1808 int ret = 0; 1809 1810 TAILQ_FOREACH(opt, opts, link) { 1811 p = opt->name; 1812 q = NULL; 1813 if (p[0] == 'n' && p[1] == 'o') 1814 q = p + 2; 1815 for(t = global_opts; *t != NULL; t++) { 1816 if (strcmp(*t, p) == 0) 1817 break; 1818 if (q != NULL) { 1819 if (strcmp(*t, q) == 0) 1820 break; 1821 } 1822 } 1823 if (*t != NULL) 1824 continue; 1825 for(t = legal; *t != NULL; t++) { 1826 if (strcmp(*t, p) == 0) 1827 break; 1828 if (q != NULL) { 1829 if (strcmp(*t, q) == 0) 1830 break; 1831 } 1832 } 1833 if (*t != NULL) 1834 continue; 1835 snprintf(errmsg, sizeof(errmsg), 1836 "mount option <%s> is unknown", p); 1837 printf("%s\n", errmsg); 1838 ret = EINVAL; 1839 } 1840 if (ret != 0) { 1841 TAILQ_FOREACH(opt, opts, link) { 1842 if (strcmp(opt->name, "errmsg") == 0) { 1843 strncpy((char *)opt->value, errmsg, opt->len); 1844 } 1845 } 1846 } 1847 return (ret); 1848 } 1849 1850 /* 1851 * Get a mount option by its name. 1852 * 1853 * Return 0 if the option was found, ENOENT otherwise. 1854 * If len is non-NULL it will be filled with the length 1855 * of the option. If buf is non-NULL, it will be filled 1856 * with the address of the option. 
1857 */ 1858 int 1859 vfs_getopt(opts, name, buf, len) 1860 struct vfsoptlist *opts; 1861 const char *name; 1862 void **buf; 1863 int *len; 1864 { 1865 struct vfsopt *opt; 1866 1867 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1868 1869 TAILQ_FOREACH(opt, opts, link) { 1870 if (strcmp(name, opt->name) == 0) { 1871 if (len != NULL) 1872 *len = opt->len; 1873 if (buf != NULL) 1874 *buf = opt->value; 1875 return (0); 1876 } 1877 } 1878 return (ENOENT); 1879 } 1880 1881 static int 1882 vfs_getopt_pos(struct vfsoptlist *opts, const char *name) 1883 { 1884 struct vfsopt *opt; 1885 int i; 1886 1887 if (opts == NULL) 1888 return (-1); 1889 1890 i = 0; 1891 TAILQ_FOREACH(opt, opts, link) { 1892 if (strcmp(name, opt->name) == 0) 1893 return (i); 1894 ++i; 1895 } 1896 return (-1); 1897 } 1898 1899 char * 1900 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error) 1901 { 1902 struct vfsopt *opt; 1903 1904 *error = 0; 1905 TAILQ_FOREACH(opt, opts, link) { 1906 if (strcmp(name, opt->name) != 0) 1907 continue; 1908 if (((char *)opt->value)[opt->len - 1] != '\0') { 1909 *error = EINVAL; 1910 return (NULL); 1911 } 1912 return (opt->value); 1913 } 1914 *error = ENOENT; 1915 return (NULL); 1916 } 1917 1918 int 1919 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val) 1920 { 1921 struct vfsopt *opt; 1922 1923 TAILQ_FOREACH(opt, opts, link) { 1924 if (strcmp(name, opt->name) == 0) { 1925 if (w != NULL) 1926 *w |= val; 1927 return (1); 1928 } 1929 } 1930 if (w != NULL) 1931 *w &= ~val; 1932 return (0); 1933 } 1934 1935 int 1936 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...) 
1937 { 1938 va_list ap; 1939 struct vfsopt *opt; 1940 int ret; 1941 1942 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1943 1944 TAILQ_FOREACH(opt, opts, link) { 1945 if (strcmp(name, opt->name) != 0) 1946 continue; 1947 if (opt->len == 0 || opt->value == NULL) 1948 return (0); 1949 if (((char *)opt->value)[opt->len - 1] != '\0') 1950 return (0); 1951 va_start(ap, fmt); 1952 ret = vsscanf(opt->value, fmt, ap); 1953 va_end(ap); 1954 return (ret); 1955 } 1956 return (0); 1957 } 1958 1959 /* 1960 * Find and copy a mount option. 1961 * 1962 * The size of the buffer has to be specified 1963 * in len, if it is not the same length as the 1964 * mount option, EINVAL is returned. 1965 * Returns ENOENT if the option is not found. 1966 */ 1967 int 1968 vfs_copyopt(opts, name, dest, len) 1969 struct vfsoptlist *opts; 1970 const char *name; 1971 void *dest; 1972 int len; 1973 { 1974 struct vfsopt *opt; 1975 1976 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); 1977 1978 TAILQ_FOREACH(opt, opts, link) { 1979 if (strcmp(name, opt->name) == 0) { 1980 if (len != opt->len) 1981 return (EINVAL); 1982 bcopy(opt->value, dest, opt->len); 1983 return (0); 1984 } 1985 } 1986 return (ENOENT); 1987 } 1988 1989 /* 1990 * This is a helper function for filesystems to traverse their 1991 * vnodes. 
See MNT_VNODE_FOREACH() in sys/mount.h 1992 */ 1993 1994 struct vnode * 1995 __mnt_vnode_next(struct vnode **mvp, struct mount *mp) 1996 { 1997 struct vnode *vp; 1998 1999 mtx_assert(MNT_MTX(mp), MA_OWNED); 2000 2001 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); 2002 if ((*mvp)->v_yield++ == 500) { 2003 MNT_IUNLOCK(mp); 2004 (*mvp)->v_yield = 0; 2005 uio_yield(); 2006 MNT_ILOCK(mp); 2007 } 2008 vp = TAILQ_NEXT(*mvp, v_nmntvnodes); 2009 while (vp != NULL && vp->v_type == VMARKER) 2010 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2011 2012 /* Check if we are done */ 2013 if (vp == NULL) { 2014 __mnt_vnode_markerfree(mvp, mp); 2015 return (NULL); 2016 } 2017 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); 2018 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); 2019 return (vp); 2020 } 2021 2022 struct vnode * 2023 __mnt_vnode_first(struct vnode **mvp, struct mount *mp) 2024 { 2025 struct vnode *vp; 2026 2027 mtx_assert(MNT_MTX(mp), MA_OWNED); 2028 2029 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 2030 while (vp != NULL && vp->v_type == VMARKER) 2031 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2032 2033 /* Check if we are done */ 2034 if (vp == NULL) { 2035 *mvp = NULL; 2036 return (NULL); 2037 } 2038 MNT_REF(mp); 2039 MNT_IUNLOCK(mp); 2040 *mvp = (struct vnode *) malloc(sizeof(struct vnode), 2041 M_VNODE_MARKER, 2042 M_WAITOK | M_ZERO); 2043 MNT_ILOCK(mp); 2044 (*mvp)->v_type = VMARKER; 2045 2046 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 2047 while (vp != NULL && vp->v_type == VMARKER) 2048 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2049 2050 /* Check if we are done */ 2051 if (vp == NULL) { 2052 MNT_IUNLOCK(mp); 2053 free(*mvp, M_VNODE_MARKER); 2054 MNT_ILOCK(mp); 2055 *mvp = NULL; 2056 MNT_REL(mp); 2057 return (NULL); 2058 } 2059 (*mvp)->v_mount = mp; 2060 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); 2061 return (vp); 2062 } 2063 2064 2065 void 2066 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp) 2067 { 2068 2069 if (*mvp == 
NULL) 2070 return; 2071 2072 mtx_assert(MNT_MTX(mp), MA_OWNED); 2073 2074 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); 2075 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); 2076 MNT_IUNLOCK(mp); 2077 free(*mvp, M_VNODE_MARKER); 2078 MNT_ILOCK(mp); 2079 *mvp = NULL; 2080 MNT_REL(mp); 2081 } 2082 2083 2084 int 2085 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) 2086 { 2087 int error; 2088 2089 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td); 2090 if (sbp != &mp->mnt_stat) 2091 *sbp = mp->mnt_stat; 2092 return (error); 2093 } 2094 2095 void 2096 vfs_mountedfrom(struct mount *mp, const char *from) 2097 { 2098 2099 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname); 2100 strlcpy(mp->mnt_stat.f_mntfromname, from, 2101 sizeof mp->mnt_stat.f_mntfromname); 2102 } 2103 2104 /* 2105 * --------------------------------------------------------------------- 2106 * This is the api for building mount args and mounting filesystems from 2107 * inside the kernel. 2108 * 2109 * The API works by accumulation of individual args. First error is 2110 * latched. 2111 * 2112 * XXX: should be documented in new manpage kernel_mount(9) 2113 */ 2114 2115 /* A memory allocation which must be freed when we are done */ 2116 struct mntaarg { 2117 SLIST_ENTRY(mntaarg) next; 2118 }; 2119 2120 /* The header for the mount arguments */ 2121 struct mntarg { 2122 struct iovec *v; 2123 int len; 2124 int error; 2125 SLIST_HEAD(, mntaarg) list; 2126 }; 2127 2128 /* 2129 * Add a boolean argument. 2130 * 2131 * flag is the boolean value. 2132 * name must start with "no". 2133 */ 2134 struct mntarg * 2135 mount_argb(struct mntarg *ma, int flag, const char *name) 2136 { 2137 2138 KASSERT(name[0] == 'n' && name[1] == 'o', 2139 ("mount_argb(...,%s): name must start with 'no'", name)); 2140 2141 return (mount_arg(ma, name + (flag ? 
2 : 0), NULL, 0)); 2142 } 2143 2144 /* 2145 * Add an argument printf style 2146 */ 2147 struct mntarg * 2148 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...) 2149 { 2150 va_list ap; 2151 struct mntaarg *maa; 2152 struct sbuf *sb; 2153 int len; 2154 2155 if (ma == NULL) { 2156 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2157 SLIST_INIT(&ma->list); 2158 } 2159 if (ma->error) 2160 return (ma); 2161 2162 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 2163 M_MOUNT, M_WAITOK); 2164 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 2165 ma->v[ma->len].iov_len = strlen(name) + 1; 2166 ma->len++; 2167 2168 sb = sbuf_new_auto(); 2169 va_start(ap, fmt); 2170 sbuf_vprintf(sb, fmt, ap); 2171 va_end(ap); 2172 sbuf_finish(sb); 2173 len = sbuf_len(sb) + 1; 2174 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 2175 SLIST_INSERT_HEAD(&ma->list, maa, next); 2176 bcopy(sbuf_data(sb), maa + 1, len); 2177 sbuf_delete(sb); 2178 2179 ma->v[ma->len].iov_base = maa + 1; 2180 ma->v[ma->len].iov_len = len; 2181 ma->len++; 2182 2183 return (ma); 2184 } 2185 2186 /* 2187 * Add an argument which is a userland string. 2188 */ 2189 struct mntarg * 2190 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len) 2191 { 2192 struct mntaarg *maa; 2193 char *tbuf; 2194 2195 if (val == NULL) 2196 return (ma); 2197 if (ma == NULL) { 2198 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2199 SLIST_INIT(&ma->list); 2200 } 2201 if (ma->error) 2202 return (ma); 2203 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 2204 SLIST_INSERT_HEAD(&ma->list, maa, next); 2205 tbuf = (void *)(maa + 1); 2206 ma->error = copyinstr(val, tbuf, len, NULL); 2207 return (mount_arg(ma, name, tbuf, -1)); 2208 } 2209 2210 /* 2211 * Plain argument. 2212 * 2213 * If length is -1, treat value as a C string. 
2214 */ 2215 struct mntarg * 2216 mount_arg(struct mntarg *ma, const char *name, const void *val, int len) 2217 { 2218 2219 if (ma == NULL) { 2220 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2221 SLIST_INIT(&ma->list); 2222 } 2223 if (ma->error) 2224 return (ma); 2225 2226 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 2227 M_MOUNT, M_WAITOK); 2228 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 2229 ma->v[ma->len].iov_len = strlen(name) + 1; 2230 ma->len++; 2231 2232 ma->v[ma->len].iov_base = (void *)(uintptr_t)val; 2233 if (len < 0) 2234 ma->v[ma->len].iov_len = strlen(val) + 1; 2235 else 2236 ma->v[ma->len].iov_len = len; 2237 ma->len++; 2238 return (ma); 2239 } 2240 2241 /* 2242 * Free a mntarg structure 2243 */ 2244 static void 2245 free_mntarg(struct mntarg *ma) 2246 { 2247 struct mntaarg *maa; 2248 2249 while (!SLIST_EMPTY(&ma->list)) { 2250 maa = SLIST_FIRST(&ma->list); 2251 SLIST_REMOVE_HEAD(&ma->list, next); 2252 free(maa, M_MOUNT); 2253 } 2254 free(ma->v, M_MOUNT); 2255 free(ma, M_MOUNT); 2256 } 2257 2258 /* 2259 * Mount a filesystem 2260 */ 2261 int 2262 kernel_mount(struct mntarg *ma, int flags) 2263 { 2264 struct uio auio; 2265 int error; 2266 2267 KASSERT(ma != NULL, ("kernel_mount NULL ma")); 2268 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v")); 2269 KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len)); 2270 2271 auio.uio_iov = ma->v; 2272 auio.uio_iovcnt = ma->len; 2273 auio.uio_segflg = UIO_SYSSPACE; 2274 2275 error = ma->error; 2276 if (!error) 2277 error = vfs_donmount(curthread, flags, &auio); 2278 free_mntarg(ma); 2279 return (error); 2280 } 2281 2282 /* 2283 * A printflike function to mount a filesystem. 2284 */ 2285 int 2286 kernel_vmount(int flags, ...) 
2287 { 2288 struct mntarg *ma = NULL; 2289 va_list ap; 2290 const char *cp; 2291 const void *vp; 2292 int error; 2293 2294 va_start(ap, flags); 2295 for (;;) { 2296 cp = va_arg(ap, const char *); 2297 if (cp == NULL) 2298 break; 2299 vp = va_arg(ap, const void *); 2300 ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0)); 2301 } 2302 va_end(ap); 2303 2304 error = kernel_mount(ma, flags); 2305 return (error); 2306 } 2307