xref: /freebsd/sys/kern/vfs_mount.c (revision 6e0da4f753ed6b5d26395001a6194b4fdea70177)
1 /*-
2  * Copyright (c) 1999-2004 Poul-Henning Kamp
3  * Copyright (c) 1999 Michael Smith
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/conf.h>
42 #include <sys/cons.h>
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/mac.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mutex.h>
49 #include <sys/namei.h>
50 #include <sys/proc.h>
51 #include <sys/filedesc.h>
52 #include <sys/reboot.h>
53 #include <sys/syscallsubr.h>
54 #include <sys/sysproto.h>
55 #include <sys/sx.h>
56 #include <sys/sysctl.h>
57 #include <sys/sysent.h>
58 #include <sys/systm.h>
59 #include <sys/vnode.h>
60 
61 #include <geom/geom.h>
62 
63 #include <machine/stdarg.h>
64 
65 #include "opt_rootdevname.h"
66 #include "opt_ddb.h"
67 #include "opt_mac.h"
68 
69 #ifdef DDB
70 #include <ddb/ddb.h>
71 #endif
72 
/* NOTE(review): ROOTNAME is not referenced in the visible part of this file. */
#define	ROOTNAME		"root_device"
/*
 * Upper bound (in bytes) that vfs_buildopts() enforces on each option name,
 * on each option value, and on the running total for one option list.
 */
#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)

/* Forward declarations of file-local helpers. */
static void	gets(char *cp);
static int	vfs_domount(struct thread *td, const char *fstype,
		    char *fspath, int fsflags, void *fsdata);
static int	vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp,
		    const char *fspath, struct thread *td, struct mount **mpp);
static int	vfs_mountroot_ask(void);
static int	vfs_mountroot_try(const char *mountfrom);
static int	vfs_donmount(struct thread *td, int fsflags,
		    struct uio *fsoptions);

/*
 * vfs.usermount sysctl.  While it is zero, vfs_domount() and unmount()
 * require suser() to succeed before doing anything else.
 */
static int	usermount = 0;
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Unprivileged users may mount and unmount file systems");

/* Malloc type used for mount structures and mount option lists. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
91 
/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;

/*
 * A mount option list is a tail queue of struct vfsopt entries.  Each
 * entry owns its name and value buffers (allocated from M_MOUNT).
 */
TAILQ_HEAD(vfsoptlist, vfsopt);
struct vfsopt {
	TAILQ_ENTRY(vfsopt) link;	/* Linkage within the option list. */
	char	*name;			/* NUL-terminated option name. */
	void	*value;			/* Option value, NULL when len is 0. */
	int	len;			/* Size of value in bytes. */
};
105 
/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)  It is filled in by set_rootvnode() and reset to NULL by
 * dounmount() when the filesystem providing it is forcibly unmounted.
 */
struct vnode	*rootvnode;

/*
 * The root filesystem is detailed in the kernel environment variable
 * vfs.root.mountfrom, which is expected to be in the general format
 *
 * <vfsname>:[<path>]
 * vfsname   := the name of a VFS known to the kernel and capable
 *              of being mounted as root
 * path      := disk device name or other data used by the filesystem
 *              to locate its physical store
 */

/*
 * Global opts, taken by all filesystems
 * NOTE(review): this table is not referenced in the visible part of the
 * file; its consumer is presumably further down.
 */
static const char *global_opts[] = {
	"fstype",
	"fspath",
	"ro",
	"suid",
	"exec",
	NULL
};

/*
 * The root specifiers we will try if RB_CDROM is specified.
 * The list is NULL-terminated and tried in order by vfs_mountroot().
 */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0",
	"cd9660:acd0",
	NULL
};

/*
 * legacy find-root code
 *
 * rootdevnames[] holds up to two root specifications that vfs_mountroot()
 * tries after the vfs.root.mountfrom environment variable.  ctrootdevname
 * is the compiled-in ROOTDEVNAME (NULL when the kernel configuration does
 * not define one); vfs_mountroot() only tries it when RB_DFLTROOT is set.
 */
char		*rootdevnames[2] = {NULL, NULL};
#ifndef ROOTDEVNAME
#  define ROOTDEVNAME NULL
#endif
const char	*ctrootdevname = ROOTDEVNAME;
150 
151 /*
152  * ---------------------------------------------------------------------
153  * Functions for building and sanitizing the mount options
154  */
155 
156 /* Remove one mount option. */
157 static void
158 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
159 {
160 
161 	TAILQ_REMOVE(opts, opt, link);
162 	free(opt->name, M_MOUNT);
163 	if (opt->value != NULL)
164 		free(opt->value, M_MOUNT);
165 #ifdef INVARIANTS
166 	else if (opt->len != 0)
167 		panic("%s: mount option with NULL value but length != 0",
168 		    __func__);
169 #endif
170 	free(opt, M_MOUNT);
171 }
172 
173 /* Release all resources related to the mount options. */
174 static void
175 vfs_freeopts(struct vfsoptlist *opts)
176 {
177 	struct vfsopt *opt;
178 
179 	while (!TAILQ_EMPTY(opts)) {
180 		opt = TAILQ_FIRST(opts);
181 		vfs_freeopt(opts, opt);
182 	}
183 	free(opts, M_MOUNT);
184 }
185 
/*
 * Report whether two option names designate the same option, treating
 * a leading "no" on either side as a negation of the other name.
 * Returns 1 when they match ("opt"/"opt", "noopt"/"opt", "opt"/"noopt"),
 * 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{
	int neg1, neg2;

	neg1 = (strncmp(opt1, "no", 2) == 0);
	neg2 = (strncmp(opt2, "no", 2) == 0);

	/*
	 * The "+ 2" offsets are only evaluated when the corresponding
	 * name is known to start with "no", so they stay inside the string.
	 */
	return (strcmp(opt1, opt2) == 0 ||
	    (neg1 && strcmp(opt1 + 2, opt2) == 0) ||
	    (neg2 && strcmp(opt1, opt2 + 2) == 0));
}
204 
205 /*
206  * If a mount option is specified several times,
207  * (with or without the "no" prefix) only keep
208  * the last occurence of it.
209  */
210 static void
211 vfs_sanitizeopts(struct vfsoptlist *opts)
212 {
213 	struct vfsopt *opt, *opt2, *tmp;
214 
215 	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
216 		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
217 		while (opt2 != NULL) {
218 			if (vfs_equalopts(opt->name, opt2->name)) {
219 				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
220 				vfs_freeopt(opts, opt2);
221 				opt2 = tmp;
222 			} else {
223 				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
224 			}
225 		}
226 	}
227 }
228 
229 /*
230  * Build a linked list of mount options from a struct uio.
231  */
232 static int
233 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
234 {
235 	struct vfsoptlist *opts;
236 	struct vfsopt *opt;
237 	size_t memused;
238 	unsigned int i, iovcnt;
239 	int error, namelen, optlen;
240 
241 	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
242 	TAILQ_INIT(opts);
243 	memused = 0;
244 	iovcnt = auio->uio_iovcnt;
245 	for (i = 0; i < iovcnt; i += 2) {
246 		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
247 		namelen = auio->uio_iov[i].iov_len;
248 		optlen = auio->uio_iov[i + 1].iov_len;
249 		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
250 		opt->value = NULL;
251 		opt->len = 0;
252 
253 		/*
254 		 * Do this early, so jumps to "bad" will free the current
255 		 * option.
256 		 */
257 		TAILQ_INSERT_TAIL(opts, opt, link);
258 		memused += sizeof(struct vfsopt) + optlen + namelen;
259 
260 		/*
261 		 * Avoid consuming too much memory, and attempts to overflow
262 		 * memused.
263 		 */
264 		if (memused > VFS_MOUNTARG_SIZE_MAX ||
265 		    optlen > VFS_MOUNTARG_SIZE_MAX ||
266 		    namelen > VFS_MOUNTARG_SIZE_MAX) {
267 			error = EINVAL;
268 			goto bad;
269 		}
270 
271 		if (auio->uio_segflg == UIO_SYSSPACE) {
272 			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
273 		} else {
274 			error = copyin(auio->uio_iov[i].iov_base, opt->name,
275 			    namelen);
276 			if (error)
277 				goto bad;
278 		}
279 		/* Ensure names are null-terminated strings. */
280 		if (opt->name[namelen - 1] != '\0') {
281 			error = EINVAL;
282 			goto bad;
283 		}
284 		if (optlen != 0) {
285 			opt->len = optlen;
286 			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
287 			if (auio->uio_segflg == UIO_SYSSPACE) {
288 				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
289 				    optlen);
290 			} else {
291 				error = copyin(auio->uio_iov[i + 1].iov_base,
292 				    opt->value, optlen);
293 				if (error)
294 					goto bad;
295 			}
296 		}
297 	}
298 	vfs_sanitizeopts(opts);
299 	*options = opts;
300 	return (0);
301 bad:
302 	vfs_freeopts(opts);
303 	return (error);
304 }
305 
306 /*
307  * Merge the old mount options with the new ones passed
308  * in the MNT_UPDATE case.
309  */
310 static void
311 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
312 {
313 	struct vfsopt *opt, *opt2, *new;
314 
315 	TAILQ_FOREACH(opt, opts, link) {
316 		/*
317 		 * Check that this option hasn't been redefined
318 		 * nor cancelled with a "no" mount option.
319 		 */
320 		opt2 = TAILQ_FIRST(toopts);
321 		while (opt2 != NULL) {
322 			if (strcmp(opt2->name, opt->name) == 0)
323 				goto next;
324 			if (strncmp(opt2->name, "no", 2) == 0 &&
325 			    strcmp(opt2->name + 2, opt->name) == 0) {
326 				vfs_freeopt(toopts, opt2);
327 				goto next;
328 			}
329 			opt2 = TAILQ_NEXT(opt2, link);
330 		}
331 		/* We want this option, duplicate it. */
332 		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
333 		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
334 		strcpy(new->name, opt->name);
335 		if (opt->len != 0) {
336 			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
337 			bcopy(opt->value, new->value, opt->len);
338 		} else {
339 			new->value = NULL;
340 		}
341 		new->len = opt->len;
342 		TAILQ_INSERT_TAIL(toopts, new, link);
343 next:
344 		continue;
345 	}
346 }
347 
348 /*
349  * ---------------------------------------------------------------------
350  * Mount a filesystem
351  */
352 int
353 nmount(td, uap)
354 	struct thread *td;
355 	struct nmount_args /* {
356 		struct iovec *iovp;
357 		unsigned int iovcnt;
358 		int flags;
359 	} */ *uap;
360 {
361 	struct uio *auio;
362 	struct iovec *iov;
363 	unsigned int i;
364 	int error;
365 	u_int iovcnt;
366 
367 	/* Kick out MNT_ROOTFS early as it is legal internally */
368 	if (uap->flags & MNT_ROOTFS)
369 		return (EINVAL);
370 
371 	iovcnt = uap->iovcnt;
372 	/*
373 	 * Check that we have an even number of iovec's
374 	 * and that we have at least two options.
375 	 */
376 	if ((iovcnt & 1) || (iovcnt < 4))
377 		return (EINVAL);
378 
379 	error = copyinuio(uap->iovp, iovcnt, &auio);
380 	if (error)
381 		return (error);
382 	iov = auio->uio_iov;
383 	for (i = 0; i < iovcnt; i++) {
384 		if (iov->iov_len > MMAXOPTIONLEN) {
385 			free(auio, M_IOV);
386 			return (EINVAL);
387 		}
388 		iov++;
389 	}
390 	error = vfs_donmount(td, uap->flags, auio);
391 	free(auio, M_IOV);
392 	return (error);
393 }
394 
395 /*
396  * ---------------------------------------------------------------------
397  * Various utility functions
398  */
399 
400 /*
401  * Allocate and initialize the mount point struct.
402  */
403 static int
404 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
405     const char *fspath, struct thread *td, struct mount **mpp)
406 {
407 	struct mount *mp;
408 
409 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
410 	TAILQ_INIT(&mp->mnt_nvnodelist);
411 	mp->mnt_nvnodelistsize = 0;
412 	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
413 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
414 	vfs_busy(mp, LK_NOWAIT, 0, td);
415 	mp->mnt_op = vfsp->vfc_vfsops;
416 	mp->mnt_vfc = vfsp;
417 	vfsp->vfc_refcount++;
418 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
419 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
420 	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
421 	mp->mnt_vnodecovered = vp;
422 	mp->mnt_cred = crdup(td->td_ucred);
423 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
424 	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
425 	mp->mnt_iosize_max = DFLTPHYS;
426 #ifdef MAC
427 	mac_init_mount(mp);
428 	mac_create_mount(td->td_ucred, mp);
429 #endif
430 	*mpp = mp;
431 	return (0);
432 }
433 
434 /*
435  * Destroy the mount struct previously allocated by vfs_mount_alloc().
436  */
437 void
438 vfs_mount_destroy(struct mount *mp, struct thread *td)
439 {
440 
441 	mp->mnt_vfc->vfc_refcount--;
442 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
443 		panic("unmount: dangling vnode");
444 	vfs_unbusy(mp,td);
445 	lockdestroy(&mp->mnt_lock);
446 	mtx_destroy(&mp->mnt_mtx);
447 	if (mp->mnt_kern_flag & MNTK_MWAIT)
448 		wakeup(mp);
449 #ifdef MAC
450 	mac_destroy_mount(mp);
451 #endif
452 	if (mp->mnt_opt != NULL)
453 		vfs_freeopts(mp->mnt_opt);
454 	crfree(mp->mnt_cred);
455 	free(mp, M_MOUNT);
456 }
457 
458 static int
459 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
460 {
461 	struct vfsoptlist *optlist;
462 	char *fstype, *fspath;
463 	int error, fstypelen, fspathlen;
464 
465 	error = vfs_buildopts(fsoptions, &optlist);
466 	if (error)
467 		return (error);
468 
469 	/*
470 	 * We need these two options before the others,
471 	 * and they are mandatory for any filesystem.
472 	 * Ensure they are NUL terminated as well.
473 	 */
474 	fstypelen = 0;
475 	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
476 	if (error || fstype[fstypelen - 1] != '\0') {
477 		error = EINVAL;
478 		goto bail;
479 	}
480 	fspathlen = 0;
481 	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
482 	if (error || fspath[fspathlen - 1] != '\0') {
483 		error = EINVAL;
484 		goto bail;
485 	}
486 
487 	/*
488 	 * Be ultra-paranoid about making sure the type and fspath
489 	 * variables will fit in our mp buffers, including the
490 	 * terminating NUL.
491 	 */
492 	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
493 		error = ENAMETOOLONG;
494 		goto bail;
495 	}
496 
497 	mtx_lock(&Giant);
498 	error = vfs_domount(td, fstype, fspath, fsflags, optlist);
499 	mtx_unlock(&Giant);
500 bail:
501 	if (error)
502 		vfs_freeopts(optlist);
503 	return (error);
504 }
505 
506 /*
507  * ---------------------------------------------------------------------
508  * Old mount API.
509  */
510 #ifndef _SYS_SYSPROTO_H_
511 struct mount_args {
512 	char	*type;
513 	char	*path;
514 	int	flags;
515 	caddr_t	data;
516 };
517 #endif
518 /* ARGSUSED */
519 int
520 mount(td, uap)
521 	struct thread *td;
522 	struct mount_args /* {
523 		char *type;
524 		char *path;
525 		int flags;
526 		caddr_t data;
527 	} */ *uap;
528 {
529 	char *fstype;
530 	struct vfsconf *vfsp = NULL;
531 	struct mntarg *ma = NULL;
532 	int error;
533 
534 	/* Kick out MNT_ROOTFS early as it is legal internally */
535 	uap->flags &= ~MNT_ROOTFS;
536 
537 	if (uap->data == NULL)
538 		return (EINVAL);
539 
540 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
541 	error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
542 	if (!error) {
543 		mtx_lock(&Giant);	/* XXX ? */
544 		vfsp = vfs_byname_kld(fstype, td, &error);
545 		mtx_unlock(&Giant);
546 	}
547 	free(fstype, M_TEMP);
548 	if (error)
549 		return (error);
550 	if (vfsp == NULL)
551 		return (ENOENT);
552 	if (vfsp->vfc_vfsops->vfs_cmount == NULL)
553 		return (EOPNOTSUPP);
554 
555 	ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
556 	ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
557 	ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
558 	ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
559 	ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
560 
561 	error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
562 	return (error);
563 }
564 
565 
566 /*
567  * vfs_domount(): actually attempt a filesystem mount.
568  */
569 static int
570 vfs_domount(
571 	struct thread *td,	/* Flags common to all filesystems. */
572 	const char *fstype,	/* Filesystem type. */
573 	char *fspath,		/* Mount path. */
574 	int fsflags,		/* Flags common to all filesystems. */
575 	void *fsdata		/* Options local to the filesystem. */
576 	)
577 {
578 	struct vnode *vp;
579 	struct mount *mp;
580 	struct vfsconf *vfsp;
581 	int error, flag = 0, kern_flag = 0;
582 	struct vattr va;
583 	struct nameidata nd;
584 
585 	mtx_assert(&Giant, MA_OWNED);
586 
587 	/*
588 	 * Be ultra-paranoid about making sure the type and fspath
589 	 * variables will fit in our mp buffers, including the
590 	 * terminating NUL.
591 	 */
592 	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
593 		return (ENAMETOOLONG);
594 
595 	if (jailed(td->td_ucred))
596 		return (EPERM);
597 	if (usermount == 0) {
598 		if ((error = suser(td)) != 0)
599 			return (error);
600 	}
601 
602 	/*
603 	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
604 	 */
605 	if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) {
606 		if ((error = suser(td)) != 0)
607 			return (error);
608 	}
609 	/*
610 	 * Silently enforce MNT_NOSUID and MNT_USER for
611 	 * unprivileged users.
612 	 */
613 	if (suser(td) != 0)
614 		fsflags |= MNT_NOSUID | MNT_USER;
615 	/*
616 	 * Get vnode to be covered
617 	 */
618 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
619 	if ((error = namei(&nd)) != 0)
620 		return (error);
621 	NDFREE(&nd, NDF_ONLY_PNBUF);
622 	vp = nd.ni_vp;
623 	if (fsflags & MNT_UPDATE) {
624 		if ((vp->v_vflag & VV_ROOT) == 0) {
625 			vput(vp);
626 			return (EINVAL);
627 		}
628 		mp = vp->v_mount;
629 		flag = mp->mnt_flag;
630 		kern_flag = mp->mnt_kern_flag;
631 		/*
632 		 * We only allow the filesystem to be reloaded if it
633 		 * is currently mounted read-only.
634 		 */
635 		if ((fsflags & MNT_RELOAD) &&
636 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
637 			vput(vp);
638 			return (EOPNOTSUPP);	/* Needs translation */
639 		}
640 		/*
641 		 * Only privileged root, or (if MNT_USER is set) the user that
642 		 * did the original mount is permitted to update it.
643 		 */
644 		error = vfs_suser(mp, td);
645 		if (error) {
646 			vput(vp);
647 			return (error);
648 		}
649 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
650 			vput(vp);
651 			return (EBUSY);
652 		}
653 		VI_LOCK(vp);
654 		if ((vp->v_iflag & VI_MOUNT) != 0 ||
655 		    vp->v_mountedhere != NULL) {
656 			VI_UNLOCK(vp);
657 			vfs_unbusy(mp, td);
658 			vput(vp);
659 			return (EBUSY);
660 		}
661 		vp->v_iflag |= VI_MOUNT;
662 		VI_UNLOCK(vp);
663 		mp->mnt_flag |= fsflags &
664 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
665 		VOP_UNLOCK(vp, 0, td);
666 		mp->mnt_optnew = fsdata;
667 		vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
668 	} else {
669 		/*
670 		 * If the user is not root, ensure that they own the directory
671 		 * onto which we are attempting to mount.
672 		 */
673 		error = VOP_GETATTR(vp, &va, td->td_ucred, td);
674 		if (error) {
675 			vput(vp);
676 			return (error);
677 		}
678 		if (va.va_uid != td->td_ucred->cr_uid) {
679 			if ((error = suser(td)) != 0) {
680 				vput(vp);
681 				return (error);
682 			}
683 		}
684 		error = vinvalbuf(vp, V_SAVE, td, 0, 0);
685 		if (error != 0) {
686 			vput(vp);
687 			return (error);
688 		}
689 		if (vp->v_type != VDIR) {
690 			vput(vp);
691 			return (ENOTDIR);
692 		}
693 		vfsp = vfs_byname_kld(fstype, td, &error);
694 		if (vfsp == NULL) {
695 			vput(vp);
696 			return (error);
697 		}
698 		VI_LOCK(vp);
699 		if ((vp->v_iflag & VI_MOUNT) != 0 ||
700 		    vp->v_mountedhere != NULL) {
701 			VI_UNLOCK(vp);
702 			vput(vp);
703 			return (EBUSY);
704 		}
705 		vp->v_iflag |= VI_MOUNT;
706 		VI_UNLOCK(vp);
707 
708 		/*
709 		 * Allocate and initialize the filesystem.
710 		 */
711 		error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp);
712 		if (error) {
713 			vput(vp);
714 			return (error);
715 		}
716 		VOP_UNLOCK(vp, 0, td);
717 
718 		/* XXXMAC: pass to vfs_mount_alloc? */
719 		mp->mnt_optnew = fsdata;
720 	}
721 
722 	/*
723 	 * Set the mount level flags.
724 	 */
725 	if (fsflags & MNT_RDONLY)
726 		mp->mnt_flag |= MNT_RDONLY;
727 	mp->mnt_flag &=~ MNT_UPDATEMASK;
728 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS);
729 	/*
730 	 * Mount the filesystem.
731 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
732 	 * get.  No freeing of cn_pnbuf.
733 	 */
734         error = VFS_MOUNT(mp, td);
735 	if (!error) {
736 		if (mp->mnt_opt != NULL)
737 			vfs_freeopts(mp->mnt_opt);
738 		mp->mnt_opt = mp->mnt_optnew;
739 		VFS_STATFS(mp, &mp->mnt_stat, td);
740 	}
741 	/*
742 	 * Prevent external consumers of mount options from reading
743 	 * mnt_optnew.
744 	*/
745 	mp->mnt_optnew = NULL;
746 	if (mp->mnt_flag & MNT_UPDATE) {
747 		mp->mnt_flag &=
748 		    ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
749 		if (error) {
750 			mp->mnt_flag = flag;
751 			mp->mnt_kern_flag = kern_flag;
752 		}
753 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
754 			if (mp->mnt_syncer == NULL)
755 				error = vfs_allocate_syncvnode(mp);
756 		} else {
757 			if (mp->mnt_syncer != NULL)
758 				vrele(mp->mnt_syncer);
759 			mp->mnt_syncer = NULL;
760 		}
761 		vfs_unbusy(mp, td);
762 		VI_LOCK(vp);
763 		vp->v_iflag &= ~VI_MOUNT;
764 		VI_UNLOCK(vp);
765 		vrele(vp);
766 		return (error);
767 	}
768 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
769 	/*
770 	 * Put the new filesystem on the mount list after root.
771 	 */
772 	cache_purge(vp);
773 	if (!error) {
774 		struct vnode *newdp;
775 
776 		VI_LOCK(vp);
777 		vp->v_iflag &= ~VI_MOUNT;
778 		VI_UNLOCK(vp);
779 		vp->v_mountedhere = mp;
780 		mtx_lock(&mountlist_mtx);
781 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
782 		mtx_unlock(&mountlist_mtx);
783 		vfs_event_signal(NULL, VQ_MOUNT, 0);
784 		if (VFS_ROOT(mp, &newdp, td))
785 			panic("mount: lost mount");
786 		mountcheckdirs(vp, newdp);
787 		vput(newdp);
788 		VOP_UNLOCK(vp, 0, td);
789 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
790 			error = vfs_allocate_syncvnode(mp);
791 		vfs_unbusy(mp, td);
792 		if (error || (error = VFS_START(mp, 0, td)) != 0)
793 			vrele(vp);
794 	} else {
795 		VI_LOCK(vp);
796 		vp->v_iflag &= ~VI_MOUNT;
797 		VI_UNLOCK(vp);
798 		vfs_mount_destroy(mp, td);
799 		vput(vp);
800 	}
801 	return (error);
802 }
803 
804 /*
805  * ---------------------------------------------------------------------
806  * Unmount a filesystem.
807  *
808  * Note: unmount takes a path to the vnode mounted on as argument,
809  * not special file (as before).
810  */
811 #ifndef _SYS_SYSPROTO_H_
812 struct unmount_args {
813 	char	*path;
814 	int	flags;
815 };
816 #endif
817 /* ARGSUSED */
818 int
819 unmount(td, uap)
820 	struct thread *td;
821 	register struct unmount_args /* {
822 		char *path;
823 		int flags;
824 	} */ *uap;
825 {
826 	struct mount *mp;
827 	char *pathbuf;
828 	int error, id0, id1;
829 
830 	if (jailed(td->td_ucred))
831 		return (EPERM);
832 	if (usermount == 0) {
833 		if ((error = suser(td)) != 0)
834 			return (error);
835 	}
836 
837 	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
838 	error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
839 	if (error) {
840 		free(pathbuf, M_TEMP);
841 		return (error);
842 	}
843 	if (uap->flags & MNT_BYFSID) {
844 		/* Decode the filesystem ID. */
845 		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
846 			free(pathbuf, M_TEMP);
847 			return (EINVAL);
848 		}
849 
850 		mtx_lock(&mountlist_mtx);
851 		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
852 			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
853 			    mp->mnt_stat.f_fsid.val[1] == id1)
854 				break;
855 		}
856 		mtx_unlock(&mountlist_mtx);
857 	} else {
858 		mtx_lock(&mountlist_mtx);
859 		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
860 			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
861 				break;
862 		}
863 		mtx_unlock(&mountlist_mtx);
864 	}
865 	free(pathbuf, M_TEMP);
866 	if (mp == NULL) {
867 		/*
868 		 * Previously we returned ENOENT for a nonexistent path and
869 		 * EINVAL for a non-mountpoint.  We cannot tell these apart
870 		 * now, so in the !MNT_BYFSID case return the more likely
871 		 * EINVAL for compatibility.
872 		 */
873 		return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
874 	}
875 
876 	/*
877 	 * Only privileged root, or (if MNT_USER is set) the user that did the
878 	 * original mount is permitted to unmount this filesystem.
879 	 */
880 	error = vfs_suser(mp, td);
881 	if (error)
882 		return (error);
883 
884 	/*
885 	 * Don't allow unmounting the root filesystem.
886 	 */
887 	if (mp->mnt_flag & MNT_ROOTFS)
888 		return (EINVAL);
889 	mtx_lock(&Giant);
890 	error = dounmount(mp, uap->flags, td);
891 	mtx_unlock(&Giant);
892 	return (error);
893 }
894 
895 /*
896  * Do the actual filesystem unmount.
897  */
898 int
899 dounmount(mp, flags, td)
900 	struct mount *mp;
901 	int flags;
902 	struct thread *td;
903 {
904 	struct vnode *coveredvp, *fsrootvp;
905 	int error;
906 	int async_flag;
907 
908 	mtx_assert(&Giant, MA_OWNED);
909 
910 	mtx_lock(&mountlist_mtx);
911 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
912 		mtx_unlock(&mountlist_mtx);
913 		return (EBUSY);
914 	}
915 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
916 	/* Allow filesystems to detect that a forced unmount is in progress. */
917 	if (flags & MNT_FORCE)
918 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
919 	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
920 	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
921 	if (error) {
922 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
923 		if (mp->mnt_kern_flag & MNTK_MWAIT)
924 			wakeup(mp);
925 		return (error);
926 	}
927 	vn_start_write(NULL, &mp, V_WAIT);
928 
929 	if (mp->mnt_flag & MNT_EXPUBLIC)
930 		vfs_setpublicfs(NULL, NULL, NULL);
931 
932 	vfs_msync(mp, MNT_WAIT);
933 	async_flag = mp->mnt_flag & MNT_ASYNC;
934 	mp->mnt_flag &= ~MNT_ASYNC;
935 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
936 	if (mp->mnt_syncer != NULL)
937 		vrele(mp->mnt_syncer);
938 	/*
939 	 * For forced unmounts, move process cdir/rdir refs on the fs root
940 	 * vnode to the covered vnode.  For non-forced unmounts we want
941 	 * such references to cause an EBUSY error.
942 	 */
943 	if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
944 		if (mp->mnt_vnodecovered != NULL)
945 			mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
946 		if (fsrootvp == rootvnode) {
947 			vrele(rootvnode);
948 			rootvnode = NULL;
949 		}
950 		vput(fsrootvp);
951 	}
952 	if (((mp->mnt_flag & MNT_RDONLY) ||
953 	     (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
954 	    (flags & MNT_FORCE)) {
955 		error = VFS_UNMOUNT(mp, flags, td);
956 	}
957 	vn_finished_write(mp);
958 	if (error) {
959 		/* Undo cdir/rdir and rootvnode changes made above. */
960 		if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
961 			if (mp->mnt_vnodecovered != NULL)
962 				mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
963 			if (rootvnode == NULL) {
964 				rootvnode = fsrootvp;
965 				vref(rootvnode);
966 			}
967 			vput(fsrootvp);
968 		}
969 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
970 			(void) vfs_allocate_syncvnode(mp);
971 		mtx_lock(&mountlist_mtx);
972 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
973 		mp->mnt_flag |= async_flag;
974 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
975 		    &mountlist_mtx, td);
976 		if (mp->mnt_kern_flag & MNTK_MWAIT)
977 			wakeup(mp);
978 		return (error);
979 	}
980 	mtx_lock(&mountlist_mtx);
981 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
982 	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
983 		coveredvp->v_mountedhere = NULL;
984 	mtx_unlock(&mountlist_mtx);
985 	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
986 	vfs_mount_destroy(mp, td);
987 	if (coveredvp != NULL)
988 		vrele(coveredvp);
989 	return (0);
990 }
991 
992 /*
993  * ---------------------------------------------------------------------
994  * Mounting of root filesystem
995  *
996  */
997 
998 static void
999 set_rootvnode(struct thread *td)
1000 {
1001 	struct proc *p;
1002 
1003 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode, td))
1004 		panic("Cannot find root vnode");
1005 
1006 	p = td->td_proc;
1007 	FILEDESC_LOCK(p->p_fd);
1008 
1009 	if (p->p_fd->fd_cdir != NULL)
1010 		vrele(p->p_fd->fd_cdir);
1011 	p->p_fd->fd_cdir = rootvnode;
1012 	VREF(rootvnode);
1013 
1014 	if (p->p_fd->fd_rdir != NULL)
1015 		vrele(p->p_fd->fd_rdir);
1016 	p->p_fd->fd_rdir = rootvnode;
1017 	VREF(rootvnode);
1018 
1019 	FILEDESC_UNLOCK(p->p_fd);
1020 
1021 	VOP_UNLOCK(rootvnode, 0, td);
1022 }
1023 
1024 /*
1025  * Mount /devfs as our root filesystem, but do not put it on the mountlist
1026  * yet.  Create a /dev -> / symlink so that absolute pathnames will lookup.
1027  */
1028 
1029 static struct mount *
1030 devfs_first(void)
1031 {
1032 	struct thread *td = curthread;
1033 	struct vfsconf *vfsp;
1034 	struct mount *mp = NULL;
1035 	int error;
1036 
1037 	vfsp = vfs_byname("devfs");
1038 	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
1039 	if (vfsp == NULL)
1040 		return(NULL);
1041 
1042 	error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp);
1043 	KASSERT(error == 0, ("vfs_mount_alloc failed %d", error));
1044 	if (error)
1045 		return (NULL);
1046 
1047 	error = VFS_MOUNT(mp, curthread);
1048 	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
1049 	if (error)
1050 		return (NULL);
1051 
1052 	VFS_START(mp, 0, td);
1053 
1054 	mtx_lock(&mountlist_mtx);
1055 	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1056 	mtx_unlock(&mountlist_mtx);
1057 
1058 	set_rootvnode(td);
1059 
1060 	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
1061 	if (error)
1062 		printf("kern_symlink /dev -> / returns %d\n", error);
1063 
1064 	return (mp);
1065 }
1066 
1067 /*
1068  * Surgically move our devfs to be mounted on /dev.
1069  */
1070 
1071 static void
1072 devfs_fixup(struct thread *td)
1073 {
1074 	struct nameidata nd;
1075 	int error;
1076 	struct vnode *vp, *dvp;
1077 	struct mount *mp;
1078 
1079 	/* Remove our devfs mount from the mountlist and purge the cache */
1080 	mtx_lock(&mountlist_mtx);
1081 	mp = TAILQ_FIRST(&mountlist);
1082 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1083 	mtx_unlock(&mountlist_mtx);
1084 	cache_purgevfs(mp);
1085 
1086 	VFS_ROOT(mp, &dvp, td);
1087 	VI_LOCK(dvp);
1088 	dvp->v_iflag &= ~VI_MOUNT;
1089 	dvp->v_mountedhere = NULL;
1090 	VI_UNLOCK(dvp);
1091 
1092 	/* Set up the real rootvnode, and purge the cache */
1093 	TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
1094 	set_rootvnode(td);
1095 	cache_purgevfs(rootvnode->v_mount);
1096 
1097 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
1098 	error = namei(&nd);
1099 	if (error) {
1100 		printf("Lookup /dev -> %d\n", error);
1101 		return;
1102 	}
1103 	NDFREE(&nd, NDF_ONLY_PNBUF);
1104 	vp = nd.ni_vp;
1105 	if (vp->v_type != VDIR) {
1106 		vput(vp);
1107 	}
1108 	error = vinvalbuf(vp, V_SAVE, td, 0, 0);
1109 	if (error) {
1110 		vput(vp);
1111 	}
1112 	cache_purge(vp);
1113 	mp->mnt_vnodecovered = vp;
1114 	vp->v_mountedhere = mp;
1115 	mtx_lock(&mountlist_mtx);
1116 	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1117 	mtx_unlock(&mountlist_mtx);
1118 	VOP_UNLOCK(vp, 0, td);
1119 	vfs_unbusy(mp, td);
1120 	vput(dvp);
1121 
1122 	/* Unlink the no longer needed /dev/dev -> / symlink */
1123 	kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
1124 }
1125 
1126 /*
1127  * Find and mount the root filesystem
1128  */
1129 void
1130 vfs_mountroot(void)
1131 {
1132 	char *cp;
1133 	int error, i, asked = 0;
1134 	struct mount *mp;
1135 
1136 	/*
1137 	 * Wait for GEOM to settle down
1138 	 */
1139 	DROP_GIANT();
1140 	g_waitidle();
1141 	PICKUP_GIANT();
1142 
1143 	mp = devfs_first();
1144 
1145 	/*
1146 	 * We are booted with instructions to prompt for the root filesystem.
1147 	 */
1148 	if (boothowto & RB_ASKNAME) {
1149 		if (!vfs_mountroot_ask())
1150 			return;
1151 		asked = 1;
1152 	}
1153 
1154 	/*
1155 	 * The root filesystem information is compiled in, and we are
1156 	 * booted with instructions to use it.
1157 	 */
1158 	if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
1159 		if (!vfs_mountroot_try(ctrootdevname))
1160 			return;
1161 		ctrootdevname = NULL;
1162 	}
1163 
1164 	/*
1165 	 * We've been given the generic "use CDROM as root" flag.  This is
1166 	 * necessary because one media may be used in many different
1167 	 * devices, so we need to search for them.
1168 	 */
1169 	if (boothowto & RB_CDROM) {
1170 		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1171 			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1172 				return;
1173 		}
1174 	}
1175 
1176 	/*
1177 	 * Try to use the value read by the loader from /etc/fstab, or
1178 	 * supplied via some other means.  This is the preferred
1179 	 * mechanism.
1180 	 */
1181 	cp = getenv("vfs.root.mountfrom");
1182 	if (cp != NULL) {
1183 		error = vfs_mountroot_try(cp);
1184 		freeenv(cp);
1185 		if (!error)
1186 			return;
1187 	}
1188 
1189 	/*
1190 	 * Try values that may have been computed by code during boot
1191 	 */
1192 	if (!vfs_mountroot_try(rootdevnames[0]))
1193 		return;
1194 	if (!vfs_mountroot_try(rootdevnames[1]))
1195 		return;
1196 
1197 	/*
1198 	 * If we (still) have a compiled-in default, try it.
1199 	 */
1200 	if (ctrootdevname != NULL)
1201 		if (!vfs_mountroot_try(ctrootdevname))
1202 			return;
1203 	/*
1204 	 * Everything so far has failed, prompt on the console if we haven't
1205 	 * already tried that.
1206 	 */
1207 	if (!asked)
1208 		if (!vfs_mountroot_ask())
1209 			return;
1210 
1211 	panic("Root mount failed, startup aborted.");
1212 }
1213 
1214 /*
1215  * Mount (mountfrom) as the root filesystem.
1216  */
1217 static int
1218 vfs_mountroot_try(const char *mountfrom)
1219 {
1220         struct mount	*mp;
1221 	char		*vfsname, *path;
1222 	int		error;
1223 	char		patt[32];
1224 	int		s;
1225 
1226 	vfsname = NULL;
1227 	path    = NULL;
1228 	mp      = NULL;
1229 	error   = EINVAL;
1230 
1231 	if (mountfrom == NULL)
1232 		return (error);		/* don't complain */
1233 
1234 	s = splcam();			/* Overkill, but annoying without it */
1235 	printf("Trying to mount root from %s\n", mountfrom);
1236 	splx(s);
1237 
1238 	/* parse vfs name and path */
1239 	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1240 	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1241 	vfsname[0] = path[0] = 0;
1242 	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1243 	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1244 		return (error);
1245 
1246 	if (path[0] == '\0')
1247 		strcpy(path, ROOTNAME);
1248 
1249 	error = kernel_vmount(
1250 	    MNT_RDONLY | MNT_ROOTFS,
1251 	    "fstype", vfsname,
1252 	    "fspath", "/",
1253 	    "from", path,
1254 	    NULL);
1255 	if (error == 0) {
1256 		mp = TAILQ_FIRST(&mountlist);
1257 
1258 		/* sanity check system clock against root fs timestamp */
1259 		inittodr(mp->mnt_time);
1260 		vfs_unbusy(mp, curthread);
1261 		error = VFS_START(mp, 0, curthread);
1262 
1263 		devfs_fixup(curthread);
1264 	}
1265 	return (error);
1266 }
1267 
1268 /*
1269  * ---------------------------------------------------------------------
1270  * Interactive root filesystem selection code.
1271  */
1272 
1273 static int
1274 vfs_mountroot_ask(void)
1275 {
1276 	char name[128];
1277 
1278 	for(;;) {
1279 		printf("\nManual root filesystem specification:\n");
1280 		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
1281 #if defined(__i386__) || defined(__ia64__)
1282 		printf("                       eg. ufs:da0s1a\n");
1283 #else
1284 		printf("                       eg. ufs:/dev/da0a\n");
1285 #endif
1286 		printf("  ?                  List valid disk boot devices\n");
1287 		printf("  <empty line>       Abort manual input\n");
1288 		printf("\nmountroot> ");
1289 		gets(name);
1290 		if (name[0] == '\0')
1291 			return (1);
1292 		if (name[0] == '?') {
1293 			printf("\nList of GEOM managed disk devices:\n  ");
1294 			g_dev_print();
1295 			continue;
1296 		}
1297 		if (!vfs_mountroot_try(name))
1298 			return (0);
1299 	}
1300 }
1301 
1302 /*
1303  * Local helper function for vfs_mountroot_ask.
1304  */
1305 static void
1306 gets(char *cp)
1307 {
1308 	char *lp;
1309 	int c;
1310 
1311 	lp = cp;
1312 	for (;;) {
1313 		printf("%c", c = cngetc() & 0177);
1314 		switch (c) {
1315 		case -1:
1316 		case '\n':
1317 		case '\r':
1318 			*lp++ = '\0';
1319 			return;
1320 		case '\b':
1321 		case '\177':
1322 			if (lp > cp) {
1323 				printf(" \b");
1324 				lp--;
1325 			}
1326 			continue;
1327 		case '#':
1328 			lp--;
1329 			if (lp < cp)
1330 				lp = cp;
1331 			continue;
1332 		case '@':
1333 		case 'u' & 037:
1334 			lp = cp;
1335 			printf("%c", '\n');
1336 			continue;
1337 		default:
1338 			*lp++ = c;
1339 		}
1340 	}
1341 }
1342 
1343 /*
1344  * ---------------------------------------------------------------------
1345  * Functions for querying mount options/arguments from filesystems.
1346  */
1347 
1348 /*
1349  * Check that no unknown options are given
1350  */
1351 int
1352 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
1353 {
1354 	struct vfsopt *opt;
1355 	const char **t, *p;
1356 
1357 
1358 	TAILQ_FOREACH(opt, opts, link) {
1359 		p = opt->name;
1360 		if (p[0] == 'n' && p[1] == 'o')
1361 			p += 2;
1362 		for(t = global_opts; *t != NULL; t++)
1363 			if (!strcmp(*t, p))
1364 				break;
1365 		if (*t != NULL)
1366 			continue;
1367 		for(t = legal; *t != NULL; t++)
1368 			if (!strcmp(*t, p))
1369 				break;
1370 		if (*t != NULL)
1371 			continue;
1372 		printf("mount option <%s> is unknown\n", p);
1373 		return (EINVAL);
1374 	}
1375 	return (0);
1376 }
1377 
1378 /*
1379  * Get a mount option by its name.
1380  *
1381  * Return 0 if the option was found, ENOENT otherwise.
1382  * If len is non-NULL it will be filled with the length
1383  * of the option. If buf is non-NULL, it will be filled
1384  * with the address of the option.
1385  */
1386 int
1387 vfs_getopt(opts, name, buf, len)
1388 	struct vfsoptlist *opts;
1389 	const char *name;
1390 	void **buf;
1391 	int *len;
1392 {
1393 	struct vfsopt *opt;
1394 
1395 	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1396 
1397 	TAILQ_FOREACH(opt, opts, link) {
1398 		if (strcmp(name, opt->name) == 0) {
1399 			if (len != NULL)
1400 				*len = opt->len;
1401 			if (buf != NULL)
1402 				*buf = opt->value;
1403 			return (0);
1404 		}
1405 	}
1406 	return (ENOENT);
1407 }
1408 
1409 char *
1410 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
1411 {
1412 	struct vfsopt *opt;
1413 
1414 	*error = 0;
1415 	TAILQ_FOREACH(opt, opts, link) {
1416 		if (strcmp(name, opt->name) != 0)
1417 			continue;
1418 		if (((char *)opt->value)[opt->len - 1] != '\0') {
1419 			*error = EINVAL;
1420 			return (NULL);
1421 		}
1422 		return (opt->value);
1423 	}
1424 	return (NULL);
1425 }
1426 
1427 int
1428 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
1429 {
1430 	struct vfsopt *opt;
1431 
1432 	TAILQ_FOREACH(opt, opts, link) {
1433 		if (strcmp(name, opt->name) == 0) {
1434 			if (w != NULL)
1435 				*w |= val;
1436 			return (1);
1437 		}
1438 	}
1439 	if (w != NULL)
1440 		*w &= ~val;
1441 	return (0);
1442 }
1443 
1444 int
1445 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
1446 {
1447 	va_list ap;
1448 	struct vfsopt *opt;
1449 	int ret;
1450 
1451 	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1452 
1453 	TAILQ_FOREACH(opt, opts, link) {
1454 		if (strcmp(name, opt->name) != 0)
1455 			continue;
1456 		if (((char *)opt->value)[opt->len - 1] != '\0')
1457 			return (0);
1458 		va_start(ap, fmt);
1459 		ret = vsscanf(opt->value, fmt, ap);
1460 		va_end(ap);
1461 		return (ret);
1462 	}
1463 	return (0);
1464 }
1465 
1466 /*
1467  * Find and copy a mount option.
1468  *
1469  * The size of the buffer has to be specified
1470  * in len, if it is not the same length as the
1471  * mount option, EINVAL is returned.
1472  * Returns ENOENT if the option is not found.
1473  */
1474 int
1475 vfs_copyopt(opts, name, dest, len)
1476 	struct vfsoptlist *opts;
1477 	const char *name;
1478 	void *dest;
1479 	int len;
1480 {
1481 	struct vfsopt *opt;
1482 
1483 	KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
1484 
1485 	TAILQ_FOREACH(opt, opts, link) {
1486 		if (strcmp(name, opt->name) == 0) {
1487 			if (len != opt->len)
1488 				return (EINVAL);
1489 			bcopy(opt->value, dest, opt->len);
1490 			return (0);
1491 		}
1492 	}
1493 	return (ENOENT);
1494 }
1495 
1496 /*
1497  * This is a helper function for filesystems to traverse their
1498  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
1499  */
1500 
1501 struct vnode *
1502 __mnt_vnode_next(struct vnode **nvp, struct mount *mp)
1503 {
1504 	struct vnode *vp;
1505 
1506 	mtx_assert(&mp->mnt_mtx, MA_OWNED);
1507 
1508 	vp = *nvp;
1509 	/* Check if we are done */
1510 	if (vp == NULL)
1511 		return (NULL);
1512 	/* If our next vnode is no longer ours, start over */
1513 	if (vp->v_mount != mp)
1514 		vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
1515 	/* Save pointer to next vnode in list */
1516 	if (vp != NULL)
1517 		*nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1518 	else
1519 		*nvp = NULL;
1520 	return (vp);
1521 }
1522 
1523 int
1524 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
1525 {
1526 	int error;
1527 
1528 	error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
1529 	if (sbp != &mp->mnt_stat)
1530 		*sbp = mp->mnt_stat;
1531 	return (error);
1532 }
1533 
1534 void
1535 vfs_mountedfrom(struct mount *mp, const char *from)
1536 {
1537 
1538 	bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
1539 	strlcpy(mp->mnt_stat.f_mntfromname, from,
1540 	    sizeof mp->mnt_stat.f_mntfromname);
1541 }
1542 
/*
 * ---------------------------------------------------------------------
 * This is the API for building mount args and mounting filesystems from
 * inside the kernel.
 *
 * The API works by accumulation of individual args.  The first error is
 * latched.
 *
 * XXX: should be documented in a new manpage kernel_mount(9)
 */
1553 
1554 /* A memory allocation which must be freed when we are done */
1555 struct mntaarg {
1556 	SLIST_ENTRY(mntaarg)	next;
1557 };
1558 
1559 /* The header for the mount arguments */
1560 struct mntarg {
1561 	struct iovec *v;
1562 	int len;
1563 	int error;
1564 	SLIST_HEAD(, mntaarg)	list;
1565 };
1566 
/*
 * Add a boolean argument.
 *
 * 'name' must start with "no".  When 'flag' is true the argument is
 * added without that prefix, otherwise it is added as given.  The
 * argument carries no value.
 */
struct mntarg *
mount_argb(struct mntarg *ma, int flag, const char *name)
{
	const char *optname;

	KASSERT(name[0] == 'n' && name[1] == 'o',
	    ("mount_argb(...,%s): name must start with 'no'", name));

	optname = name;
	if (flag)
		optname += 2;		/* step over the "no" */
	return (mount_arg(ma, optname, NULL, 0));
}
1582 
1583 /*
1584  * Add an argument printf style
1585  */
1586 struct mntarg *
1587 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
1588 {
1589 	va_list ap;
1590 	struct mntaarg *maa;
1591 	struct sbuf *sb;
1592 	int len;
1593 
1594 	if (ma == NULL) {
1595 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1596 		SLIST_INIT(&ma->list);
1597 	}
1598 	if (ma->error)
1599 		return (ma);
1600 
1601 	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
1602 	    M_MOUNT, M_WAITOK);
1603 	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
1604 	ma->v[ma->len].iov_len = strlen(name) + 1;
1605 	ma->len++;
1606 
1607 	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
1608 	va_start(ap, fmt);
1609 	sbuf_vprintf(sb, fmt, ap);
1610 	va_end(ap);
1611 	sbuf_finish(sb);
1612 	len = sbuf_len(sb) + 1;
1613 	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
1614 	SLIST_INSERT_HEAD(&ma->list, maa, next);
1615 	bcopy(sbuf_data(sb), maa + 1, len);
1616 	sbuf_delete(sb);
1617 
1618 	ma->v[ma->len].iov_base = maa + 1;
1619 	ma->v[ma->len].iov_len = len;
1620 	ma->len++;
1621 
1622 	return (ma);
1623 }
1624 
1625 /*
1626  * Add an argument which is a userland string.
1627  */
1628 struct mntarg *
1629 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
1630 {
1631 	struct mntaarg *maa;
1632 	char *tbuf;
1633 
1634 	if (val == NULL)
1635 		return (ma);
1636 	if (ma == NULL) {
1637 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1638 		SLIST_INIT(&ma->list);
1639 	}
1640 	if (ma->error)
1641 		return (ma);
1642 	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
1643 	SLIST_INSERT_HEAD(&ma->list, maa, next);
1644 	tbuf = (void *)(maa + 1);
1645 	ma->error = copyinstr(val, tbuf, len, NULL);
1646 	return (mount_arg(ma, name, tbuf, -1));
1647 }
1648 
1649 /*
1650  * Plain argument.
1651  *
1652  * If length is -1, use printf.
1653  */
1654 struct mntarg *
1655 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
1656 {
1657 
1658 	if (ma == NULL) {
1659 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1660 		SLIST_INIT(&ma->list);
1661 	}
1662 	if (ma->error)
1663 		return (ma);
1664 
1665 	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
1666 	    M_MOUNT, M_WAITOK);
1667 	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
1668 	ma->v[ma->len].iov_len = strlen(name) + 1;
1669 	ma->len++;
1670 
1671 	ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
1672 	if (len < 0)
1673 		ma->v[ma->len].iov_len = strlen(val) + 1;
1674 	else
1675 		ma->v[ma->len].iov_len = len;
1676 	ma->len++;
1677 	return (ma);
1678 }
1679 
1680 /*
1681  * Free a mntarg structure
1682  */
1683 void
1684 free_mntarg(struct mntarg *ma)
1685 {
1686 	struct mntaarg *maa;
1687 
1688 	while (!SLIST_EMPTY(&ma->list)) {
1689 		maa = SLIST_FIRST(&ma->list);
1690 		SLIST_REMOVE_HEAD(&ma->list, next);
1691 		free(maa, M_MOUNT);
1692 	}
1693 	free(ma->v, M_MOUNT);
1694 	free(ma, M_MOUNT);
1695 }
1696 
1697 /*
1698  * Mount a filesystem
1699  */
1700 int
1701 kernel_mount(struct mntarg *ma, int flags)
1702 {
1703 	struct uio auio;
1704 	int error;
1705 
1706 	KASSERT(ma != NULL, ("kernel_mount NULL ma"));
1707 	KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
1708 	KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
1709 
1710 	auio.uio_iov = ma->v;
1711 	auio.uio_iovcnt = ma->len;
1712 	auio.uio_segflg = UIO_SYSSPACE;
1713 
1714 	error = ma->error;
1715 	if (!error)
1716 		error = vfs_donmount(curthread, flags, &auio);
1717 	free_mntarg(ma);
1718 	return (error);
1719 }
1720 
/*
 * A printflike function to mount a filesystem.
 *
 * After 'flags' the arguments are (name, value) pairs terminated by a
 * NULL name.  Every value is added with a length of -1, i.e. as a
 * string.  Returns the result of kernel_mount().
 */
int
kernel_vmount(int flags, ...)
{
	struct mntarg *ma;
	const char *optname;
	const void *optval;
	va_list ap;

	ma = NULL;

	va_start(ap, flags);
	while ((optname = va_arg(ap, const char *)) != NULL) {
		optval = va_arg(ap, const void *);
		ma = mount_arg(ma, optname, optval, -1);
	}
	va_end(ap);

	return (kernel_mount(ma, flags));
}
1746