xref: /freebsd/sys/kern/vfs_mountroot.c (revision cc16dea626cf2fc80cde667ac4798065108e596c)
1 /*-
2  * Copyright (c) 2010 Marcel Moolenaar
3  * Copyright (c) 1999-2004 Poul-Henning Kamp
4  * Copyright (c) 1999 Michael Smith
5  * Copyright (c) 1989, 1993
6  *      The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include "opt_rootdevname.h"
39 
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/param.h>
44 #include <sys/conf.h>
45 #include <sys/cons.h>
46 #include <sys/fcntl.h>
47 #include <sys/jail.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/mdioctl.h>
51 #include <sys/mount.h>
52 #include <sys/mutex.h>
53 #include <sys/namei.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/filedesc.h>
57 #include <sys/reboot.h>
58 #include <sys/sbuf.h>
59 #include <sys/stat.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysproto.h>
62 #include <sys/sx.h>
63 #include <sys/sysctl.h>
64 #include <sys/sysent.h>
65 #include <sys/systm.h>
66 #include <sys/vnode.h>
67 
68 #include <geom/geom.h>
69 
70 /*
71  * The root filesystem is detailed in the kernel environment variable
72  * vfs.root.mountfrom, which is expected to be in the general format
73  *
74  * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
75  * vfsname   := the name of a VFS known to the kernel and capable
76  *              of being mounted as root
77  * path      := disk device name or other data used by the filesystem
78  *              to locate its physical store
79  *
80  * If the environment variable vfs.root.mountfrom is a space separated list,
81  * each list element is tried in turn and the root filesystem will be mounted
 * from the first one that succeeds.
83  *
84  * The environment variable vfs.root.mountfrom.options is a comma delimited
85  * set of string mount options.  These mount options must be parseable
86  * by nmount() in the kernel.
87  */
88 
89 static int parse_mount(char **);
90 static struct mntarg *parse_mountroot_options(struct mntarg *, const char *);
91 
92 /*
93  * The vnode of the system's root (/ in the filesystem, without chroot
94  * active.)
95  */
96 struct vnode *rootvnode;
97 
98 char *rootdevnames[2] = {NULL, NULL};
99 
100 struct root_hold_token {
101 	const char			*who;
102 	LIST_ENTRY(root_hold_token)	list;
103 };
104 
105 static LIST_HEAD(, root_hold_token)	root_holds =
106     LIST_HEAD_INITIALIZER(root_holds);
107 
108 enum action {
109 	A_CONTINUE,
110 	A_PANIC,
111 	A_REBOOT,
112 	A_RETRY
113 };
114 
115 static enum action root_mount_onfail = A_CONTINUE;
116 
117 static int root_mount_mddev;
118 static int root_mount_complete;
119 
120 /* By default wait up to 3 seconds for devices to appear. */
121 static int root_mount_timeout = 3;
122 TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);
123 
124 struct root_hold_token *
125 root_mount_hold(const char *identifier)
126 {
127 	struct root_hold_token *h;
128 
129 	if (root_mounted())
130 		return (NULL);
131 
132 	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
133 	h->who = identifier;
134 	mtx_lock(&mountlist_mtx);
135 	LIST_INSERT_HEAD(&root_holds, h, list);
136 	mtx_unlock(&mountlist_mtx);
137 	return (h);
138 }
139 
140 void
141 root_mount_rel(struct root_hold_token *h)
142 {
143 
144 	if (h == NULL)
145 		return;
146 	mtx_lock(&mountlist_mtx);
147 	LIST_REMOVE(h, list);
148 	wakeup(&root_holds);
149 	mtx_unlock(&mountlist_mtx);
150 	free(h, M_DEVBUF);
151 }
152 
153 int
154 root_mounted(void)
155 {
156 
157 	/* No mutex is acquired here because int stores are atomic. */
158 	return (root_mount_complete);
159 }
160 
161 void
162 root_mount_wait(void)
163 {
164 
165 	/*
166 	 * Panic on an obvious deadlock - the function can't be called from
167 	 * a thread which is doing the whole SYSINIT stuff.
168 	 */
169 	KASSERT(curthread->td_proc->p_pid != 0,
170 	    ("root_mount_wait: cannot be called from the swapper thread"));
171 	mtx_lock(&mountlist_mtx);
172 	while (!root_mount_complete) {
173 		msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
174 		    hz);
175 	}
176 	mtx_unlock(&mountlist_mtx);
177 }
178 
179 static void
180 set_rootvnode(void)
181 {
182 	struct proc *p;
183 
184 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
185 		panic("Cannot find root vnode");
186 
187 	VOP_UNLOCK(rootvnode, 0);
188 
189 	p = curthread->td_proc;
190 	FILEDESC_XLOCK(p->p_fd);
191 
192 	if (p->p_fd->fd_cdir != NULL)
193 		vrele(p->p_fd->fd_cdir);
194 	p->p_fd->fd_cdir = rootvnode;
195 	VREF(rootvnode);
196 
197 	if (p->p_fd->fd_rdir != NULL)
198 		vrele(p->p_fd->fd_rdir);
199 	p->p_fd->fd_rdir = rootvnode;
200 	VREF(rootvnode);
201 
202 	FILEDESC_XUNLOCK(p->p_fd);
203 }
204 
205 static int
206 vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
207 {
208 	struct vfsoptlist *opts;
209 	struct vfsconf *vfsp;
210 	struct mount *mp;
211 	int error;
212 
213 	*mpp = NULL;
214 
215 	vfsp = vfs_byname("devfs");
216 	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
217 	if (vfsp == NULL)
218 		return (ENOENT);
219 
220 	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
221 
222 	error = VFS_MOUNT(mp);
223 	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
224 	if (error)
225 		return (error);
226 
227 	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
228 	TAILQ_INIT(opts);
229 	mp->mnt_opt = opts;
230 
231 	mtx_lock(&mountlist_mtx);
232 	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
233 	mtx_unlock(&mountlist_mtx);
234 
235 	*mpp = mp;
236 	set_rootvnode();
237 
238 	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
239 	if (error)
240 		printf("kern_symlink /dev -> / returns %d\n", error);
241 
242 	return (error);
243 }
244 
245 static int
246 vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
247 {
248 	struct nameidata nd;
249 	struct mount *mporoot, *mpnroot;
250 	struct vnode *vp, *vporoot, *vpdevfs;
251 	char *fspath;
252 	int error;
253 
254 	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);
255 
256 	/* Shuffle the mountlist. */
257 	mtx_lock(&mountlist_mtx);
258 	mporoot = TAILQ_FIRST(&mountlist);
259 	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
260 	if (mporoot != mpdevfs) {
261 		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
262 		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
263 	}
264 	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
265 	mtx_unlock(&mountlist_mtx);
266 
267 	cache_purgevfs(mporoot);
268 	if (mporoot != mpdevfs)
269 		cache_purgevfs(mpdevfs);
270 
271 	VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot);
272 
273 	VI_LOCK(vporoot);
274 	vporoot->v_iflag &= ~VI_MOUNT;
275 	VI_UNLOCK(vporoot);
276 	vporoot->v_mountedhere = NULL;
277 	mporoot->mnt_flag &= ~MNT_ROOTFS;
278 	mporoot->mnt_vnodecovered = NULL;
279 	vput(vporoot);
280 
281 	/* Set up the new rootvnode, and purge the cache */
282 	mpnroot->mnt_vnodecovered = NULL;
283 	set_rootvnode();
284 	cache_purgevfs(rootvnode->v_mount);
285 
286 	if (mporoot != mpdevfs) {
287 		/* Remount old root under /.mount or /mnt */
288 		fspath = "/.mount";
289 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
290 		    fspath, td);
291 		error = namei(&nd);
292 		if (error) {
293 			NDFREE(&nd, NDF_ONLY_PNBUF);
294 			fspath = "/mnt";
295 			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
296 			    fspath, td);
297 			error = namei(&nd);
298 		}
299 		if (!error) {
300 			vp = nd.ni_vp;
301 			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
302 			if (!error)
303 				error = vinvalbuf(vp, V_SAVE, 0, 0);
304 			if (!error) {
305 				cache_purge(vp);
306 				mporoot->mnt_vnodecovered = vp;
307 				vp->v_mountedhere = mporoot;
308 				strlcpy(mporoot->mnt_stat.f_mntonname,
309 				    fspath, MNAMELEN);
310 				VOP_UNLOCK(vp, 0);
311 			} else
312 				vput(vp);
313 		}
314 		NDFREE(&nd, NDF_ONLY_PNBUF);
315 
316 		if (error && bootverbose)
317 			printf("mountroot: unable to remount previous root "
318 			    "under /.mount or /mnt (error %d).\n", error);
319 	}
320 
321 	/* Remount devfs under /dev */
322 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
323 	error = namei(&nd);
324 	if (!error) {
325 		vp = nd.ni_vp;
326 		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
327 		if (!error)
328 			error = vinvalbuf(vp, V_SAVE, 0, 0);
329 		if (!error) {
330 			vpdevfs = mpdevfs->mnt_vnodecovered;
331 			if (vpdevfs != NULL) {
332 				cache_purge(vpdevfs);
333 				vpdevfs->v_mountedhere = NULL;
334 				vrele(vpdevfs);
335 			}
336 			mpdevfs->mnt_vnodecovered = vp;
337 			vp->v_mountedhere = mpdevfs;
338 			VOP_UNLOCK(vp, 0);
339 		} else
340 			vput(vp);
341 	}
342 	if (error && bootverbose)
343 		printf("mountroot: unable to remount devfs under /dev "
344 		    "(error %d).\n", error);
345 	NDFREE(&nd, NDF_ONLY_PNBUF);
346 
347 	if (mporoot == mpdevfs) {
348 		vfs_unbusy(mpdevfs);
349 		/* Unlink the no longer needed /dev/dev -> / symlink */
350 		error = kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
351 		if (error && bootverbose)
352 			printf("mountroot: unable to unlink /dev/dev "
353 			    "(error %d)\n", error);
354 	}
355 
356 	return (0);
357 }
358 
359 /*
360  * Configuration parser.
361  */
362 
/*
 * Parser character classes.  Passed as the match argument of
 * parse_skipto() in place of a literal character; negative so that they
 * cannot collide with a character value.
 */
#define	CC_WHITESPACE		(-1)
#define	CC_NONWHITESPACE	(-2)

/*
 * Parse errors.  Negative to keep them distinct from errno values.
 * The values are parenthesized so the macros expand safely inside any
 * expression.
 */
#define	PE_EOF			(-1)
#define	PE_EOL			(-2)
370 
/* Return the character at the current parse position without consuming it. */
static __inline int
parse_peek(char **conf)
{
	char *cursor;

	cursor = *conf;
	return (cursor[0]);
}
377 
/* Overwrite the character at the current parse position with `c'. */
static __inline void
parse_poke(char **conf, int c)
{
	char *cursor;

	cursor = *conf;
	cursor[0] = c;
}
384 
/* Move the parse position forward by one character. */
static __inline void
parse_advance(char **conf)
{

	*conf += 1;
}
391 
/* Return 1 if `c' is a blank, a tab or a newline, and 0 otherwise. */
static __inline int
parse_isspace(int c)
{

	switch (c) {
	case ' ':
	case '\t':
	case '\n':
		return (1);
	default:
		return (0);
	}
}
398 
399 static int
400 parse_skipto(char **conf, int mc)
401 {
402 	int c, match;
403 
404 	while (1) {
405 		c = parse_peek(conf);
406 		if (c == 0)
407 			return (PE_EOF);
408 		switch (mc) {
409 		case CC_WHITESPACE:
410 			match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0;
411 			break;
412 		case CC_NONWHITESPACE:
413 			if (c == '\n')
414 				return (PE_EOL);
415 			match = (c != ' ' && c != '\t') ? 1 : 0;
416 			break;
417 		default:
418 			match = (c == mc) ? 1 : 0;
419 			break;
420 		}
421 		if (match)
422 			break;
423 		parse_advance(conf);
424 	}
425 	return (0);
426 }
427 
428 static int
429 parse_token(char **conf, char **tok)
430 {
431 	char *p;
432 	size_t len;
433 	int error;
434 
435 	*tok = NULL;
436 	error = parse_skipto(conf, CC_NONWHITESPACE);
437 	if (error)
438 		return (error);
439 	p = *conf;
440 	error = parse_skipto(conf, CC_WHITESPACE);
441 	len = *conf - p;
442 	*tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO);
443 	bcopy(p, *tok, len);
444 	return (0);
445 }
446 
447 static void
448 parse_dir_ask_printenv(const char *var)
449 {
450 	char *val;
451 
452 	val = getenv(var);
453 	if (val != NULL) {
454 		printf("  %s=%s\n", var, val);
455 		freeenv(val);
456 	}
457 }
458 
459 static int
460 parse_dir_ask(char **conf)
461 {
462 	char name[80];
463 	char *mnt;
464 	int error;
465 
466 	printf("\nLoader variables:\n");
467 	parse_dir_ask_printenv("vfs.root.mountfrom");
468 	parse_dir_ask_printenv("vfs.root.mountfrom.options");
469 
470 	printf("\nManual root filesystem specification:\n");
471 	printf("  <fstype>:<device> [options]\n");
472 	printf("      Mount <device> using filesystem <fstype>\n");
473 	printf("      and with the specified (optional) option list.\n");
474 	printf("\n");
475 	printf("    eg. ufs:/dev/da0s1a\n");
476 	printf("        zfs:tank\n");
477 	printf("        cd9660:/dev/acd0 ro\n");
478 	printf("          (which is equivalent to: ");
479 	printf("mount -t cd9660 -o ro /dev/acd0 /)\n");
480 	printf("\n");
481 	printf("  ?               List valid disk boot devices\n");
482 	printf("  .               Yield 1 second (for background tasks)\n");
483 	printf("  <empty line>    Abort manual input\n");
484 
485 	do {
486 		error = EINVAL;
487 		printf("\nmountroot> ");
488 		cngets(name, sizeof(name), GETS_ECHO);
489 		if (name[0] == '\0')
490 			break;
491 		if (name[0] == '?' && name[1] == '\0') {
492 			printf("\nList of GEOM managed disk devices:\n  ");
493 			g_dev_print();
494 			continue;
495 		}
496 		if (name[0] == '.' && name[1] == '\0') {
497 			pause("rmask", hz);
498 			continue;
499 		}
500 		mnt = name;
501 		error = parse_mount(&mnt);
502 		if (error == -1)
503 			printf("Invalid file system specification.\n");
504 	} while (error != 0);
505 
506 	return (error);
507 }
508 
509 static int
510 parse_dir_md(char **conf)
511 {
512 	struct stat sb;
513 	struct thread *td;
514 	struct md_ioctl *mdio;
515 	char *path, *tok;
516 	int error, fd, len;
517 
518 	td = curthread;
519 
520 	error = parse_token(conf, &tok);
521 	if (error)
522 		return (error);
523 
524 	len = strlen(tok);
525 	mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
526 	path = (void *)(mdio + 1);
527 	bcopy(tok, path, len);
528 	free(tok, M_TEMP);
529 
530 	/* Get file status. */
531 	error = kern_stat(td, path, UIO_SYSSPACE, &sb);
532 	if (error)
533 		goto out;
534 
535 	/* Open /dev/mdctl so that we can attach/detach. */
536 	error = kern_open(td, "/dev/" MDCTL_NAME, UIO_SYSSPACE, O_RDWR, 0);
537 	if (error)
538 		goto out;
539 
540 	fd = td->td_retval[0];
541 	mdio->md_version = MDIOVERSION;
542 	mdio->md_type = MD_VNODE;
543 
544 	if (root_mount_mddev != -1) {
545 		mdio->md_unit = root_mount_mddev;
546 		DROP_GIANT();
547 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
548 		PICKUP_GIANT();
549 		/* Ignore errors. We don't care. */
550 		root_mount_mddev = -1;
551 	}
552 
553 	mdio->md_file = (void *)(mdio + 1);
554 	mdio->md_options = MD_AUTOUNIT | MD_READONLY;
555 	mdio->md_mediasize = sb.st_size;
556 	mdio->md_unit = 0;
557 	DROP_GIANT();
558 	error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
559 	PICKUP_GIANT();
560 	if (error)
561 		goto out;
562 
563 	if (mdio->md_unit > 9) {
564 		printf("rootmount: too many md units\n");
565 		mdio->md_file = NULL;
566 		mdio->md_options = 0;
567 		mdio->md_mediasize = 0;
568 		DROP_GIANT();
569 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
570 		PICKUP_GIANT();
571 		/* Ignore errors. We don't care. */
572 		error = ERANGE;
573 		goto out;
574 	}
575 
576 	root_mount_mddev = mdio->md_unit;
577 	printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
578 
579 	error = kern_close(td, fd);
580 
581  out:
582 	free(mdio, M_TEMP);
583 	return (error);
584 }
585 
586 static int
587 parse_dir_onfail(char **conf)
588 {
589 	char *action;
590 	int error;
591 
592 	error = parse_token(conf, &action);
593 	if (error)
594 		return (error);
595 
596 	if (!strcmp(action, "continue"))
597 		root_mount_onfail = A_CONTINUE;
598 	else if (!strcmp(action, "panic"))
599 		root_mount_onfail = A_PANIC;
600 	else if (!strcmp(action, "reboot"))
601 		root_mount_onfail = A_REBOOT;
602 	else if (!strcmp(action, "retry"))
603 		root_mount_onfail = A_RETRY;
604 	else {
605 		printf("rootmount: %s: unknown action\n", action);
606 		error = EINVAL;
607 	}
608 
609 	free(action, M_TEMP);
610 	return (0);
611 }
612 
613 static int
614 parse_dir_timeout(char **conf)
615 {
616 	char *tok, *endtok;
617 	long secs;
618 	int error;
619 
620 	error = parse_token(conf, &tok);
621 	if (error)
622 		return (error);
623 
624 	secs = strtol(tok, &endtok, 0);
625 	error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
626 	if (!error)
627 		root_mount_timeout = secs;
628 	free(tok, M_TEMP);
629 	return (error);
630 }
631 
632 static int
633 parse_directive(char **conf)
634 {
635 	char *dir;
636 	int error;
637 
638 	error = parse_token(conf, &dir);
639 	if (error)
640 		return (error);
641 
642 	if (strcmp(dir, ".ask") == 0)
643 		error = parse_dir_ask(conf);
644 	else if (strcmp(dir, ".md") == 0)
645 		error = parse_dir_md(conf);
646 	else if (strcmp(dir, ".onfail") == 0)
647 		error = parse_dir_onfail(conf);
648 	else if (strcmp(dir, ".timeout") == 0)
649 		error = parse_dir_timeout(conf);
650 	else {
651 		printf("mountroot: invalid directive `%s'\n", dir);
652 		/* Ignore the rest of the line. */
653 		(void)parse_skipto(conf, '\n');
654 		error = EINVAL;
655 	}
656 	free(dir, M_TEMP);
657 	return (error);
658 }
659 
660 static int
661 parse_mount_dev_present(const char *dev)
662 {
663 	struct nameidata nd;
664 	int error;
665 
666 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread);
667 	error = namei(&nd);
668 	if (!error)
669 		vput(nd.ni_vp);
670 	NDFREE(&nd, NDF_ONLY_PNBUF);
671 	return (error != 0) ? 0 : 1;
672 }
673 
674 #define	ERRMSGL	255
675 static int
676 parse_mount(char **conf)
677 {
678 	char *errmsg;
679 	struct mntarg *ma;
680 	char *dev, *fs, *opts, *tok;
681 	int delay, error, timeout;
682 
683 	error = parse_token(conf, &tok);
684 	if (error)
685 		return (error);
686 	fs = tok;
687 	error = parse_skipto(&tok, ':');
688 	if (error) {
689 		free(fs, M_TEMP);
690 		return (error);
691 	}
692 	parse_poke(&tok, '\0');
693 	parse_advance(&tok);
694 	dev = tok;
695 
696 	if (root_mount_mddev != -1) {
697 		/* Handle substitution for the md unit number. */
698 		tok = strstr(dev, "md#");
699 		if (tok != NULL)
700 			tok[2] = '0' + root_mount_mddev;
701 	}
702 
703 	/* Parse options. */
704 	error = parse_token(conf, &tok);
705 	opts = (error == 0) ? tok : NULL;
706 
707 	printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
708 	    (opts != NULL) ? opts : "");
709 
710 	errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);
711 
712 	if (vfs_byname(fs) == NULL) {
713 		strlcpy(errmsg, "unknown file system", sizeof(errmsg));
714 		error = ENOENT;
715 		goto out;
716 	}
717 
718 	if (strcmp(fs, "zfs") != 0 && strstr(fs, "nfs") == NULL &&
719 	    dev[0] != '\0' && !parse_mount_dev_present(dev)) {
720 		printf("mountroot: waiting for device %s ...\n", dev);
721 		delay = hz / 10;
722 		timeout = root_mount_timeout * hz;
723 		do {
724 			pause("rmdev", delay);
725 			timeout -= delay;
726 		} while (timeout > 0 && !parse_mount_dev_present(dev));
727 		if (timeout <= 0) {
728 			error = ENODEV;
729 			goto out;
730 		}
731 	}
732 
733 	ma = NULL;
734 	ma = mount_arg(ma, "fstype", fs, -1);
735 	ma = mount_arg(ma, "fspath", "/", -1);
736 	ma = mount_arg(ma, "from", dev, -1);
737 	ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
738 	ma = mount_arg(ma, "ro", NULL, 0);
739 	ma = parse_mountroot_options(ma, opts);
740 	error = kernel_mount(ma, MNT_ROOTFS);
741 
742  out:
743 	if (error) {
744 		printf("Mounting from %s:%s failed with error %d",
745 		    fs, dev, error);
746 		if (errmsg[0] != '\0')
747 			printf(": %s", errmsg);
748 		printf(".\n");
749 	}
750 	free(fs, M_TEMP);
751 	free(errmsg, M_TEMP);
752 	if (opts != NULL)
753 		free(opts, M_TEMP);
754 	/* kernel_mount can return -1 on error. */
755 	return ((error < 0) ? EDOOFUS : error);
756 }
757 #undef ERRMSGL
758 
759 static int
760 vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
761 {
762 	struct mount *mp;
763 	char *conf;
764 	int error;
765 
766 	root_mount_mddev = -1;
767 
768 retry:
769 	conf = sbuf_data(sb);
770 	mp = TAILQ_NEXT(mpdevfs, mnt_list);
771 	error = (mp == NULL) ? 0 : EDOOFUS;
772 	root_mount_onfail = A_CONTINUE;
773 	while (mp == NULL) {
774 		error = parse_skipto(&conf, CC_NONWHITESPACE);
775 		if (error == PE_EOL) {
776 			parse_advance(&conf);
777 			continue;
778 		}
779 		if (error < 0)
780 			break;
781 		switch (parse_peek(&conf)) {
782 		case '#':
783 			error = parse_skipto(&conf, '\n');
784 			break;
785 		case '.':
786 			error = parse_directive(&conf);
787 			break;
788 		default:
789 			error = parse_mount(&conf);
790 			break;
791 		}
792 		if (error < 0)
793 			break;
794 		/* Ignore any trailing garbage on the line. */
795 		if (parse_peek(&conf) != '\n') {
796 			printf("mountroot: advancing to next directive...\n");
797 			(void)parse_skipto(&conf, '\n');
798 		}
799 		mp = TAILQ_NEXT(mpdevfs, mnt_list);
800 	}
801 	if (mp != NULL)
802 		return (0);
803 
804 	/*
805 	 * We failed to mount (a new) root.
806 	 */
807 	switch (root_mount_onfail) {
808 	case A_CONTINUE:
809 		break;
810 	case A_PANIC:
811 		panic("mountroot: unable to (re-)mount root.");
812 		/* NOTREACHED */
813 	case A_RETRY:
814 		goto retry;
815 	case A_REBOOT:
816 		kern_reboot(RB_NOSYNC);
817 		/* NOTREACHED */
818 	}
819 
820 	return (error);
821 }
822 
823 static void
824 vfs_mountroot_conf0(struct sbuf *sb)
825 {
826 	char *s, *tok, *mnt, *opt;
827 	int error;
828 
829 	sbuf_printf(sb, ".onfail panic\n");
830 	sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
831 	if (boothowto & RB_ASKNAME)
832 		sbuf_printf(sb, ".ask\n");
833 #ifdef ROOTDEVNAME
834 	if (boothowto & RB_DFLTROOT)
835 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
836 #endif
837 	if (boothowto & RB_CDROM) {
838 		sbuf_printf(sb, "cd9660:/dev/cd0 ro\n");
839 		sbuf_printf(sb, ".timeout 0\n");
840 		sbuf_printf(sb, "cd9660:/dev/acd0 ro\n");
841 		sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
842 	}
843 	s = getenv("vfs.root.mountfrom");
844 	if (s != NULL) {
845 		opt = getenv("vfs.root.mountfrom.options");
846 		tok = s;
847 		error = parse_token(&tok, &mnt);
848 		while (!error) {
849 			sbuf_printf(sb, "%s %s\n", mnt,
850 			    (opt != NULL) ? opt : "");
851 			free(mnt, M_TEMP);
852 			error = parse_token(&tok, &mnt);
853 		}
854 		if (opt != NULL)
855 			freeenv(opt);
856 		freeenv(s);
857 	}
858 	if (rootdevnames[0] != NULL)
859 		sbuf_printf(sb, "%s\n", rootdevnames[0]);
860 	if (rootdevnames[1] != NULL)
861 		sbuf_printf(sb, "%s\n", rootdevnames[1]);
862 #ifdef ROOTDEVNAME
863 	if (!(boothowto & RB_DFLTROOT))
864 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
865 #endif
866 	if (!(boothowto & RB_ASKNAME))
867 		sbuf_printf(sb, ".ask\n");
868 }
869 
870 static int
871 vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
872 {
873 	static char buf[128];
874 	struct nameidata nd;
875 	off_t ofs;
876 	ssize_t resid;
877 	int error, flags, len;
878 
879 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf", td);
880 	flags = FREAD;
881 	error = vn_open(&nd, &flags, 0, NULL);
882 	if (error)
883 		return (error);
884 
885 	NDFREE(&nd, NDF_ONLY_PNBUF);
886 	ofs = 0;
887 	len = sizeof(buf) - 1;
888 	while (1) {
889 		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
890 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
891 		    NOCRED, &resid, td);
892 		if (error)
893 			break;
894 		if (resid == len)
895 			break;
896 		buf[len - resid] = 0;
897 		sbuf_printf(sb, "%s", buf);
898 		ofs += len - resid;
899 	}
900 
901 	VOP_UNLOCK(nd.ni_vp, 0);
902 	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
903 	return (error);
904 }
905 
906 static void
907 vfs_mountroot_wait(void)
908 {
909 	struct root_hold_token *h;
910 	struct timeval lastfail;
911 	int curfail;
912 
913 	curfail = 0;
914 	while (1) {
915 		DROP_GIANT();
916 		g_waitidle();
917 		PICKUP_GIANT();
918 		mtx_lock(&mountlist_mtx);
919 		if (LIST_EMPTY(&root_holds)) {
920 			mtx_unlock(&mountlist_mtx);
921 			break;
922 		}
923 		if (ppsratecheck(&lastfail, &curfail, 1)) {
924 			printf("Root mount waiting for:");
925 			LIST_FOREACH(h, &root_holds, list)
926 				printf(" %s", h->who);
927 			printf("\n");
928 		}
929 		msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
930 		    hz);
931 	}
932 }
933 
934 void
935 vfs_mountroot(void)
936 {
937 	struct mount *mp;
938 	struct sbuf *sb;
939 	struct thread *td;
940 	time_t timebase;
941 	int error;
942 
943 	td = curthread;
944 
945 	vfs_mountroot_wait();
946 
947 	sb = sbuf_new_auto();
948 	vfs_mountroot_conf0(sb);
949 	sbuf_finish(sb);
950 
951 	error = vfs_mountroot_devfs(td, &mp);
952 	while (!error) {
953 		error = vfs_mountroot_parse(sb, mp);
954 		if (!error) {
955 			error = vfs_mountroot_shuffle(td, mp);
956 			if (!error) {
957 				sbuf_clear(sb);
958 				error = vfs_mountroot_readconf(td, sb);
959 				sbuf_finish(sb);
960 			}
961 		}
962 	}
963 
964 	sbuf_delete(sb);
965 
966 	/*
967 	 * Iterate over all currently mounted file systems and use
968 	 * the time stamp found to check and/or initialize the RTC.
969 	 * Call inittodr() only once and pass it the largest of the
970 	 * timestamps we encounter.
971 	 */
972 	timebase = 0;
973 	mtx_lock(&mountlist_mtx);
974 	mp = TAILQ_FIRST(&mountlist);
975 	while (mp != NULL) {
976 		if (mp->mnt_time > timebase)
977 			timebase = mp->mnt_time;
978 		mp = TAILQ_NEXT(mp, mnt_list);
979 	}
980 	mtx_unlock(&mountlist_mtx);
981 	inittodr(timebase);
982 
983 	/* Keep prison0's root in sync with the global rootvnode. */
984 	mtx_lock(&prison0.pr_mtx);
985 	prison0.pr_root = rootvnode;
986 	vref(prison0.pr_root);
987 	mtx_unlock(&prison0.pr_mtx);
988 
989 	mtx_lock(&mountlist_mtx);
990 	atomic_store_rel_int(&root_mount_complete, 1);
991 	wakeup(&root_mount_complete);
992 	mtx_unlock(&mountlist_mtx);
993 
994 	EVENTHANDLER_INVOKE(mountroot);
995 }
996 
997 static struct mntarg *
998 parse_mountroot_options(struct mntarg *ma, const char *options)
999 {
1000 	char *p;
1001 	char *name, *name_arg;
1002 	char *val, *val_arg;
1003 	char *opts;
1004 
1005 	if (options == NULL || options[0] == '\0')
1006 		return (ma);
1007 
1008 	p = opts = strdup(options, M_MOUNT);
1009 	if (opts == NULL) {
1010 		return (ma);
1011 	}
1012 
1013 	while((name = strsep(&p, ",")) != NULL) {
1014 		if (name[0] == '\0')
1015 			break;
1016 
1017 		val = strchr(name, '=');
1018 		if (val != NULL) {
1019 			*val = '\0';
1020 			++val;
1021 		}
1022 		if( strcmp(name, "rw") == 0 ||
1023 		    strcmp(name, "noro") == 0) {
1024 			/*
1025 			 * The first time we mount the root file system,
1026 			 * we need to mount 'ro', so We need to ignore
1027 			 * 'rw' and 'noro' mount options.
1028 			 */
1029 			continue;
1030 		}
1031 		name_arg = strdup(name, M_MOUNT);
1032 		val_arg = NULL;
1033 		if (val != NULL)
1034 			val_arg = strdup(val, M_MOUNT);
1035 
1036 		ma = mount_arg(ma, name_arg, val_arg,
1037 		    (val_arg != NULL ? -1 : 0));
1038 	}
1039 	free(opts, M_MOUNT);
1040 	return (ma);
1041 }
1042