xref: /freebsd/sys/kern/vfs_mountroot.c (revision 6d732c66bca5da4d261577aad2c8ea84519b0bea)
1 /*-
2  * Copyright (c) 2010 Marcel Moolenaar
3  * Copyright (c) 1999-2004 Poul-Henning Kamp
4  * Copyright (c) 1999 Michael Smith
5  * Copyright (c) 1989, 1993
6  *      The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include "opt_rootdevname.h"
39 
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/param.h>
44 #include <sys/conf.h>
45 #include <sys/cons.h>
46 #include <sys/fcntl.h>
47 #include <sys/jail.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/mdioctl.h>
51 #include <sys/mount.h>
52 #include <sys/mutex.h>
53 #include <sys/namei.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/filedesc.h>
57 #include <sys/reboot.h>
58 #include <sys/sbuf.h>
59 #include <sys/stat.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysproto.h>
62 #include <sys/sx.h>
63 #include <sys/sysctl.h>
64 #include <sys/sysent.h>
65 #include <sys/systm.h>
66 #include <sys/vnode.h>
67 
68 #include <geom/geom.h>
69 
70 /*
71  * The root filesystem is detailed in the kernel environment variable
72  * vfs.root.mountfrom, which is expected to be in the general format
73  *
74  * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
75  * vfsname   := the name of a VFS known to the kernel and capable
76  *              of being mounted as root
77  * path      := disk device name or other data used by the filesystem
78  *              to locate its physical store
79  *
80  * If the environment variable vfs.root.mountfrom is a space separated list,
81  * each list element is tried in turn and the root filesystem will be mounted
82  * from the first one that succeeds.
83  *
84  * The environment variable vfs.root.mountfrom.options is a comma delimited
85  * set of string mount options.  These mount options must be parseable
86  * by nmount() in the kernel.
87  */
88 
/*
 * Forward declarations.  parse_mount() is called by parse_dir_ask() and
 * parse_mountroot_options() is called by parse_mount(), in both cases
 * before the definition appears further down in this file.
 */
static int parse_mount(char **);
static struct mntarg *parse_mountroot_options(struct mntarg *, const char *);
91 
/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode *rootvnode;

/*
 * Optional additional root specifications.  Each non-NULL entry is
 * appended as one line to the generated mount configuration by
 * vfs_mountroot_conf0().
 */
char *rootdevnames[2] = {NULL, NULL};
99 
/*
 * One entry per outstanding root mount hold.  Entries are created by
 * root_mount_hold() and destroyed by root_mount_rel(); "who" is the
 * caller-supplied identifier that vfs_mountroot_wait() prints while it is
 * waiting, and "list" links the entry onto root_holds.
 */
struct root_hold_token {
	const char			*who;
	LIST_ENTRY(root_hold_token)	list;
};

/*
 * All outstanding holds.  Every access in this file is made with
 * mountlist_mtx held.
 */
static LIST_HEAD(, root_hold_token)	root_holds =
    LIST_HEAD_INITIALIZER(root_holds);
107 
/*
 * What vfs_mountroot_parse() does when a pass over the configuration did
 * not mount a root file system (selected with the ".onfail" directive):
 *
 *	A_CONTINUE	return the error to the caller
 *	A_PANIC		panic
 *	A_REBOOT	call kern_reboot(RB_NOSYNC)
 *	A_RETRY		parse the configuration again from the start
 */
enum action {
	A_CONTINUE,
	A_PANIC,
	A_REBOOT,
	A_RETRY
};

/* Current failure action; reset to A_CONTINUE at the start of each pass. */
static enum action root_mount_onfail = A_CONTINUE;

/* Unit of the md device attached by ".md", or -1 when there is none. */
static int root_mount_mddev;
/* Set to 1 by vfs_mountroot() once root mounting has finished. */
static int root_mount_complete;

/* By default wait up to 3 seconds for devices to appear. */
static int root_mount_timeout = 3;
TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);
123 
124 struct root_hold_token *
125 root_mount_hold(const char *identifier)
126 {
127 	struct root_hold_token *h;
128 
129 	if (root_mounted())
130 		return (NULL);
131 
132 	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
133 	h->who = identifier;
134 	mtx_lock(&mountlist_mtx);
135 	LIST_INSERT_HEAD(&root_holds, h, list);
136 	mtx_unlock(&mountlist_mtx);
137 	return (h);
138 }
139 
140 void
141 root_mount_rel(struct root_hold_token *h)
142 {
143 
144 	if (h == NULL)
145 		return;
146 	mtx_lock(&mountlist_mtx);
147 	LIST_REMOVE(h, list);
148 	wakeup(&root_holds);
149 	mtx_unlock(&mountlist_mtx);
150 	free(h, M_DEVBUF);
151 }
152 
153 int
154 root_mounted(void)
155 {
156 
157 	/* No mutex is acquired here because int stores are atomic. */
158 	return (root_mount_complete);
159 }
160 
161 void
162 root_mount_wait(void)
163 {
164 
165 	/*
166 	 * Panic on an obvious deadlock - the function can't be called from
167 	 * a thread which is doing the whole SYSINIT stuff.
168 	 */
169 	KASSERT(curthread->td_proc->p_pid != 0,
170 	    ("root_mount_wait: cannot be called from the swapper thread"));
171 	mtx_lock(&mountlist_mtx);
172 	while (!root_mount_complete) {
173 		msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
174 		    hz);
175 	}
176 	mtx_unlock(&mountlist_mtx);
177 }
178 
179 static void
180 set_rootvnode(void)
181 {
182 	struct proc *p;
183 
184 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
185 		panic("Cannot find root vnode");
186 
187 	VOP_UNLOCK(rootvnode, 0);
188 
189 	p = curthread->td_proc;
190 	FILEDESC_XLOCK(p->p_fd);
191 
192 	if (p->p_fd->fd_cdir != NULL)
193 		vrele(p->p_fd->fd_cdir);
194 	p->p_fd->fd_cdir = rootvnode;
195 	VREF(rootvnode);
196 
197 	if (p->p_fd->fd_rdir != NULL)
198 		vrele(p->p_fd->fd_rdir);
199 	p->p_fd->fd_rdir = rootvnode;
200 	VREF(rootvnode);
201 
202 	FILEDESC_XUNLOCK(p->p_fd);
203 }
204 
205 static int
206 vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
207 {
208 	struct vfsoptlist *opts;
209 	struct vfsconf *vfsp;
210 	struct mount *mp;
211 	int error;
212 
213 	*mpp = NULL;
214 
215 	vfsp = vfs_byname("devfs");
216 	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
217 	if (vfsp == NULL)
218 		return (ENOENT);
219 
220 	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
221 
222 	error = VFS_MOUNT(mp);
223 	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
224 	if (error)
225 		return (error);
226 
227 	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
228 	TAILQ_INIT(opts);
229 	mp->mnt_opt = opts;
230 
231 	mtx_lock(&mountlist_mtx);
232 	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
233 	mtx_unlock(&mountlist_mtx);
234 
235 	*mpp = mp;
236 	set_rootvnode();
237 
238 	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
239 	if (error)
240 		printf("kern_symlink /dev -> / returns %d\n", error);
241 
242 	return (error);
243 }
244 
/*
 * Make a freshly mounted file system the root.
 *
 * The mount that follows mpdevfs on the mount list is taken to be the new
 * root.  It is moved to the head of the list, devfs is moved to the tail,
 * the previous root is detached from "/" and, unless the previous root was
 * devfs itself, re-attached under /.mount (or /mnt as a fallback).  devfs
 * is then re-attached under /dev of the new root.
 *
 * Failures to re-attach the old root or devfs are only reported (when
 * bootverbose is set); the function always returns 0.
 */
static int
vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
{
	struct nameidata nd;
	struct mount *mporoot, *mpnroot;
	struct vnode *vp, *vporoot, *vpdevfs;
	char *fspath;
	int error;

	/*
	 * The new root is whatever was mounted after devfs.
	 * NOTE(review): the list is read here before mountlist_mtx is taken.
	 */
	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);

	/* Shuffle the mountlist. */
	mtx_lock(&mountlist_mtx);
	/* The current head of the list is the old root. */
	mporoot = TAILQ_FIRST(&mountlist);
	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
	/*
	 * If devfs was the old root, removing it already leaves the new root
	 * at the head; otherwise the new root is moved there explicitly.
	 */
	if (mporoot != mpdevfs) {
		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
	}
	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
	mtx_unlock(&mountlist_mtx);

	cache_purgevfs(mporoot);
	if (mporoot != mpdevfs)
		cache_purgevfs(mpdevfs);

	/* NOTE(review): the return value of VFS_ROOT() is not checked. */
	VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot);

	/* Detach the old root: it no longer covers anything and is not "/". */
	VI_LOCK(vporoot);
	vporoot->v_iflag &= ~VI_MOUNT;
	VI_UNLOCK(vporoot);
	vporoot->v_mountedhere = NULL;
	mporoot->mnt_flag &= ~MNT_ROOTFS;
	mporoot->mnt_vnodecovered = NULL;
	vput(vporoot);

	/* Set up the new rootvnode, and purge the cache */
	mpnroot->mnt_vnodecovered = NULL;
	set_rootvnode();
	cache_purgevfs(rootvnode->v_mount);

	if (mporoot != mpdevfs) {
		/* Remount old root under /.mount or /mnt */
		fspath = "/.mount";
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
		    fspath, td);
		error = namei(&nd);
		if (error) {
			/* No /.mount in the new root; fall back to /mnt. */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			fspath = "/mnt";
			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
			    fspath, td);
			error = namei(&nd);
		}
		if (!error) {
			vp = nd.ni_vp;
			/* The mount point has to be a directory. */
			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
			if (!error)
				error = vinvalbuf(vp, V_SAVE, 0, 0);
			if (!error) {
				/*
				 * Attach the old root here.  The vnode is only
				 * unlocked, so the reference obtained by the
				 * lookup stays with the mount point.
				 */
				cache_purge(vp);
				mporoot->mnt_vnodecovered = vp;
				vp->v_mountedhere = mporoot;
				strlcpy(mporoot->mnt_stat.f_mntonname,
				    fspath, MNAMELEN);
				VOP_UNLOCK(vp, 0);
			} else
				vput(vp);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);

		if (error && bootverbose)
			printf("mountroot: unable to remount previous root "
			    "under /.mount or /mnt (error %d).\n", error);
	}

	/* Remount devfs under /dev */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
	error = namei(&nd);
	if (!error) {
		vp = nd.ni_vp;
		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
		if (!error)
			error = vinvalbuf(vp, V_SAVE, 0, 0);
		if (!error) {
			/* Release the vnode devfs was covering before, if any. */
			vpdevfs = mpdevfs->mnt_vnodecovered;
			if (vpdevfs != NULL) {
				cache_purge(vpdevfs);
				vpdevfs->v_mountedhere = NULL;
				vrele(vpdevfs);
			}
			mpdevfs->mnt_vnodecovered = vp;
			vp->v_mountedhere = mpdevfs;
			VOP_UNLOCK(vp, 0);
		} else
			vput(vp);
	}
	if (error && bootverbose)
		printf("mountroot: unable to remount devfs under /dev "
		    "(error %d).\n", error);
	NDFREE(&nd, NDF_ONLY_PNBUF);

	if (mporoot == mpdevfs) {
		/*
		 * devfs was the initial root.
		 * NOTE(review): presumably this drops the busy reference held
		 * since devfs was allocated in vfs_mountroot_devfs() - confirm.
		 */
		vfs_unbusy(mpdevfs);
		/* Unlink the no longer needed /dev/dev -> / symlink */
		error = kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
		if (error && bootverbose)
			printf("mountroot: unable to unlink /dev/dev "
			    "(error %d)\n", error);
	}

	return (0);
}
358 
359 /*
360  * Configuration parser.
361  */
362 
/*
 * Parser character classes.  These are passed to parse_skipto() in place
 * of a literal character; they are negative so that they cannot collide
 * with any character the parser compares against.
 */
#define	CC_WHITESPACE		(-1)
#define	CC_NONWHITESPACE	(-2)

/*
 * Parse errors.  Negative, so that callers can tell them apart from the
 * (positive) errno values returned by directive and mount handling.
 */
#define	PE_EOF			(-1)
#define	PE_EOL			(-2)
370 
/*
 * Return the character at the current parse position without consuming
 * it.  A return value of 0 means the end of the input was reached.
 */
static __inline int
parse_peek(char **conf)
{
	const char *cur;

	cur = *conf;
	return (cur[0]);
}
377 
/*
 * Overwrite the character at the current parse position with c.  The
 * position itself is left unchanged.
 */
static __inline void
parse_poke(char **conf, int c)
{

	(*conf)[0] = c;
}
384 
/*
 * Move the parse position forward by one character.  No bounds check is
 * made; callers only advance after having peeked a non-NUL character.
 */
static __inline void
parse_advance(char **conf)
{

	*conf = *conf + 1;
}
391 
392 static int
393 parse_skipto(char **conf, int mc)
394 {
395 	int c, match;
396 
397 	while (1) {
398 		c = parse_peek(conf);
399 		if (c == 0)
400 			return (PE_EOF);
401 		switch (mc) {
402 		case CC_WHITESPACE:
403 			match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0;
404 			break;
405 		case CC_NONWHITESPACE:
406 			if (c == '\n')
407 				return (PE_EOL);
408 			match = (c != ' ' && c != '\t') ? 1 : 0;
409 			break;
410 		default:
411 			match = (c == mc) ? 1 : 0;
412 			break;
413 		}
414 		if (match)
415 			break;
416 		parse_advance(conf);
417 	}
418 	return (0);
419 }
420 
421 static int
422 parse_token(char **conf, char **tok)
423 {
424 	char *p;
425 	size_t len;
426 	int error;
427 
428 	*tok = NULL;
429 	error = parse_skipto(conf, CC_NONWHITESPACE);
430 	if (error)
431 		return (error);
432 	p = *conf;
433 	error = parse_skipto(conf, CC_WHITESPACE);
434 	len = *conf - p;
435 	*tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO);
436 	bcopy(p, *tok, len);
437 	return (0);
438 }
439 
440 static void
441 parse_dir_ask_printenv(const char *var)
442 {
443 	char *val;
444 
445 	val = getenv(var);
446 	if (val != NULL) {
447 		printf("  %s=%s\n", var, val);
448 		freeenv(val);
449 	}
450 }
451 
452 static int
453 parse_dir_ask(char **conf)
454 {
455 	char name[80];
456 	char *mnt;
457 	int error;
458 
459 	printf("\nLoader variables:\n");
460 	parse_dir_ask_printenv("vfs.root.mountfrom");
461 	parse_dir_ask_printenv("vfs.root.mountfrom.options");
462 
463 	printf("\nManual root filesystem specification:\n");
464 	printf("  <fstype>:<device> [options]\n");
465 	printf("      Mount <device> using filesystem <fstype>\n");
466 	printf("      and with the specified (optional) option list.\n");
467 	printf("\n");
468 	printf("    eg. ufs:/dev/da0s1a\n");
469 	printf("        zfs:tank\n");
470 	printf("        cd9660:/dev/acd0 ro\n");
471 	printf("          (which is equivalent to: ");
472 	printf("mount -t cd9660 -o ro /dev/acd0 /)\n");
473 	printf("\n");
474 	printf("  ?               List valid disk boot devices\n");
475 	printf("  .               Yield 1 second (for background tasks)\n");
476 	printf("  <empty line>    Abort manual input\n");
477 
478 	do {
479 		error = EINVAL;
480 		printf("\nmountroot> ");
481 		cngets(name, sizeof(name), GETS_ECHO);
482 		if (name[0] == '\0')
483 			break;
484 		if (name[0] == '?' && name[1] == '\0') {
485 			printf("\nList of GEOM managed disk devices:\n  ");
486 			g_dev_print();
487 			continue;
488 		}
489 		if (name[0] == '.' && name[1] == '\0') {
490 			pause("rmask", hz);
491 			continue;
492 		}
493 		mnt = name;
494 		error = parse_mount(&mnt);
495 		if (error == -1)
496 			printf("Invalid file system specification.\n");
497 	} while (error != 0);
498 
499 	return (error);
500 }
501 
502 static int
503 parse_dir_md(char **conf)
504 {
505 	struct stat sb;
506 	struct thread *td;
507 	struct md_ioctl *mdio;
508 	char *path, *tok;
509 	int error, fd, len;
510 
511 	td = curthread;
512 
513 	error = parse_token(conf, &tok);
514 	if (error)
515 		return (error);
516 
517 	len = strlen(tok);
518 	mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
519 	path = (void *)(mdio + 1);
520 	bcopy(tok, path, len);
521 	free(tok, M_TEMP);
522 
523 	/* Get file status. */
524 	error = kern_stat(td, path, UIO_SYSSPACE, &sb);
525 	if (error)
526 		goto out;
527 
528 	/* Open /dev/mdctl so that we can attach/detach. */
529 	error = kern_open(td, "/dev/" MDCTL_NAME, UIO_SYSSPACE, O_RDWR, 0);
530 	if (error)
531 		goto out;
532 
533 	fd = td->td_retval[0];
534 	mdio->md_version = MDIOVERSION;
535 	mdio->md_type = MD_VNODE;
536 
537 	if (root_mount_mddev != -1) {
538 		mdio->md_unit = root_mount_mddev;
539 		DROP_GIANT();
540 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
541 		PICKUP_GIANT();
542 		/* Ignore errors. We don't care. */
543 		root_mount_mddev = -1;
544 	}
545 
546 	mdio->md_file = (void *)(mdio + 1);
547 	mdio->md_options = MD_AUTOUNIT | MD_READONLY;
548 	mdio->md_mediasize = sb.st_size;
549 	mdio->md_unit = 0;
550 	DROP_GIANT();
551 	error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
552 	PICKUP_GIANT();
553 	if (error)
554 		goto out;
555 
556 	if (mdio->md_unit > 9) {
557 		printf("rootmount: too many md units\n");
558 		mdio->md_file = NULL;
559 		mdio->md_options = 0;
560 		mdio->md_mediasize = 0;
561 		DROP_GIANT();
562 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
563 		PICKUP_GIANT();
564 		/* Ignore errors. We don't care. */
565 		error = ERANGE;
566 		goto out;
567 	}
568 
569 	root_mount_mddev = mdio->md_unit;
570 	printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
571 
572 	error = kern_close(td, fd);
573 
574  out:
575 	free(mdio, M_TEMP);
576 	return (error);
577 }
578 
579 static int
580 parse_dir_onfail(char **conf)
581 {
582 	char *action;
583 	int error;
584 
585 	error = parse_token(conf, &action);
586 	if (error)
587 		return (error);
588 
589 	if (!strcmp(action, "continue"))
590 		root_mount_onfail = A_CONTINUE;
591 	else if (!strcmp(action, "panic"))
592 		root_mount_onfail = A_PANIC;
593 	else if (!strcmp(action, "reboot"))
594 		root_mount_onfail = A_REBOOT;
595 	else if (!strcmp(action, "retry"))
596 		root_mount_onfail = A_RETRY;
597 	else {
598 		printf("rootmount: %s: unknown action\n", action);
599 		error = EINVAL;
600 	}
601 
602 	free(action, M_TEMP);
603 	return (0);
604 }
605 
606 static int
607 parse_dir_timeout(char **conf)
608 {
609 	char *tok, *endtok;
610 	long secs;
611 	int error;
612 
613 	error = parse_token(conf, &tok);
614 	if (error)
615 		return (error);
616 
617 	secs = strtol(tok, &endtok, 0);
618 	error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
619 	if (!error)
620 		root_mount_timeout = secs;
621 	free(tok, M_TEMP);
622 	return (error);
623 }
624 
625 static int
626 parse_directive(char **conf)
627 {
628 	char *dir;
629 	int error;
630 
631 	error = parse_token(conf, &dir);
632 	if (error)
633 		return (error);
634 
635 	if (strcmp(dir, ".ask") == 0)
636 		error = parse_dir_ask(conf);
637 	else if (strcmp(dir, ".md") == 0)
638 		error = parse_dir_md(conf);
639 	else if (strcmp(dir, ".onfail") == 0)
640 		error = parse_dir_onfail(conf);
641 	else if (strcmp(dir, ".timeout") == 0)
642 		error = parse_dir_timeout(conf);
643 	else {
644 		printf("mountroot: invalid directive `%s'\n", dir);
645 		/* Ignore the rest of the line. */
646 		(void)parse_skipto(conf, '\n');
647 		error = EINVAL;
648 	}
649 	free(dir, M_TEMP);
650 	return (error);
651 }
652 
653 static int
654 parse_mount_dev_present(const char *dev)
655 {
656 	struct nameidata nd;
657 	int error;
658 
659 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread);
660 	error = namei(&nd);
661 	if (!error)
662 		vput(nd.ni_vp);
663 	NDFREE(&nd, NDF_ONLY_PNBUF);
664 	return (error != 0) ? 0 : 1;
665 }
666 
667 #define	ERRMSGL	255
668 static int
669 parse_mount(char **conf)
670 {
671 	char *errmsg;
672 	struct mntarg *ma;
673 	char *dev, *fs, *opts, *tok;
674 	int delay, error, timeout;
675 
676 	error = parse_token(conf, &tok);
677 	if (error)
678 		return (error);
679 	fs = tok;
680 	error = parse_skipto(&tok, ':');
681 	if (error) {
682 		free(fs, M_TEMP);
683 		return (error);
684 	}
685 	parse_poke(&tok, '\0');
686 	parse_advance(&tok);
687 	dev = tok;
688 
689 	if (root_mount_mddev != -1) {
690 		/* Handle substitution for the md unit number. */
691 		tok = strstr(dev, "md#");
692 		if (tok != NULL)
693 			tok[2] = '0' + root_mount_mddev;
694 	}
695 
696 	/* Parse options. */
697 	error = parse_token(conf, &tok);
698 	opts = (error == 0) ? tok : NULL;
699 
700 	printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
701 	    (opts != NULL) ? opts : "");
702 
703 	errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);
704 
705 	if (vfs_byname(fs) == NULL) {
706 		strlcpy(errmsg, "unknown file system", ERRMSGL);
707 		error = ENOENT;
708 		goto out;
709 	}
710 
711 	if (strcmp(fs, "zfs") != 0 && strstr(fs, "nfs") == NULL &&
712 	    dev[0] != '\0' && !parse_mount_dev_present(dev)) {
713 		printf("mountroot: waiting for device %s ...\n", dev);
714 		delay = hz / 10;
715 		timeout = root_mount_timeout * hz;
716 		do {
717 			pause("rmdev", delay);
718 			timeout -= delay;
719 		} while (timeout > 0 && !parse_mount_dev_present(dev));
720 		if (timeout <= 0) {
721 			error = ENODEV;
722 			goto out;
723 		}
724 	}
725 
726 	ma = NULL;
727 	ma = mount_arg(ma, "fstype", fs, -1);
728 	ma = mount_arg(ma, "fspath", "/", -1);
729 	ma = mount_arg(ma, "from", dev, -1);
730 	ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
731 	ma = mount_arg(ma, "ro", NULL, 0);
732 	ma = parse_mountroot_options(ma, opts);
733 	error = kernel_mount(ma, MNT_ROOTFS);
734 
735  out:
736 	if (error) {
737 		printf("Mounting from %s:%s failed with error %d",
738 		    fs, dev, error);
739 		if (errmsg[0] != '\0')
740 			printf(": %s", errmsg);
741 		printf(".\n");
742 	}
743 	free(fs, M_TEMP);
744 	free(errmsg, M_TEMP);
745 	if (opts != NULL)
746 		free(opts, M_TEMP);
747 	/* kernel_mount can return -1 on error. */
748 	return ((error < 0) ? EDOOFUS : error);
749 }
750 #undef ERRMSGL
751 
752 static int
753 vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
754 {
755 	struct mount *mp;
756 	char *conf;
757 	int error;
758 
759 	root_mount_mddev = -1;
760 
761 retry:
762 	conf = sbuf_data(sb);
763 	mp = TAILQ_NEXT(mpdevfs, mnt_list);
764 	error = (mp == NULL) ? 0 : EDOOFUS;
765 	root_mount_onfail = A_CONTINUE;
766 	while (mp == NULL) {
767 		error = parse_skipto(&conf, CC_NONWHITESPACE);
768 		if (error == PE_EOL) {
769 			parse_advance(&conf);
770 			continue;
771 		}
772 		if (error < 0)
773 			break;
774 		switch (parse_peek(&conf)) {
775 		case '#':
776 			error = parse_skipto(&conf, '\n');
777 			break;
778 		case '.':
779 			error = parse_directive(&conf);
780 			break;
781 		default:
782 			error = parse_mount(&conf);
783 			break;
784 		}
785 		if (error < 0)
786 			break;
787 		/* Ignore any trailing garbage on the line. */
788 		if (parse_peek(&conf) != '\n') {
789 			printf("mountroot: advancing to next directive...\n");
790 			(void)parse_skipto(&conf, '\n');
791 		}
792 		mp = TAILQ_NEXT(mpdevfs, mnt_list);
793 	}
794 	if (mp != NULL)
795 		return (0);
796 
797 	/*
798 	 * We failed to mount (a new) root.
799 	 */
800 	switch (root_mount_onfail) {
801 	case A_CONTINUE:
802 		break;
803 	case A_PANIC:
804 		panic("mountroot: unable to (re-)mount root.");
805 		/* NOTREACHED */
806 	case A_RETRY:
807 		goto retry;
808 	case A_REBOOT:
809 		kern_reboot(RB_NOSYNC);
810 		/* NOTREACHED */
811 	}
812 
813 	return (error);
814 }
815 
816 static void
817 vfs_mountroot_conf0(struct sbuf *sb)
818 {
819 	char *s, *tok, *mnt, *opt;
820 	int error;
821 
822 	sbuf_printf(sb, ".onfail panic\n");
823 	sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
824 	if (boothowto & RB_ASKNAME)
825 		sbuf_printf(sb, ".ask\n");
826 #ifdef ROOTDEVNAME
827 	if (boothowto & RB_DFLTROOT)
828 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
829 #endif
830 	if (boothowto & RB_CDROM) {
831 		sbuf_printf(sb, "cd9660:/dev/cd0 ro\n");
832 		sbuf_printf(sb, ".timeout 0\n");
833 		sbuf_printf(sb, "cd9660:/dev/acd0 ro\n");
834 		sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
835 	}
836 	s = getenv("vfs.root.mountfrom");
837 	if (s != NULL) {
838 		opt = getenv("vfs.root.mountfrom.options");
839 		tok = s;
840 		error = parse_token(&tok, &mnt);
841 		while (!error) {
842 			sbuf_printf(sb, "%s %s\n", mnt,
843 			    (opt != NULL) ? opt : "");
844 			free(mnt, M_TEMP);
845 			error = parse_token(&tok, &mnt);
846 		}
847 		if (opt != NULL)
848 			freeenv(opt);
849 		freeenv(s);
850 	}
851 	if (rootdevnames[0] != NULL)
852 		sbuf_printf(sb, "%s\n", rootdevnames[0]);
853 	if (rootdevnames[1] != NULL)
854 		sbuf_printf(sb, "%s\n", rootdevnames[1]);
855 #ifdef ROOTDEVNAME
856 	if (!(boothowto & RB_DFLTROOT))
857 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
858 #endif
859 	if (!(boothowto & RB_ASKNAME))
860 		sbuf_printf(sb, ".ask\n");
861 }
862 
863 static int
864 vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
865 {
866 	static char buf[128];
867 	struct nameidata nd;
868 	off_t ofs;
869 	ssize_t resid;
870 	int error, flags, len;
871 
872 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf", td);
873 	flags = FREAD;
874 	error = vn_open(&nd, &flags, 0, NULL);
875 	if (error)
876 		return (error);
877 
878 	NDFREE(&nd, NDF_ONLY_PNBUF);
879 	ofs = 0;
880 	len = sizeof(buf) - 1;
881 	while (1) {
882 		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
883 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
884 		    NOCRED, &resid, td);
885 		if (error)
886 			break;
887 		if (resid == len)
888 			break;
889 		buf[len - resid] = 0;
890 		sbuf_printf(sb, "%s", buf);
891 		ofs += len - resid;
892 	}
893 
894 	VOP_UNLOCK(nd.ni_vp, 0);
895 	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
896 	return (error);
897 }
898 
899 static void
900 vfs_mountroot_wait(void)
901 {
902 	struct root_hold_token *h;
903 	struct timeval lastfail;
904 	int curfail;
905 
906 	curfail = 0;
907 	while (1) {
908 		DROP_GIANT();
909 		g_waitidle();
910 		PICKUP_GIANT();
911 		mtx_lock(&mountlist_mtx);
912 		if (LIST_EMPTY(&root_holds)) {
913 			mtx_unlock(&mountlist_mtx);
914 			break;
915 		}
916 		if (ppsratecheck(&lastfail, &curfail, 1)) {
917 			printf("Root mount waiting for:");
918 			LIST_FOREACH(h, &root_holds, list)
919 				printf(" %s", h->who);
920 			printf("\n");
921 		}
922 		msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
923 		    hz);
924 	}
925 }
926 
927 void
928 vfs_mountroot(void)
929 {
930 	struct mount *mp;
931 	struct sbuf *sb;
932 	struct thread *td;
933 	time_t timebase;
934 	int error;
935 
936 	td = curthread;
937 
938 	vfs_mountroot_wait();
939 
940 	sb = sbuf_new_auto();
941 	vfs_mountroot_conf0(sb);
942 	sbuf_finish(sb);
943 
944 	error = vfs_mountroot_devfs(td, &mp);
945 	while (!error) {
946 		error = vfs_mountroot_parse(sb, mp);
947 		if (!error) {
948 			error = vfs_mountroot_shuffle(td, mp);
949 			if (!error) {
950 				sbuf_clear(sb);
951 				error = vfs_mountroot_readconf(td, sb);
952 				sbuf_finish(sb);
953 			}
954 		}
955 	}
956 
957 	sbuf_delete(sb);
958 
959 	/*
960 	 * Iterate over all currently mounted file systems and use
961 	 * the time stamp found to check and/or initialize the RTC.
962 	 * Call inittodr() only once and pass it the largest of the
963 	 * timestamps we encounter.
964 	 */
965 	timebase = 0;
966 	mtx_lock(&mountlist_mtx);
967 	mp = TAILQ_FIRST(&mountlist);
968 	while (mp != NULL) {
969 		if (mp->mnt_time > timebase)
970 			timebase = mp->mnt_time;
971 		mp = TAILQ_NEXT(mp, mnt_list);
972 	}
973 	mtx_unlock(&mountlist_mtx);
974 	inittodr(timebase);
975 
976 	/* Keep prison0's root in sync with the global rootvnode. */
977 	mtx_lock(&prison0.pr_mtx);
978 	prison0.pr_root = rootvnode;
979 	vref(prison0.pr_root);
980 	mtx_unlock(&prison0.pr_mtx);
981 
982 	mtx_lock(&mountlist_mtx);
983 	atomic_store_rel_int(&root_mount_complete, 1);
984 	wakeup(&root_mount_complete);
985 	mtx_unlock(&mountlist_mtx);
986 
987 	EVENTHANDLER_INVOKE(mountroot);
988 }
989 
990 static struct mntarg *
991 parse_mountroot_options(struct mntarg *ma, const char *options)
992 {
993 	char *p;
994 	char *name, *name_arg;
995 	char *val, *val_arg;
996 	char *opts;
997 
998 	if (options == NULL || options[0] == '\0')
999 		return (ma);
1000 
1001 	p = opts = strdup(options, M_MOUNT);
1002 	if (opts == NULL) {
1003 		return (ma);
1004 	}
1005 
1006 	while((name = strsep(&p, ",")) != NULL) {
1007 		if (name[0] == '\0')
1008 			break;
1009 
1010 		val = strchr(name, '=');
1011 		if (val != NULL) {
1012 			*val = '\0';
1013 			++val;
1014 		}
1015 		if( strcmp(name, "rw") == 0 ||
1016 		    strcmp(name, "noro") == 0) {
1017 			/*
1018 			 * The first time we mount the root file system,
1019 			 * we need to mount 'ro', so We need to ignore
1020 			 * 'rw' and 'noro' mount options.
1021 			 */
1022 			continue;
1023 		}
1024 		name_arg = strdup(name, M_MOUNT);
1025 		val_arg = NULL;
1026 		if (val != NULL)
1027 			val_arg = strdup(val, M_MOUNT);
1028 
1029 		ma = mount_arg(ma, name_arg, val_arg,
1030 		    (val_arg != NULL ? -1 : 0));
1031 	}
1032 	free(opts, M_MOUNT);
1033 	return (ma);
1034 }
1035