1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2010 Marcel Moolenaar
5 * Copyright (c) 1999-2004 Poul-Henning Kamp
6 * Copyright (c) 1999 Michael Smith
7 * Copyright (c) 1989, 1993
8 * The Regents of the University of California. All rights reserved.
9 * (c) UNIX System Laboratories, Inc.
10 * All or some portions of this file are derived from material licensed
11 * to the University of California by American Telephone and Telegraph
12 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
13 * the permission of UNIX System Laboratories, Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include "opt_rootdevname.h"
41
42 #include <sys/param.h>
43 #include <sys/conf.h>
44 #include <sys/cons.h>
45 #include <sys/eventhandler.h>
46 #include <sys/fcntl.h>
47 #include <sys/jail.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/mdioctl.h>
51 #include <sys/mount.h>
52 #include <sys/mutex.h>
53 #include <sys/namei.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/filedesc.h>
57 #include <sys/reboot.h>
58 #include <sys/sbuf.h>
59 #include <sys/stat.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysproto.h>
62 #include <sys/sx.h>
63 #include <sys/sysctl.h>
64 #include <sys/systm.h>
65 #include <sys/vnode.h>
66
67 #include <geom/geom.h>
68
69 /*
70 * The root filesystem is detailed in the kernel environment variable
71 * vfs.root.mountfrom, which is expected to be in the general format
72 *
73 * <vfsname>:[<path>][ <vfsname>:[<path>] ...]
74 * vfsname := the name of a VFS known to the kernel and capable
75 * of being mounted as root
76 * path := disk device name or other data used by the filesystem
77 * to locate its physical store
78 *
79 * If the environment variable vfs.root.mountfrom is a space separated list,
80 * each list element is tried in turn and the root filesystem will be mounted
81 * from the first one that succeeds.
82 *
83 * The environment variable vfs.root.mountfrom.options is a comma delimited
84 * set of string mount options. These mount options must be parseable
85 * by nmount() in the kernel.
86 */
87
/*
 * Forward declarations for file-local functions that are referenced
 * before their definitions appear below.
 */
static int parse_mount(char **);
static struct mntarg *parse_mountroot_options(struct mntarg *, const char *);
static int sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS);
static void vfs_mountroot_wait(void);
static int vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev);
93
/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode *rootvnode;

/*
 * Mount of the system's /dev.
 */
struct mount *rootdevmp;

/*
 * Additional root mount specifications.  Each non-NULL entry is appended
 * as its own line to the built-in configuration by vfs_mountroot_conf0().
 */
char *rootdevnames[2] = {NULL, NULL};

/* Mutex taken around every access to the root_holds list in this file. */
struct mtx root_holds_mtx;
MTX_SYSINIT(root_holds, &root_holds_mtx, "root_holds", MTX_DEF);

/*
 * List of outstanding root mount hold tokens.  vfs_mountroot_wait()
 * sleeps until this list is empty.
 */
static TAILQ_HEAD(, root_hold_token) root_holds =
    TAILQ_HEAD_INITIALIZER(root_holds);

/*
 * What vfs_mountroot_parse() does when no root could be mounted; selected
 * by the ".onfail" directive.
 */
enum action {
	A_CONTINUE,
	A_PANIC,
	A_REBOOT,
	A_RETRY
};

/*
 * State stored in a root_hold_token's flags field.
 */
enum rh_flags {
	RH_FREE,	/* Released; root_mount_rel() ignores the token. */
	RH_ALLOC,	/* Allocated by root_mount_hold(); freed on release. */
	RH_ARG,		/* Storage supplied to root_mount_hold_token(). */
};

/* Reset to A_CONTINUE at the start of each configuration parse. */
static enum action root_mount_onfail = A_CONTINUE;

/* Unit of the md device attached by ".md", or -1 when there is none. */
static int root_mount_mddev;
/* Set to 1 at the end of vfs_mountroot(); read by root_mounted(). */
static int root_mount_complete;

/* By default wait up to 3 seconds for devices to appear. */
static int root_mount_timeout = 3;
TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);

static int root_mount_always_wait = 0;
SYSCTL_INT(_vfs, OID_AUTO, root_mount_always_wait, CTLFLAG_RDTUN,
    &root_mount_always_wait, 0,
    "Wait for root mount holds even if the root device already exists");

SYSCTL_PROC(_vfs, OID_AUTO, root_mount_hold,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vfs_root_mount_hold, "A",
    "List of root mount hold tokens");
144
145 static int
sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS)146 sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS)
147 {
148 struct sbuf sb;
149 struct root_hold_token *h;
150 int error;
151
152 sbuf_new(&sb, NULL, 256, SBUF_AUTOEXTEND | SBUF_INCLUDENUL);
153
154 mtx_lock(&root_holds_mtx);
155 TAILQ_FOREACH(h, &root_holds, list) {
156 if (h != TAILQ_FIRST(&root_holds))
157 sbuf_putc(&sb, ' ');
158 sbuf_printf(&sb, "%s", h->who);
159 }
160 mtx_unlock(&root_holds_mtx);
161
162 error = sbuf_finish(&sb);
163 if (error == 0)
164 error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
165 sbuf_delete(&sb);
166 return (error);
167 }
168
169 struct root_hold_token *
root_mount_hold(const char * identifier)170 root_mount_hold(const char *identifier)
171 {
172 struct root_hold_token *h;
173
174 h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
175 h->flags = RH_ALLOC;
176 h->who = identifier;
177 mtx_lock(&root_holds_mtx);
178 TSHOLD("root mount");
179 TAILQ_INSERT_TAIL(&root_holds, h, list);
180 mtx_unlock(&root_holds_mtx);
181 return (h);
182 }
183
184 void
root_mount_hold_token(const char * identifier,struct root_hold_token * h)185 root_mount_hold_token(const char *identifier, struct root_hold_token *h)
186 {
187 #ifdef INVARIANTS
188 struct root_hold_token *t;
189 #endif
190
191 h->flags = RH_ARG;
192 h->who = identifier;
193 mtx_lock(&root_holds_mtx);
194 #ifdef INVARIANTS
195 TAILQ_FOREACH(t, &root_holds, list) {
196 if (t == h) {
197 panic("Duplicate mount hold by '%s' on %p",
198 identifier, h);
199 }
200 }
201 #endif
202 TSHOLD("root mount");
203 TAILQ_INSERT_TAIL(&root_holds, h, list);
204 mtx_unlock(&root_holds_mtx);
205 }
206
207 void
root_mount_rel(struct root_hold_token * h)208 root_mount_rel(struct root_hold_token *h)
209 {
210
211 if (h == NULL || h->flags == RH_FREE)
212 return;
213
214 mtx_lock(&root_holds_mtx);
215 TAILQ_REMOVE(&root_holds, h, list);
216 TSRELEASE("root mount");
217 wakeup(&root_holds);
218 mtx_unlock(&root_holds_mtx);
219 if (h->flags == RH_ALLOC) {
220 free(h, M_DEVBUF);
221 } else
222 h->flags = RH_FREE;
223 }
224
225 int
root_mounted(void)226 root_mounted(void)
227 {
228
229 /* No mutex is acquired here because int stores are atomic. */
230 return (root_mount_complete);
231 }
232
233 static void
set_rootvnode(void)234 set_rootvnode(void)
235 {
236
237 if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
238 panic("set_rootvnode: Cannot find root vnode");
239
240 VOP_UNLOCK(rootvnode);
241
242 pwd_set_rootvnode();
243 }
244
245 static int
vfs_mountroot_devfs(struct thread * td,struct mount ** mpp)246 vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
247 {
248 struct vfsoptlist *opts;
249 struct vfsconf *vfsp;
250 struct mount *mp;
251 int error;
252
253 *mpp = NULL;
254
255 if (rootdevmp != NULL) {
256 /*
257 * Already have /dev; this happens during rerooting.
258 */
259 error = vfs_busy(rootdevmp, 0);
260 if (error != 0)
261 return (error);
262 *mpp = rootdevmp;
263 } else {
264 vfsp = vfs_byname("devfs");
265 KASSERT(vfsp != NULL, ("Could not find devfs by name"));
266 if (vfsp == NULL)
267 return (ENOENT);
268
269 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
270
271 error = VFS_MOUNT(mp);
272 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
273 if (error)
274 return (error);
275
276 error = VFS_STATFS(mp, &mp->mnt_stat);
277 KASSERT(error == 0, ("VFS_STATFS(devfs) failed %d", error));
278 if (error)
279 return (error);
280
281 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
282 TAILQ_INIT(opts);
283 mp->mnt_opt = opts;
284
285 mtx_lock(&mountlist_mtx);
286 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
287 mtx_unlock(&mountlist_mtx);
288
289 *mpp = mp;
290 rootdevmp = mp;
291 vfs_op_exit(mp);
292 }
293
294 set_rootvnode();
295
296 error = kern_symlinkat(td, "/", AT_FDCWD, "dev", UIO_SYSSPACE);
297 if (error)
298 printf("kern_symlink /dev -> / returns %d\n", error);
299
300 return (error);
301 }
302
/*
 * Put a freshly mounted root file system in place.
 *
 * The mount that follows devfs on the mount list becomes the head of the
 * list and devfs is moved to the tail.  The previous head of the list is
 * detached from its root role, rootvnode is re-pointed at the new head,
 * and then:
 *   - if the previous head was not devfs, it is re-covered on "/.mount"
 *     (or "/mnt" as a fallback) of the new root;
 *   - devfs is re-covered on "/dev" of the new root;
 *   - if the previous head was devfs, devfs is unbusied and the temporary
 *     "/dev/dev" symlink is removed.
 * Failures of the remount steps are reported with printf() only.
 */
static void
vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
{
	struct nameidata nd;
	struct mount *mporoot, *mpnroot;
	struct vnode *vp, *vporoot, *vpdevfs;
	char *fspath;
	int error;

	/* The new root is whatever mount follows devfs on the list. */
	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);

	/* Shuffle the mountlist. */
	mtx_lock(&mountlist_mtx);
	/* Old root: the head of the list before any reordering. */
	mporoot = TAILQ_FIRST(&mountlist);
	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
	if (mporoot != mpdevfs) {
		/* The new root is not at the head yet; move it there. */
		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
	}
	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
	mtx_unlock(&mountlist_mtx);

	cache_purgevfs(mporoot);
	if (mporoot != mpdevfs)
		cache_purgevfs(mpdevfs);

	if (VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot))
		panic("vfs_mountroot_shuffle: Cannot find root vnode");

	/* Strip the mount-point and root markings from the old root. */
	VI_LOCK(vporoot);
	vporoot->v_iflag &= ~VI_MOUNT;
	vn_irflag_unset_locked(vporoot, VIRF_MOUNTPOINT);
	vporoot->v_mountedhere = NULL;
	VI_UNLOCK(vporoot);
	mporoot->mnt_flag &= ~MNT_ROOTFS;
	mporoot->mnt_vnodecovered = NULL;
	vput(vporoot);

	/* Set up the new rootvnode, and purge the cache */
	mpnroot->mnt_vnodecovered = NULL;
	set_rootvnode();
	cache_purgevfs(rootvnode->v_mount);

	if (mporoot != mpdevfs) {
		/* Remount old root under /.mount or /mnt */
		fspath = "/.mount";
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath);
		error = namei(&nd);
		if (error) {
			fspath = "/mnt";
			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
			    fspath);
			error = namei(&nd);
		}
		if (!error) {
			NDFREE_PNBUF(&nd);
			vp = nd.ni_vp;
			/* Only a directory can be covered. */
			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
			if (!error)
				error = vinvalbuf(vp, V_SAVE, 0, 0);
			if (!error) {
				cache_purge(vp);
				VI_LOCK(vp);
				mporoot->mnt_vnodecovered = vp;
				vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
				vp->v_mountedhere = mporoot;
				strlcpy(mporoot->mnt_stat.f_mntonname,
				    fspath, MNAMELEN);
				VI_UNLOCK(vp);
				/*
				 * Only the lock is dropped; the reference from
				 * namei() stays with mnt_vnodecovered.
				 */
				VOP_UNLOCK(vp);
			} else
				vput(vp);
		}

		if (error)
			printf("mountroot: unable to remount previous root "
			    "under /.mount or /mnt (error %d)\n", error);
	}

	/* Remount devfs under /dev */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev");
	error = namei(&nd);
	if (!error) {
		NDFREE_PNBUF(&nd);
		vp = nd.ni_vp;
		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
		if (!error)
			error = vinvalbuf(vp, V_SAVE, 0, 0);
		if (!error) {
			/* Detach devfs from the vnode it covered so far. */
			vpdevfs = mpdevfs->mnt_vnodecovered;
			if (vpdevfs != NULL) {
				cache_purge(vpdevfs);
				VI_LOCK(vpdevfs);
				vn_irflag_unset_locked(vpdevfs, VIRF_MOUNTPOINT);
				vpdevfs->v_mountedhere = NULL;
				VI_UNLOCK(vpdevfs);
				vrele(vpdevfs);
			}
			VI_LOCK(vp);
			mpdevfs->mnt_vnodecovered = vp;
			vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
			vp->v_mountedhere = mpdevfs;
			VI_UNLOCK(vp);
			VOP_UNLOCK(vp);
		} else
			vput(vp);
	}
	if (error)
		printf("mountroot: unable to remount devfs under /dev "
		    "(error %d)\n", error);

	if (mporoot == mpdevfs) {
		/* devfs itself was the old root; no previous root exists. */
		vfs_unbusy(mpdevfs);
		/* Unlink the no longer needed /dev/dev -> / symlink */
		error = kern_funlinkat(td, AT_FDCWD, "/dev/dev", FD_NONE,
		    UIO_SYSSPACE, 0, 0);
		if (error)
			printf("mountroot: unable to unlink /dev/dev "
			    "(error %d)\n", error);
	}
}
424
/*
 * Configuration parser.
 *
 * The parser works in place on a NUL-terminated configuration string;
 * callers pass a pointer to their cursor (char **conf) which the helpers
 * below read, overwrite, or advance.
 */

/*
 * Parser character classes.  Passed as the `mc' argument of parse_skipto()
 * instead of a literal character; negative so they cannot collide with one.
 */
#define CC_WHITESPACE -1
#define CC_NONWHITESPACE -2

/* Parse errors.  Returned by parse_skipto() (negative, unlike errno values). */
#define PE_EOF -1
#define PE_EOL -2
436
/*
 * Return the character at the cursor without moving the cursor.
 */
static __inline int
parse_peek(char **conf)
{
	const char *cursor;

	cursor = *conf;
	return (*cursor);
}
443
/*
 * Overwrite the character at the cursor with `c'; the cursor itself is
 * left where it is.
 */
static __inline void
parse_poke(char **conf, int c)
{
	char *cursor;

	cursor = *conf;
	*cursor = c;
}
450
/*
 * Move the cursor forward by one character.  No bounds check is made.
 */
static __inline void
parse_advance(char **conf)
{

	*conf += 1;
}
457
458 static int
parse_skipto(char ** conf,int mc)459 parse_skipto(char **conf, int mc)
460 {
461 int c, match;
462
463 while (1) {
464 c = parse_peek(conf);
465 if (c == 0)
466 return (PE_EOF);
467 switch (mc) {
468 case CC_WHITESPACE:
469 match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0;
470 break;
471 case CC_NONWHITESPACE:
472 if (c == '\n')
473 return (PE_EOL);
474 match = (c != ' ' && c != '\t') ? 1 : 0;
475 break;
476 default:
477 match = (c == mc) ? 1 : 0;
478 break;
479 }
480 if (match)
481 break;
482 parse_advance(conf);
483 }
484 return (0);
485 }
486
487 static int
parse_token(char ** conf,char ** tok)488 parse_token(char **conf, char **tok)
489 {
490 char *p;
491 size_t len;
492 int error;
493
494 *tok = NULL;
495 error = parse_skipto(conf, CC_NONWHITESPACE);
496 if (error)
497 return (error);
498 p = *conf;
499 error = parse_skipto(conf, CC_WHITESPACE);
500 len = *conf - p;
501 *tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO);
502 bcopy(p, *tok, len);
503 return (0);
504 }
505
506 static void
parse_dir_ask_printenv(const char * var)507 parse_dir_ask_printenv(const char *var)
508 {
509 char *val;
510
511 val = kern_getenv(var);
512 if (val != NULL) {
513 printf(" %s=%s\n", var, val);
514 freeenv(val);
515 }
516 }
517
518 static int
parse_dir_ask(char ** conf)519 parse_dir_ask(char **conf)
520 {
521 char name[80];
522 char *mnt;
523 int error;
524
525 vfs_mountroot_wait();
526
527 printf("\nLoader variables:\n");
528 parse_dir_ask_printenv("vfs.root.mountfrom");
529 parse_dir_ask_printenv("vfs.root.mountfrom.options");
530
531 printf("\nManual root filesystem specification:\n");
532 printf(" <fstype>:<device> [options]\n");
533 printf(" Mount <device> using filesystem <fstype>\n");
534 printf(" and with the specified (optional) option list.\n");
535 printf("\n");
536 printf(" eg. ufs:/dev/da0s1a\n");
537 printf(" zfs:zroot/ROOT/default\n");
538 printf(" cd9660:/dev/cd0 ro\n");
539 printf(" (which is equivalent to: ");
540 printf("mount -t cd9660 -o ro /dev/cd0 /)\n");
541 printf("\n");
542 printf(" ? List valid disk boot devices\n");
543 printf(" . Yield 1 second (for background tasks)\n");
544 printf(" <empty line> Abort manual input\n");
545
546 do {
547 error = EINVAL;
548 printf("\nmountroot> ");
549 cngets(name, sizeof(name), GETS_ECHO);
550 if (name[0] == '\0')
551 break;
552 if (name[0] == '?' && name[1] == '\0') {
553 printf("\nList of GEOM managed disk devices:\n ");
554 g_dev_print();
555 continue;
556 }
557 if (name[0] == '.' && name[1] == '\0') {
558 pause("rmask", hz);
559 continue;
560 }
561 mnt = name;
562 error = parse_mount(&mnt);
563 if (error == -1)
564 printf("Invalid file system specification.\n");
565 } while (error != 0);
566
567 return (error);
568 }
569
570 static int
parse_dir_md(char ** conf)571 parse_dir_md(char **conf)
572 {
573 struct stat sb;
574 struct thread *td;
575 struct md_ioctl *mdio;
576 char *path, *tok;
577 int error, fd, len;
578
579 td = curthread;
580 fd = -1;
581
582 error = parse_token(conf, &tok);
583 if (error)
584 return (error);
585
586 len = strlen(tok);
587 mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
588 path = (void *)(mdio + 1);
589 bcopy(tok, path, len);
590 free(tok, M_TEMP);
591
592 /* Get file status. */
593 error = kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &sb);
594 if (error)
595 goto out;
596
597 /* Open /dev/mdctl so that we can attach/detach. */
598 error = kern_openat(td, AT_FDCWD, "/dev/" MDCTL_NAME, UIO_SYSSPACE,
599 O_RDWR, 0);
600 if (error)
601 goto out;
602
603 fd = td->td_retval[0];
604 mdio->md_version = MDIOVERSION;
605 mdio->md_type = MD_VNODE;
606
607 if (root_mount_mddev != -1) {
608 mdio->md_unit = root_mount_mddev;
609 (void)kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
610 /* Ignore errors. We don't care. */
611 root_mount_mddev = -1;
612 }
613
614 mdio->md_file = (void *)(mdio + 1);
615 mdio->md_options = MD_AUTOUNIT | MD_READONLY;
616 mdio->md_mediasize = sb.st_size;
617 mdio->md_unit = 0;
618 error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
619 if (error)
620 goto out;
621
622 if (mdio->md_unit > 9) {
623 printf("rootmount: too many md units\n");
624 mdio->md_file = NULL;
625 mdio->md_options = 0;
626 mdio->md_mediasize = 0;
627 error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
628 /* Ignore errors. We don't care. */
629 error = ERANGE;
630 goto out;
631 }
632
633 root_mount_mddev = mdio->md_unit;
634 printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
635
636 out:
637 if (fd >= 0)
638 (void)kern_close(td, fd);
639 free(mdio, M_TEMP);
640 return (error);
641 }
642
643 static int
parse_dir_onfail(char ** conf)644 parse_dir_onfail(char **conf)
645 {
646 char *action;
647 int error;
648
649 error = parse_token(conf, &action);
650 if (error)
651 return (error);
652
653 if (!strcmp(action, "continue"))
654 root_mount_onfail = A_CONTINUE;
655 else if (!strcmp(action, "panic"))
656 root_mount_onfail = A_PANIC;
657 else if (!strcmp(action, "reboot"))
658 root_mount_onfail = A_REBOOT;
659 else if (!strcmp(action, "retry"))
660 root_mount_onfail = A_RETRY;
661 else {
662 printf("rootmount: %s: unknown action\n", action);
663 error = EINVAL;
664 }
665
666 free(action, M_TEMP);
667 return (0);
668 }
669
670 static int
parse_dir_timeout(char ** conf)671 parse_dir_timeout(char **conf)
672 {
673 char *tok, *endtok;
674 long secs;
675 int error;
676
677 error = parse_token(conf, &tok);
678 if (error)
679 return (error);
680
681 secs = strtol(tok, &endtok, 0);
682 error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
683 if (!error)
684 root_mount_timeout = secs;
685 free(tok, M_TEMP);
686 return (error);
687 }
688
689 static int
parse_directive(char ** conf)690 parse_directive(char **conf)
691 {
692 char *dir;
693 int error;
694
695 error = parse_token(conf, &dir);
696 if (error)
697 return (error);
698
699 if (strcmp(dir, ".ask") == 0)
700 error = parse_dir_ask(conf);
701 else if (strcmp(dir, ".md") == 0)
702 error = parse_dir_md(conf);
703 else if (strcmp(dir, ".onfail") == 0)
704 error = parse_dir_onfail(conf);
705 else if (strcmp(dir, ".timeout") == 0)
706 error = parse_dir_timeout(conf);
707 else {
708 printf("mountroot: invalid directive `%s'\n", dir);
709 /* Ignore the rest of the line. */
710 (void)parse_skipto(conf, '\n');
711 error = EINVAL;
712 }
713 free(dir, M_TEMP);
714 return (error);
715 }
716
717 static bool
parse_mount_dev_present(const char * dev)718 parse_mount_dev_present(const char *dev)
719 {
720 struct nameidata nd;
721 int error;
722
723 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, dev);
724 error = namei(&nd);
725 if (error != 0)
726 return (false);
727 vrele(nd.ni_vp);
728 NDFREE_PNBUF(&nd);
729 return (true);
730 }
731
732 #define ERRMSGL 255
733 static int
parse_mount(char ** conf)734 parse_mount(char **conf)
735 {
736 char *errmsg;
737 struct mntarg *ma;
738 char *dev, *fs, *opts, *tok;
739 int delay, error, timeout;
740
741 error = parse_token(conf, &tok);
742 if (error)
743 return (error);
744 fs = tok;
745 error = parse_skipto(&tok, ':');
746 if (error) {
747 free(fs, M_TEMP);
748 return (error);
749 }
750 parse_poke(&tok, '\0');
751 parse_advance(&tok);
752 dev = tok;
753
754 if (root_mount_mddev != -1) {
755 /* Handle substitution for the md unit number. */
756 tok = strstr(dev, "md#");
757 if (tok != NULL)
758 tok[2] = '0' + root_mount_mddev;
759 }
760
761 /* Parse options. */
762 error = parse_token(conf, &tok);
763 opts = (error == 0) ? tok : NULL;
764
765 printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
766 (opts != NULL) ? opts : "");
767
768 errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);
769
770 if (vfs_byname(fs) == NULL) {
771 strlcpy(errmsg, "unknown file system", ERRMSGL);
772 error = ENOENT;
773 goto out;
774 }
775
776 error = vfs_mountroot_wait_if_neccessary(fs, dev);
777 if (error != 0)
778 goto out;
779
780 delay = hz / 10;
781 timeout = root_mount_timeout * hz;
782
783 for (;;) {
784 ma = NULL;
785 ma = mount_arg(ma, "fstype", fs, -1);
786 ma = mount_arg(ma, "fspath", "/", -1);
787 ma = mount_arg(ma, "from", dev, -1);
788 ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
789 ma = mount_arg(ma, "ro", NULL, 0);
790 ma = parse_mountroot_options(ma, opts);
791
792 error = kernel_mount(ma, MNT_ROOTFS);
793 if (error == 0 || error == EILSEQ || timeout <= 0)
794 break;
795
796 if (root_mount_timeout * hz == timeout ||
797 (bootverbose && timeout % hz == 0)) {
798 printf("Mounting from %s:%s failed with error %d; "
799 "retrying for %d more second%s\n", fs, dev, error,
800 timeout / hz, (timeout / hz > 1) ? "s" : "");
801 }
802 pause("rmretry", delay);
803 timeout -= delay;
804 }
805 out:
806 if (error) {
807 printf("Mounting from %s:%s failed with error %d",
808 fs, dev, error);
809 if (errmsg[0] != '\0')
810 printf(": %s", errmsg);
811 printf(".\n");
812 }
813 free(fs, M_TEMP);
814 free(errmsg, M_TEMP);
815 if (opts != NULL)
816 free(opts, M_TEMP);
817 /* kernel_mount can return -1 on error. */
818 return ((error < 0) ? EDOOFUS : error);
819 }
820 #undef ERRMSGL
821
822 static int
vfs_mountroot_parse(struct sbuf * sb,struct mount * mpdevfs)823 vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
824 {
825 struct mount *mp;
826 char *conf;
827 int error;
828
829 root_mount_mddev = -1;
830
831 retry:
832 conf = sbuf_data(sb);
833 mp = TAILQ_NEXT(mpdevfs, mnt_list);
834 error = (mp == NULL) ? 0 : EDOOFUS;
835 root_mount_onfail = A_CONTINUE;
836 while (mp == NULL) {
837 error = parse_skipto(&conf, CC_NONWHITESPACE);
838 if (error == PE_EOL) {
839 parse_advance(&conf);
840 continue;
841 }
842 if (error < 0)
843 break;
844 switch (parse_peek(&conf)) {
845 case '#':
846 error = parse_skipto(&conf, '\n');
847 break;
848 case '.':
849 error = parse_directive(&conf);
850 break;
851 default:
852 error = parse_mount(&conf);
853 if (error == -1) {
854 printf("mountroot: invalid file system "
855 "specification.\n");
856 error = 0;
857 }
858 break;
859 }
860 if (error < 0)
861 break;
862 /* Ignore any trailing garbage on the line. */
863 if (parse_peek(&conf) != '\n') {
864 printf("mountroot: advancing to next directive...\n");
865 (void)parse_skipto(&conf, '\n');
866 }
867 mp = TAILQ_NEXT(mpdevfs, mnt_list);
868 }
869 if (mp != NULL)
870 return (0);
871
872 /*
873 * We failed to mount (a new) root.
874 */
875 switch (root_mount_onfail) {
876 case A_CONTINUE:
877 break;
878 case A_PANIC:
879 panic("mountroot: unable to (re-)mount root.");
880 /* NOTREACHED */
881 case A_RETRY:
882 goto retry;
883 case A_REBOOT:
884 kern_reboot(RB_NOSYNC);
885 /* NOTREACHED */
886 }
887
888 return (error);
889 }
890
891 static void
vfs_mountroot_conf0(struct sbuf * sb)892 vfs_mountroot_conf0(struct sbuf *sb)
893 {
894 char *s, *tok, *mnt, *opt;
895 int error;
896
897 sbuf_cat(sb, ".onfail panic\n");
898 sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
899 if (boothowto & RB_ASKNAME)
900 sbuf_cat(sb, ".ask\n");
901 #ifdef ROOTDEVNAME
902 if (boothowto & RB_DFLTROOT)
903 sbuf_printf(sb, "%s\n", ROOTDEVNAME);
904 #endif
905 if (boothowto & RB_CDROM) {
906 sbuf_cat(sb, "cd9660:/dev/cd0 ro\n");
907 sbuf_cat(sb, ".timeout 0\n");
908 sbuf_cat(sb, "cd9660:/dev/cd1 ro\n");
909 sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
910 }
911 s = kern_getenv("vfs.root.mountfrom");
912 if (s != NULL) {
913 opt = kern_getenv("vfs.root.mountfrom.options");
914 tok = s;
915 error = parse_token(&tok, &mnt);
916 while (!error) {
917 sbuf_printf(sb, "%s %s\n", mnt,
918 (opt != NULL) ? opt : "");
919 free(mnt, M_TEMP);
920 error = parse_token(&tok, &mnt);
921 }
922 if (opt != NULL)
923 freeenv(opt);
924 freeenv(s);
925 }
926 if (rootdevnames[0] != NULL)
927 sbuf_printf(sb, "%s\n", rootdevnames[0]);
928 if (rootdevnames[1] != NULL)
929 sbuf_printf(sb, "%s\n", rootdevnames[1]);
930 #ifdef ROOTDEVNAME
931 if (!(boothowto & RB_DFLTROOT))
932 sbuf_printf(sb, "%s\n", ROOTDEVNAME);
933 #endif
934 if (!(boothowto & RB_ASKNAME))
935 sbuf_cat(sb, ".ask\n");
936 }
937
938 static int
vfs_mountroot_readconf(struct thread * td,struct sbuf * sb)939 vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
940 {
941 static char buf[128];
942 struct nameidata nd;
943 off_t ofs;
944 ssize_t resid;
945 int error, flags, len;
946
947 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf");
948 flags = FREAD;
949 error = vn_open(&nd, &flags, 0, NULL);
950 if (error)
951 return (error);
952
953 NDFREE_PNBUF(&nd);
954 ofs = 0;
955 len = sizeof(buf) - 1;
956 while (1) {
957 error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
958 UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
959 NOCRED, &resid, td);
960 if (error)
961 break;
962 if (resid == len)
963 break;
964 buf[len - resid] = 0;
965 sbuf_printf(sb, "%s", buf);
966 ofs += len - resid;
967 }
968
969 VOP_UNLOCK(nd.ni_vp);
970 vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
971 return (error);
972 }
973
974 static void
vfs_mountroot_wait(void)975 vfs_mountroot_wait(void)
976 {
977 struct root_hold_token *h;
978 struct thread *td;
979 struct timeval lastfail;
980 int curfail;
981
982 TSENTER();
983
984 curfail = 0;
985 lastfail.tv_sec = 0;
986 eventratecheck(&lastfail, &curfail, 1);
987 td = curthread;
988 while (1) {
989 g_waitidle(td);
990 mtx_lock(&root_holds_mtx);
991 if (TAILQ_EMPTY(&root_holds)) {
992 mtx_unlock(&root_holds_mtx);
993 break;
994 }
995 if (eventratecheck(&lastfail, &curfail, 1)) {
996 printf("Root mount waiting for:");
997 TAILQ_FOREACH(h, &root_holds, list)
998 printf(" %s", h->who);
999 printf("\n");
1000 }
1001 TSWAIT("root mount");
1002 msleep(&root_holds, &root_holds_mtx, PZERO | PDROP, "roothold",
1003 hz);
1004 TSUNWAIT("root mount");
1005 }
1006 g_waitidle(td);
1007
1008 TSEXIT();
1009 }
1010
1011 static int
vfs_mountroot_wait_if_neccessary(const char * fs,const char * dev)1012 vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev)
1013 {
1014 int delay, timeout;
1015
1016 /*
1017 * In case of ZFS and NFS we don't have a way to wait for
1018 * specific device. Also do the wait if the user forced that
1019 * behaviour by setting vfs.root_mount_always_wait=1.
1020 */
1021 if (strcmp(fs, "zfs") == 0 || strstr(fs, "nfs") != NULL ||
1022 strcmp(fs, "p9fs") == 0 ||
1023 dev[0] == '\0' || root_mount_always_wait != 0) {
1024 vfs_mountroot_wait();
1025 return (0);
1026 }
1027
1028 /*
1029 * Otherwise, no point in waiting if the device is already there.
1030 * Note that we must wait for GEOM to finish reconfiguring itself,
1031 * eg for geom_part(4) to finish tasting.
1032 */
1033 g_waitidle(curthread);
1034 if (parse_mount_dev_present(dev))
1035 return (0);
1036
1037 /*
1038 * No luck. Let's wait. This code looks weird, but it's that way
1039 * to behave exactly as it used to work before.
1040 */
1041 vfs_mountroot_wait();
1042 if (parse_mount_dev_present(dev))
1043 return (0);
1044 printf("mountroot: waiting for device %s...\n", dev);
1045 delay = hz / 10;
1046 timeout = root_mount_timeout * hz;
1047 do {
1048 pause("rmdev", delay);
1049 timeout -= delay;
1050 } while (timeout > 0 && !parse_mount_dev_present(dev));
1051
1052 if (timeout <= 0)
1053 return (ENODEV);
1054
1055 return (0);
1056 }
1057
1058 void
vfs_mountroot(void)1059 vfs_mountroot(void)
1060 {
1061 struct mount *mp;
1062 struct sbuf *sb;
1063 struct thread *td;
1064 time_t timebase;
1065 int error;
1066
1067 mtx_assert(&Giant, MA_NOTOWNED);
1068
1069 TSENTER();
1070
1071 td = curthread;
1072
1073 sb = sbuf_new_auto();
1074 vfs_mountroot_conf0(sb);
1075 sbuf_finish(sb);
1076
1077 error = vfs_mountroot_devfs(td, &mp);
1078 while (!error) {
1079 error = vfs_mountroot_parse(sb, mp);
1080 if (!error) {
1081 vfs_mountroot_shuffle(td, mp);
1082 sbuf_clear(sb);
1083 error = vfs_mountroot_readconf(td, sb);
1084 sbuf_finish(sb);
1085 }
1086 }
1087
1088 sbuf_delete(sb);
1089
1090 /*
1091 * Iterate over all currently mounted file systems and use
1092 * the time stamp found to check and/or initialize the RTC.
1093 * Call inittodr() only once and pass it the largest of the
1094 * timestamps we encounter.
1095 */
1096 timebase = 0;
1097 mtx_lock(&mountlist_mtx);
1098 mp = TAILQ_FIRST(&mountlist);
1099 while (mp != NULL) {
1100 if (mp->mnt_time > timebase)
1101 timebase = mp->mnt_time;
1102 mp = TAILQ_NEXT(mp, mnt_list);
1103 }
1104 mtx_unlock(&mountlist_mtx);
1105 inittodr(timebase);
1106
1107 /* Keep prison0's root in sync with the global rootvnode. */
1108 mtx_lock(&prison0.pr_mtx);
1109 prison0.pr_root = rootvnode;
1110 vref(prison0.pr_root);
1111 mtx_unlock(&prison0.pr_mtx);
1112
1113 mtx_lock(&root_holds_mtx);
1114 atomic_store_rel_int(&root_mount_complete, 1);
1115 wakeup(&root_mount_complete);
1116 mtx_unlock(&root_holds_mtx);
1117
1118 EVENTHANDLER_INVOKE(mountroot);
1119
1120 TSEXIT();
1121 }
1122
1123 static struct mntarg *
parse_mountroot_options(struct mntarg * ma,const char * options)1124 parse_mountroot_options(struct mntarg *ma, const char *options)
1125 {
1126 char *p;
1127 char *name, *name_arg;
1128 char *val, *val_arg;
1129 char *opts;
1130
1131 if (options == NULL || options[0] == '\0')
1132 return (ma);
1133
1134 p = opts = strdup(options, M_MOUNT);
1135 if (opts == NULL) {
1136 return (ma);
1137 }
1138
1139 while((name = strsep(&p, ",")) != NULL) {
1140 if (name[0] == '\0')
1141 break;
1142
1143 val = strchr(name, '=');
1144 if (val != NULL) {
1145 *val = '\0';
1146 ++val;
1147 }
1148 if (strcmp(name, "rw") == 0 || strcmp(name, "noro") == 0) {
1149 /*
1150 * The first time we mount the root file system,
1151 * we need to mount 'ro', so We need to ignore
1152 * 'rw' and 'noro' mount options.
1153 */
1154 continue;
1155 }
1156 name_arg = strdup(name, M_MOUNT);
1157 val_arg = NULL;
1158 if (val != NULL)
1159 val_arg = strdup(val, M_MOUNT);
1160
1161 ma = mount_arg(ma, name_arg, val_arg,
1162 (val_arg != NULL ? -1 : 0));
1163 }
1164 free(opts, M_MOUNT);
1165 return (ma);
1166 }
1167