1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*-
30 * Copyright (c) 2011 Google, Inc.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 */
54
55 #include <sys/cdefs.h>
56 #include <sys/ioctl.h>
57 #include <sys/stat.h>
58 #include <sys/disk.h>
59 #include <sys/queue.h>
60
61 #include <machine/specialreg.h>
62 #include <machine/vmm.h>
63
64 #include <assert.h>
65 #include <dirent.h>
66 #include <dlfcn.h>
67 #include <errno.h>
68 #include <err.h>
69 #include <fcntl.h>
70 #include <getopt.h>
71 #include <libgen.h>
72 #include <limits.h>
73 #include <setjmp.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <sysexits.h>
78 #include <termios.h>
79 #include <unistd.h>
80
81 #include <capsicum_helpers.h>
82 #include <vmmapi.h>
83
84 #include "userboot.h"
85
/* Byte-count units; both expand to unsigned long values. */
#define	MB	(1024UL * 1024)
#define	GB	(1024UL * MB)

/* Id of the single vCPU this program opens and programs. */
#define	BSP	0

/* Capacity of disk_fd[], i.e. the most disks disk_open() will accept. */
#define	NDISKS	32

/*
 * Values handed to longjmp(3) when the loader has to be reloaded and
 * re-entered.  The reason is not examined at the moment; only "non-zero"
 * matters to the setjmp(3) call site.
 */
enum {
	/* setjmp(3) itself returns 0, so the codes start at 1. */
	JMP_SWAPLOADER = 1,
	JMP_REBOOT = 2,
};

/* Raw terminal settings applied while the loader runs. */
static struct termios term;
/* Terminal settings saved so cb_exit() can put them back. */
static struct termios oldterm;

/* Descriptors of the disk images, filled in by disk_open(). */
static int disk_fd[NDISKS];
static int ndisks;

/* Console descriptors read by cb_getc()/cb_poll() and written by cb_putc(). */
static int consin_fd;
static int consout_fd;

/* Directory descriptor the host filesystem callbacks resolve against. */
static int hostbase_fd = -1;

/* Handle of the currently loaded loader, as returned by fdlopen(3). */
static void *loader_hdl;
/* Heap-allocated file name of the loader. */
static char *loader;
/* Descriptor of a loader given with -l, or -1 when none was given. */
static int explicit_loader_fd = -1;
/* Re-entry point established by main(). */
static jmp_buf jb;

static char *vmname;
static char *progname;
static struct vmctx *ctx;
static struct vcpu *vcpu;

/* Values recorded by cb_setgdt(), cb_setcr() and cb_setreg() for cb_exec(). */
static uint64_t gdtbase;
static uint64_t cr3;
static uint64_t rsp;

static void cb_exit(void *arg, int v);
120
121 /*
122 * Console i/o callbacks
123 */
124
125 static void
cb_putc(void * arg __unused,int ch)126 cb_putc(void *arg __unused, int ch)
127 {
128 char c = ch;
129
130 (void) write(consout_fd, &c, 1);
131 }
132
133 static int
cb_getc(void * arg __unused)134 cb_getc(void *arg __unused)
135 {
136 char c;
137
138 if (read(consin_fd, &c, 1) == 1)
139 return (c);
140 return (-1);
141 }
142
143 static int
cb_poll(void * arg __unused)144 cb_poll(void *arg __unused)
145 {
146 int n;
147
148 if (ioctl(consin_fd, FIONREAD, &n) >= 0)
149 return (n > 0);
150 return (0);
151 }
152
153 /*
154 * Host filesystem i/o callbacks
155 */
156
/*
 * Handle handed to the loader by cb_open() and released by cb_close().
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is valid, else cf_u.fd */
	size_t cf_size;		/* st_size as seen when the file was opened */
	struct stat cf_stat;	/* stat(2) snapshot taken by cb_open() */
	union {
		int fd;		/* regular file */
		DIR *dir;	/* directory stream */
	} cf_u;
};
166
167 static int
cb_open(void * arg __unused,const char * filename,void ** hp)168 cb_open(void *arg __unused, const char *filename, void **hp)
169 {
170 struct cb_file *cf;
171 struct stat sb;
172 int fd, flags;
173
174 cf = NULL;
175 fd = -1;
176 flags = O_RDONLY | O_RESOLVE_BENEATH;
177 if (hostbase_fd == -1)
178 return (ENOENT);
179
180 /* Absolute paths are relative to our hostbase, chop off leading /. */
181 if (filename[0] == '/')
182 filename++;
183
184 /* Lookup of /, use . instead. */
185 if (filename[0] == '\0')
186 filename = ".";
187
188 if (fstatat(hostbase_fd, filename, &sb, AT_RESOLVE_BENEATH) < 0)
189 return (errno);
190
191 if (!S_ISDIR(sb.st_mode) && !S_ISREG(sb.st_mode))
192 return (EINVAL);
193
194 if (S_ISDIR(sb.st_mode))
195 flags |= O_DIRECTORY;
196
197 /* May be opening the root dir */
198 fd = openat(hostbase_fd, filename, flags);
199 if (fd < 0)
200 return (errno);
201
202 cf = malloc(sizeof(struct cb_file));
203 if (cf == NULL) {
204 close(fd);
205 return (ENOMEM);
206 }
207
208 cf->cf_stat = sb;
209 cf->cf_size = cf->cf_stat.st_size;
210
211 if (S_ISDIR(cf->cf_stat.st_mode)) {
212 cf->cf_isdir = 1;
213 cf->cf_u.dir = fdopendir(fd);
214 if (cf->cf_u.dir == NULL) {
215 close(fd);
216 free(cf);
217 return (ENOMEM);
218 }
219 } else {
220 assert(S_ISREG(cf->cf_stat.st_mode));
221 cf->cf_isdir = 0;
222 cf->cf_u.fd = fd;
223 }
224 *hp = cf;
225 return (0);
226 }
227
228 static int
cb_close(void * arg __unused,void * h)229 cb_close(void *arg __unused, void *h)
230 {
231 struct cb_file *cf = h;
232
233 if (cf->cf_isdir)
234 closedir(cf->cf_u.dir);
235 else
236 close(cf->cf_u.fd);
237 free(cf);
238
239 return (0);
240 }
241
242 static int
cb_isdir(void * arg __unused,void * h)243 cb_isdir(void *arg __unused, void *h)
244 {
245 struct cb_file *cf = h;
246
247 return (cf->cf_isdir);
248 }
249
250 static int
cb_read(void * arg __unused,void * h,void * buf,size_t size,size_t * resid)251 cb_read(void *arg __unused, void *h, void *buf, size_t size, size_t *resid)
252 {
253 struct cb_file *cf = h;
254 ssize_t sz;
255
256 if (cf->cf_isdir)
257 return (EINVAL);
258 sz = read(cf->cf_u.fd, buf, size);
259 if (sz < 0)
260 return (EINVAL);
261 *resid = size - sz;
262 return (0);
263 }
264
265 static int
cb_readdir(void * arg __unused,void * h,uint32_t * fileno_return,uint8_t * type_return,size_t * namelen_return,char * name)266 cb_readdir(void *arg __unused, void *h, uint32_t *fileno_return,
267 uint8_t *type_return, size_t *namelen_return, char *name)
268 {
269 struct cb_file *cf = h;
270 struct dirent *dp;
271
272 if (!cf->cf_isdir)
273 return (EINVAL);
274
275 dp = readdir(cf->cf_u.dir);
276 if (!dp)
277 return (ENOENT);
278
279 /*
280 * Note: d_namlen is in the range 0..255 and therefore less
281 * than PATH_MAX so we don't need to test before copying.
282 */
283 *fileno_return = dp->d_fileno;
284 *type_return = dp->d_type;
285 *namelen_return = dp->d_namlen;
286 memcpy(name, dp->d_name, dp->d_namlen);
287 name[dp->d_namlen] = 0;
288
289 return (0);
290 }
291
292 static int
cb_seek(void * arg __unused,void * h,uint64_t offset,int whence)293 cb_seek(void *arg __unused, void *h, uint64_t offset, int whence)
294 {
295 struct cb_file *cf = h;
296
297 if (cf->cf_isdir)
298 return (EINVAL);
299 if (lseek(cf->cf_u.fd, offset, whence) < 0)
300 return (errno);
301 return (0);
302 }
303
304 static int
cb_stat(void * arg __unused,void * h,struct stat * sbp)305 cb_stat(void *arg __unused, void *h, struct stat *sbp)
306 {
307 struct cb_file *cf = h;
308
309 memset(sbp, 0, sizeof(struct stat));
310 sbp->st_mode = cf->cf_stat.st_mode;
311 sbp->st_uid = cf->cf_stat.st_uid;
312 sbp->st_gid = cf->cf_stat.st_gid;
313 sbp->st_size = cf->cf_stat.st_size;
314 sbp->st_mtime = cf->cf_stat.st_mtime;
315 sbp->st_dev = cf->cf_stat.st_dev;
316 sbp->st_ino = cf->cf_stat.st_ino;
317
318 return (0);
319 }
320
321 /*
322 * Disk image i/o callbacks
323 */
324
325 static int
cb_diskread(void * arg __unused,int unit,uint64_t from,void * to,size_t size,size_t * resid)326 cb_diskread(void *arg __unused, int unit, uint64_t from, void *to, size_t size,
327 size_t *resid)
328 {
329 ssize_t n;
330
331 if (unit < 0 || unit >= ndisks)
332 return (EIO);
333 n = pread(disk_fd[unit], to, size, from);
334 if (n < 0)
335 return (errno);
336 *resid = size - n;
337 return (0);
338 }
339
340 static int
cb_diskwrite(void * arg __unused,int unit,uint64_t offset,void * src,size_t size,size_t * resid)341 cb_diskwrite(void *arg __unused, int unit, uint64_t offset, void *src,
342 size_t size, size_t *resid)
343 {
344 ssize_t n;
345
346 if (unit < 0 || unit >= ndisks)
347 return (EIO);
348 n = pwrite(disk_fd[unit], src, size, offset);
349 if (n < 0)
350 return (errno);
351 *resid = size - n;
352 return (0);
353 }
354
355 static int
cb_diskioctl(void * arg __unused,int unit,u_long cmd,void * data)356 cb_diskioctl(void *arg __unused, int unit, u_long cmd, void *data)
357 {
358 struct stat sb;
359
360 if (unit < 0 || unit >= ndisks)
361 return (EBADF);
362
363 switch (cmd) {
364 case DIOCGSECTORSIZE:
365 *(u_int *)data = 512;
366 break;
367 case DIOCGMEDIASIZE:
368 if (fstat(disk_fd[unit], &sb) != 0)
369 return (ENOTTY);
370 if (S_ISCHR(sb.st_mode) &&
371 ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0)
372 return (ENOTTY);
373 *(off_t *)data = sb.st_size;
374 break;
375 default:
376 return (ENOTTY);
377 }
378
379 return (0);
380 }
381
382 /*
383 * Guest virtual machine i/o callbacks
384 */
385 static int
cb_copyin(void * arg __unused,const void * from,uint64_t to,size_t size)386 cb_copyin(void *arg __unused, const void *from, uint64_t to, size_t size)
387 {
388 char *ptr;
389
390 to &= 0x7fffffff;
391
392 ptr = vm_map_gpa(ctx, to, size);
393 if (ptr == NULL)
394 return (EFAULT);
395
396 memcpy(ptr, from, size);
397 return (0);
398 }
399
400 static int
cb_copyout(void * arg __unused,uint64_t from,void * to,size_t size)401 cb_copyout(void *arg __unused, uint64_t from, void *to, size_t size)
402 {
403 char *ptr;
404
405 from &= 0x7fffffff;
406
407 ptr = vm_map_gpa(ctx, from, size);
408 if (ptr == NULL)
409 return (EFAULT);
410
411 memcpy(to, ptr, size);
412 return (0);
413 }
414
415 static void
cb_setreg(void * arg __unused,int r,uint64_t v)416 cb_setreg(void *arg __unused, int r, uint64_t v)
417 {
418 int error;
419 enum vm_reg_name vmreg;
420
421 vmreg = VM_REG_LAST;
422
423 switch (r) {
424 case 4:
425 vmreg = VM_REG_GUEST_RSP;
426 rsp = v;
427 break;
428 default:
429 break;
430 }
431
432 if (vmreg == VM_REG_LAST) {
433 printf("test_setreg(%d): not implemented\n", r);
434 cb_exit(NULL, USERBOOT_EXIT_QUIT);
435 }
436
437 error = vm_set_register(vcpu, vmreg, v);
438 if (error) {
439 perror("vm_set_register");
440 cb_exit(NULL, USERBOOT_EXIT_QUIT);
441 }
442 }
443
444 static void
cb_setmsr(void * arg __unused,int r,uint64_t v)445 cb_setmsr(void *arg __unused, int r, uint64_t v)
446 {
447 int error;
448 enum vm_reg_name vmreg;
449
450 vmreg = VM_REG_LAST;
451
452 switch (r) {
453 case MSR_EFER:
454 vmreg = VM_REG_GUEST_EFER;
455 break;
456 default:
457 break;
458 }
459
460 if (vmreg == VM_REG_LAST) {
461 printf("test_setmsr(%d): not implemented\n", r);
462 cb_exit(NULL, USERBOOT_EXIT_QUIT);
463 }
464
465 error = vm_set_register(vcpu, vmreg, v);
466 if (error) {
467 perror("vm_set_msr");
468 cb_exit(NULL, USERBOOT_EXIT_QUIT);
469 }
470 }
471
472 static void
cb_setcr(void * arg __unused,int r,uint64_t v)473 cb_setcr(void *arg __unused, int r, uint64_t v)
474 {
475 int error;
476 enum vm_reg_name vmreg;
477
478 vmreg = VM_REG_LAST;
479
480 switch (r) {
481 case 0:
482 vmreg = VM_REG_GUEST_CR0;
483 break;
484 case 3:
485 vmreg = VM_REG_GUEST_CR3;
486 cr3 = v;
487 break;
488 case 4:
489 vmreg = VM_REG_GUEST_CR4;
490 break;
491 default:
492 break;
493 }
494
495 if (vmreg == VM_REG_LAST) {
496 printf("test_setcr(%d): not implemented\n", r);
497 cb_exit(NULL, USERBOOT_EXIT_QUIT);
498 }
499
500 error = vm_set_register(vcpu, vmreg, v);
501 if (error) {
502 perror("vm_set_cr");
503 cb_exit(NULL, USERBOOT_EXIT_QUIT);
504 }
505 }
506
507 static void
cb_setgdt(void * arg __unused,uint64_t base,size_t size)508 cb_setgdt(void *arg __unused, uint64_t base, size_t size)
509 {
510 int error;
511
512 error = vm_set_desc(vcpu, VM_REG_GUEST_GDTR, base, size - 1, 0);
513 if (error != 0) {
514 perror("vm_set_desc(gdt)");
515 cb_exit(NULL, USERBOOT_EXIT_QUIT);
516 }
517
518 gdtbase = base;
519 }
520
521 static void
cb_exec(void * arg __unused,uint64_t rip)522 cb_exec(void *arg __unused, uint64_t rip)
523 {
524 int error;
525
526 if (cr3 == 0)
527 error = vm_setup_freebsd_registers_i386(vcpu, rip, gdtbase,
528 rsp);
529 else
530 error = vm_setup_freebsd_registers(vcpu, rip, cr3, gdtbase,
531 rsp);
532 if (error) {
533 perror("vm_setup_freebsd_registers");
534 cb_exit(NULL, USERBOOT_EXIT_QUIT);
535 }
536
537 cb_exit(NULL, 0);
538 }
539
540 /*
541 * Misc
542 */
543
544 static void
cb_delay(void * arg __unused,int usec)545 cb_delay(void *arg __unused, int usec)
546 {
547
548 usleep(usec);
549 }
550
551 static void
cb_exit(void * arg __unused,int v)552 cb_exit(void *arg __unused, int v)
553 {
554
555 tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
556 if (v == USERBOOT_EXIT_REBOOT)
557 longjmp(jb, JMP_REBOOT);
558 exit(v);
559 }
560
561 static void
cb_getmem(void * arg __unused,uint64_t * ret_lowmem,uint64_t * ret_highmem)562 cb_getmem(void *arg __unused, uint64_t *ret_lowmem, uint64_t *ret_highmem)
563 {
564
565 *ret_lowmem = vm_get_lowmem_size(ctx);
566 *ret_highmem = vm_get_highmem_size(ctx);
567 }
568
/*
 * One loader environment entry; entries live on the singly-linked envhead
 * list.
 */
struct env {
	char *str;			/* "name=value" */
	SLIST_ENTRY(env) next;		/* list linkage */
};

/* Head of the environment list; starts out empty. */
static SLIST_HEAD(envhead, env) envhead = SLIST_HEAD_INITIALIZER(envhead);
575
576 static void
addenv(const char * str)577 addenv(const char *str)
578 {
579 struct env *env;
580
581 env = malloc(sizeof(struct env));
582 if (env == NULL)
583 err(EX_OSERR, "malloc");
584 env->str = strdup(str);
585 if (env->str == NULL)
586 err(EX_OSERR, "strdup");
587 SLIST_INSERT_HEAD(&envhead, env, next);
588 }
589
590 static char *
cb_getenv(void * arg __unused,int num)591 cb_getenv(void *arg __unused, int num)
592 {
593 int i;
594 struct env *env;
595
596 i = 0;
597 SLIST_FOREACH(env, &envhead, next) {
598 if (i == num)
599 return (env->str);
600 i++;
601 }
602
603 return (NULL);
604 }
605
606 static int
cb_vm_set_register(void * arg __unused,int vcpuid,int reg,uint64_t val)607 cb_vm_set_register(void *arg __unused, int vcpuid, int reg, uint64_t val)
608 {
609
610 assert(vcpuid == BSP);
611 return (vm_set_register(vcpu, reg, val));
612 }
613
614 static int
cb_vm_set_desc(void * arg __unused,int vcpuid,int reg,uint64_t base,u_int limit,u_int access)615 cb_vm_set_desc(void *arg __unused, int vcpuid, int reg, uint64_t base,
616 u_int limit, u_int access)
617 {
618
619 assert(vcpuid == BSP);
620 return (vm_set_desc(vcpu, reg, base, limit, access));
621 }
622
623 static void
cb_swap_interpreter(void * arg __unused,const char * interp_req)624 cb_swap_interpreter(void *arg __unused, const char *interp_req)
625 {
626
627 /*
628 * If the user specified a loader but we detected a mismatch, we should
629 * not try to pivot to a different loader on them.
630 */
631 free(loader);
632 if (explicit_loader_fd != -1) {
633 perror("requested loader interpreter does not match guest userboot");
634 cb_exit(NULL, 1);
635 }
636 if (interp_req == NULL || *interp_req == '\0') {
637 perror("guest failed to request an interpreter");
638 cb_exit(NULL, 1);
639 }
640
641 if (asprintf(&loader, "userboot_%s.so", interp_req) == -1)
642 err(EX_OSERR, "malloc");
643 longjmp(jb, JMP_SWAPLOADER);
644 }
645
646 static struct loader_callbacks cb = {
647 .getc = cb_getc,
648 .putc = cb_putc,
649 .poll = cb_poll,
650
651 .open = cb_open,
652 .close = cb_close,
653 .isdir = cb_isdir,
654 .read = cb_read,
655 .readdir = cb_readdir,
656 .seek = cb_seek,
657 .stat = cb_stat,
658
659 .diskread = cb_diskread,
660 .diskwrite = cb_diskwrite,
661 .diskioctl = cb_diskioctl,
662
663 .copyin = cb_copyin,
664 .copyout = cb_copyout,
665 .setreg = cb_setreg,
666 .setmsr = cb_setmsr,
667 .setcr = cb_setcr,
668 .setgdt = cb_setgdt,
669 .exec = cb_exec,
670
671 .delay = cb_delay,
672 .exit = cb_exit,
673 .getmem = cb_getmem,
674
675 .getenv = cb_getenv,
676
677 /* Version 4 additions */
678 .vm_set_register = cb_vm_set_register,
679 .vm_set_desc = cb_vm_set_desc,
680
681 /* Version 5 additions */
682 .swap_interpreter = cb_swap_interpreter,
683 };
684
685 static int
altcons_open(char * path)686 altcons_open(char *path)
687 {
688 struct stat sb;
689 int err;
690 int fd;
691
692 /*
693 * Allow stdio to be passed in so that the same string
694 * can be used for the bhyveload console and bhyve com-port
695 * parameters
696 */
697 if (!strcmp(path, "stdio"))
698 return (0);
699
700 err = stat(path, &sb);
701 if (err == 0) {
702 if (!S_ISCHR(sb.st_mode))
703 err = ENOTSUP;
704 else {
705 fd = open(path, O_RDWR | O_NONBLOCK);
706 if (fd < 0)
707 err = errno;
708 else
709 consin_fd = consout_fd = fd;
710 }
711 }
712
713 return (err);
714 }
715
716 static int
disk_open(char * path)717 disk_open(char *path)
718 {
719 int fd;
720
721 if (ndisks >= NDISKS)
722 return (ERANGE);
723
724 fd = open(path, O_RDWR);
725 if (fd < 0)
726 return (errno);
727
728 disk_fd[ndisks] = fd;
729 ndisks++;
730
731 return (0);
732 }
733
734 static void
usage(void)735 usage(void)
736 {
737
738 fprintf(stderr,
739 "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
740 " %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n",
741 progname,
742 (int)strlen(progname), "");
743 exit(1);
744 }
745
746 static void
hostbase_open(const char * base)747 hostbase_open(const char *base)
748 {
749 cap_rights_t rights;
750
751 if (hostbase_fd != -1)
752 close(hostbase_fd);
753 hostbase_fd = open(base, O_DIRECTORY | O_PATH);
754 if (hostbase_fd == -1)
755 err(EX_OSERR, "open");
756
757 if (caph_rights_limit(hostbase_fd, cap_rights_init(&rights, CAP_FSTATAT,
758 CAP_LOOKUP, CAP_PREAD)) < 0)
759 err(EX_OSERR, "caph_rights_limit");
760 }
761
762 static void
loader_open(int bootfd)763 loader_open(int bootfd)
764 {
765 int fd;
766
767 if (loader == NULL) {
768 loader = strdup("userboot.so");
769 if (loader == NULL)
770 err(EX_OSERR, "malloc");
771 }
772
773 assert(bootfd >= 0 || explicit_loader_fd >= 0);
774 if (explicit_loader_fd >= 0)
775 fd = explicit_loader_fd;
776 else
777 fd = openat(bootfd, loader, O_RDONLY | O_RESOLVE_BENEATH);
778 if (fd == -1)
779 err(EX_OSERR, "openat");
780
781 loader_hdl = fdlopen(fd, RTLD_LOCAL);
782 if (!loader_hdl)
783 errx(EX_OSERR, "dlopen: %s", dlerror());
784 if (fd != explicit_loader_fd)
785 close(fd);
786 }
787
788 int
main(int argc,char ** argv)789 main(int argc, char** argv)
790 {
791 void (*func)(struct loader_callbacks *, void *, int, int);
792 uint64_t mem_size;
793 int bootfd, opt, error, memflags, need_reinit;
794
795 bootfd = -1;
796 progname = basename(argv[0]);
797
798 memflags = 0;
799 mem_size = 256 * MB;
800
801 consin_fd = STDIN_FILENO;
802 consout_fd = STDOUT_FILENO;
803
804 while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) {
805 switch (opt) {
806 case 'c':
807 error = altcons_open(optarg);
808 if (error != 0)
809 errx(EX_USAGE, "Could not open '%s'", optarg);
810 break;
811
812 case 'd':
813 error = disk_open(optarg);
814 if (error != 0)
815 errx(EX_USAGE, "Could not open '%s'", optarg);
816 break;
817
818 case 'e':
819 addenv(optarg);
820 break;
821
822 case 'h':
823 hostbase_open(optarg);
824 break;
825
826 case 'l':
827 if (loader != NULL)
828 errx(EX_USAGE, "-l can only be given once");
829 loader = strdup(optarg);
830 if (loader == NULL)
831 err(EX_OSERR, "malloc");
832 explicit_loader_fd = open(loader, O_RDONLY);
833 if (explicit_loader_fd == -1)
834 err(EX_OSERR, "%s", loader);
835 break;
836
837 case 'm':
838 error = vm_parse_memsize(optarg, &mem_size);
839 if (error != 0)
840 errx(EX_USAGE, "Invalid memsize '%s'", optarg);
841 break;
842 case 'C':
843 memflags |= VM_MEM_F_INCORE;
844 break;
845 case 'S':
846 memflags |= VM_MEM_F_WIRED;
847 break;
848 case '?':
849 usage();
850 }
851 }
852
853 argc -= optind;
854 argv += optind;
855
856 if (argc != 1)
857 usage();
858
859 vmname = argv[0];
860
861 need_reinit = 0;
862 error = vm_create(vmname);
863 if (error) {
864 if (errno != EEXIST)
865 err(1, "vm_create");
866 need_reinit = 1;
867 }
868
869 ctx = vm_open(vmname);
870 if (ctx == NULL)
871 err(1, "vm_open");
872
873 /*
874 * If we weren't given an explicit loader to use, we need to support the
875 * guest requesting a different one.
876 */
877 if (explicit_loader_fd == -1) {
878 cap_rights_t rights;
879
880 bootfd = open("/boot", O_DIRECTORY | O_PATH);
881 if (bootfd == -1)
882 err(1, "open");
883
884 /*
885 * bootfd will be used to do a lookup of our loader and do an
886 * fdlopen(3) on the loader; thus, we need mmap(2) in addition
887 * to the more usual lookup rights.
888 */
889 if (caph_rights_limit(bootfd, cap_rights_init(&rights,
890 CAP_FSTATAT, CAP_LOOKUP, CAP_MMAP_RX, CAP_PREAD)) < 0)
891 err(1, "caph_rights_limit");
892 }
893
894 vcpu = vm_vcpu_open(ctx, BSP);
895
896 caph_cache_catpages();
897 if (caph_enter() < 0)
898 err(1, "caph_enter");
899
900 /*
901 * setjmp in the case the guest wants to swap out interpreter,
902 * cb_swap_interpreter will swap out loader as appropriate and set
903 * need_reinit so that we end up in a clean state once again.
904 */
905 if (setjmp(jb) != 0) {
906 dlclose(loader_hdl);
907 loader_hdl = NULL;
908
909 need_reinit = 1;
910 }
911
912 if (need_reinit) {
913 error = vm_reinit(ctx);
914 if (error)
915 err(1, "vm_reinit");
916 }
917
918 vm_set_memflags(ctx, memflags);
919 error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
920 if (error)
921 err(1, "vm_setup_memory");
922
923 loader_open(bootfd);
924 func = dlsym(loader_hdl, "loader_main");
925 if (!func)
926 errx(1, "dlsym: %s", dlerror());
927
928 tcgetattr(consout_fd, &term);
929 oldterm = term;
930 cfmakeraw(&term);
931 term.c_cflag |= CLOCAL;
932
933 tcsetattr(consout_fd, TCSAFLUSH, &term);
934
935 addenv("smbios.bios.vendor=BHYVE");
936 addenv("boot_serial=1");
937
938 func(&cb, NULL, USERBOOT_VERSION_5, ndisks);
939
940 free(loader);
941 return (0);
942 }
943