xref: /freebsd/usr.sbin/bhyveload/bhyveload.c (revision 76f2606181eabc8ab20aa7297bbddc6e78bb549d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /*-
32  * Copyright (c) 2011 Google, Inc.
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  *
44  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
45  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
48  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54  * SUCH DAMAGE.
55  *
56  * $FreeBSD$
57  */
58 
59 #include <sys/cdefs.h>
60 __FBSDID("$FreeBSD$");
61 
62 #include <sys/ioctl.h>
63 #include <sys/stat.h>
64 #include <sys/disk.h>
65 #include <sys/queue.h>
66 
67 #include <machine/specialreg.h>
68 #include <machine/vmm.h>
69 
70 #include <dirent.h>
71 #include <dlfcn.h>
72 #include <errno.h>
73 #include <err.h>
74 #include <fcntl.h>
75 #include <getopt.h>
76 #include <libgen.h>
77 #include <limits.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <sysexits.h>
82 #include <termios.h>
83 #include <unistd.h>
84 
85 #include <vmmapi.h>
86 
87 #include "userboot.h"
88 
89 #define	MB	(1024 * 1024UL)
90 #define	GB	(1024 * 1024 * 1024UL)
91 #define	BSP	0
92 
93 #define	NDISKS	32
94 
95 static char *host_base;
96 static struct termios term, oldterm;
97 static int disk_fd[NDISKS];
98 static int ndisks;
99 static int consin_fd, consout_fd;
100 
101 static char *vmname, *progname;
102 static struct vmctx *ctx;
103 
104 static uint64_t gdtbase, cr3, rsp;
105 
106 static void cb_exit(void *arg, int v);
107 
108 /*
109  * Console i/o callbacks
110  */
111 
112 static void
113 cb_putc(void *arg, int ch)
114 {
115 	char c = ch;
116 
117 	(void) write(consout_fd, &c, 1);
118 }
119 
120 static int
121 cb_getc(void *arg)
122 {
123 	char c;
124 
125 	if (read(consin_fd, &c, 1) == 1)
126 		return (c);
127 	return (-1);
128 }
129 
130 static int
131 cb_poll(void *arg)
132 {
133 	int n;
134 
135 	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
136 		return (n > 0);
137 	return (0);
138 }
139 
140 /*
141  * Host filesystem i/o callbacks
142  */
143 
/*
 * Handle for a host file or directory opened by cb_open().
 */
struct cb_file {
	int cf_isdir;		/* 1 for a directory, 0 for a regular file */
	size_t cf_size;		/* st_size captured when the handle was opened */
	struct stat cf_stat;	/* stat(2) result for the opened path */
	union {
		int fd;		/* descriptor, valid when cf_isdir == 0 */
		DIR *dir;	/* directory stream, valid when cf_isdir != 0 */
	} cf_u;
};
153 
154 static int
155 cb_open(void *arg, const char *filename, void **hp)
156 {
157 	struct cb_file *cf;
158 	char path[PATH_MAX];
159 
160 	if (!host_base)
161 		return (ENOENT);
162 
163 	strlcpy(path, host_base, PATH_MAX);
164 	if (path[strlen(path) - 1] == '/')
165 		path[strlen(path) - 1] = 0;
166 	strlcat(path, filename, PATH_MAX);
167 	cf = malloc(sizeof(struct cb_file));
168 	if (stat(path, &cf->cf_stat) < 0) {
169 		free(cf);
170 		return (errno);
171 	}
172 
173 	cf->cf_size = cf->cf_stat.st_size;
174 	if (S_ISDIR(cf->cf_stat.st_mode)) {
175 		cf->cf_isdir = 1;
176 		cf->cf_u.dir = opendir(path);
177 		if (!cf->cf_u.dir)
178 			goto out;
179 		*hp = cf;
180 		return (0);
181 	}
182 	if (S_ISREG(cf->cf_stat.st_mode)) {
183 		cf->cf_isdir = 0;
184 		cf->cf_u.fd = open(path, O_RDONLY);
185 		if (cf->cf_u.fd < 0)
186 			goto out;
187 		*hp = cf;
188 		return (0);
189 	}
190 
191 out:
192 	free(cf);
193 	return (EINVAL);
194 }
195 
196 static int
197 cb_close(void *arg, void *h)
198 {
199 	struct cb_file *cf = h;
200 
201 	if (cf->cf_isdir)
202 		closedir(cf->cf_u.dir);
203 	else
204 		close(cf->cf_u.fd);
205 	free(cf);
206 
207 	return (0);
208 }
209 
210 static int
211 cb_isdir(void *arg, void *h)
212 {
213 	struct cb_file *cf = h;
214 
215 	return (cf->cf_isdir);
216 }
217 
218 static int
219 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
220 {
221 	struct cb_file *cf = h;
222 	ssize_t sz;
223 
224 	if (cf->cf_isdir)
225 		return (EINVAL);
226 	sz = read(cf->cf_u.fd, buf, size);
227 	if (sz < 0)
228 		return (EINVAL);
229 	*resid = size - sz;
230 	return (0);
231 }
232 
233 static int
234 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
235 	   size_t *namelen_return, char *name)
236 {
237 	struct cb_file *cf = h;
238 	struct dirent *dp;
239 
240 	if (!cf->cf_isdir)
241 		return (EINVAL);
242 
243 	dp = readdir(cf->cf_u.dir);
244 	if (!dp)
245 		return (ENOENT);
246 
247 	/*
248 	 * Note: d_namlen is in the range 0..255 and therefore less
249 	 * than PATH_MAX so we don't need to test before copying.
250 	 */
251 	*fileno_return = dp->d_fileno;
252 	*type_return = dp->d_type;
253 	*namelen_return = dp->d_namlen;
254 	memcpy(name, dp->d_name, dp->d_namlen);
255 	name[dp->d_namlen] = 0;
256 
257 	return (0);
258 }
259 
260 static int
261 cb_seek(void *arg, void *h, uint64_t offset, int whence)
262 {
263 	struct cb_file *cf = h;
264 
265 	if (cf->cf_isdir)
266 		return (EINVAL);
267 	if (lseek(cf->cf_u.fd, offset, whence) < 0)
268 		return (errno);
269 	return (0);
270 }
271 
272 static int
273 cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
274 {
275 	struct cb_file *cf = h;
276 
277 	*mode = cf->cf_stat.st_mode;
278 	*uid = cf->cf_stat.st_uid;
279 	*gid = cf->cf_stat.st_gid;
280 	*size = cf->cf_stat.st_size;
281 	return (0);
282 }
283 
284 /*
285  * Disk image i/o callbacks
286  */
287 
288 static int
289 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
290 	    size_t *resid)
291 {
292 	ssize_t n;
293 
294 	if (unit < 0 || unit >= ndisks )
295 		return (EIO);
296 	n = pread(disk_fd[unit], to, size, from);
297 	if (n < 0)
298 		return (errno);
299 	*resid = size - n;
300 	return (0);
301 }
302 
303 static int
304 cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
305 {
306 	struct stat sb;
307 
308 	if (unit < 0 || unit >= ndisks)
309 		return (EBADF);
310 
311 	switch (cmd) {
312 	case DIOCGSECTORSIZE:
313 		*(u_int *)data = 512;
314 		break;
315 	case DIOCGMEDIASIZE:
316 		if (fstat(disk_fd[unit], &sb) != 0)
317 			return (ENOTTY);
318 		if (S_ISCHR(sb.st_mode) &&
319 		    ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0)
320 				return (ENOTTY);
321 		*(off_t *)data = sb.st_size;
322 		break;
323 	default:
324 		return (ENOTTY);
325 	}
326 
327 	return (0);
328 }
329 
330 /*
331  * Guest virtual machine i/o callbacks
332  */
333 static int
334 cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
335 {
336 	char *ptr;
337 
338 	to &= 0x7fffffff;
339 
340 	ptr = vm_map_gpa(ctx, to, size);
341 	if (ptr == NULL)
342 		return (EFAULT);
343 
344 	memcpy(ptr, from, size);
345 	return (0);
346 }
347 
348 static int
349 cb_copyout(void *arg, uint64_t from, void *to, size_t size)
350 {
351 	char *ptr;
352 
353 	from &= 0x7fffffff;
354 
355 	ptr = vm_map_gpa(ctx, from, size);
356 	if (ptr == NULL)
357 		return (EFAULT);
358 
359 	memcpy(to, ptr, size);
360 	return (0);
361 }
362 
363 static void
364 cb_setreg(void *arg, int r, uint64_t v)
365 {
366 	int error;
367 	enum vm_reg_name vmreg;
368 
369 	vmreg = VM_REG_LAST;
370 
371 	switch (r) {
372 	case 4:
373 		vmreg = VM_REG_GUEST_RSP;
374 		rsp = v;
375 		break;
376 	default:
377 		break;
378 	}
379 
380 	if (vmreg == VM_REG_LAST) {
381 		printf("test_setreg(%d): not implemented\n", r);
382 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
383 	}
384 
385 	error = vm_set_register(ctx, BSP, vmreg, v);
386 	if (error) {
387 		perror("vm_set_register");
388 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
389 	}
390 }
391 
392 static void
393 cb_setmsr(void *arg, int r, uint64_t v)
394 {
395 	int error;
396 	enum vm_reg_name vmreg;
397 
398 	vmreg = VM_REG_LAST;
399 
400 	switch (r) {
401 	case MSR_EFER:
402 		vmreg = VM_REG_GUEST_EFER;
403 		break;
404 	default:
405 		break;
406 	}
407 
408 	if (vmreg == VM_REG_LAST) {
409 		printf("test_setmsr(%d): not implemented\n", r);
410 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
411 	}
412 
413 	error = vm_set_register(ctx, BSP, vmreg, v);
414 	if (error) {
415 		perror("vm_set_msr");
416 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
417 	}
418 }
419 
420 static void
421 cb_setcr(void *arg, int r, uint64_t v)
422 {
423 	int error;
424 	enum vm_reg_name vmreg;
425 
426 	vmreg = VM_REG_LAST;
427 
428 	switch (r) {
429 	case 0:
430 		vmreg = VM_REG_GUEST_CR0;
431 		break;
432 	case 3:
433 		vmreg = VM_REG_GUEST_CR3;
434 		cr3 = v;
435 		break;
436 	case 4:
437 		vmreg = VM_REG_GUEST_CR4;
438 		break;
439 	default:
440 		break;
441 	}
442 
443 	if (vmreg == VM_REG_LAST) {
444 		printf("test_setcr(%d): not implemented\n", r);
445 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
446 	}
447 
448 	error = vm_set_register(ctx, BSP, vmreg, v);
449 	if (error) {
450 		perror("vm_set_cr");
451 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
452 	}
453 }
454 
455 static void
456 cb_setgdt(void *arg, uint64_t base, size_t size)
457 {
458 	int error;
459 
460 	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
461 	if (error != 0) {
462 		perror("vm_set_desc(gdt)");
463 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
464 	}
465 
466 	gdtbase = base;
467 }
468 
469 static void
470 cb_exec(void *arg, uint64_t rip)
471 {
472 	int error;
473 
474 	if (cr3 == 0)
475 		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
476 		    rsp);
477 	else
478 		error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase,
479 		    rsp);
480 	if (error) {
481 		perror("vm_setup_freebsd_registers");
482 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
483 	}
484 
485 	cb_exit(NULL, 0);
486 }
487 
488 /*
489  * Misc
490  */
491 
/*
 * Suspend execution for 'usec' microseconds via usleep(3).
 */
static void
cb_delay(void *arg, int usec)
{
	(void)arg;

	usleep(usec);
}
498 
499 static void
500 cb_exit(void *arg, int v)
501 {
502 
503 	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
504 	exit(v);
505 }
506 
507 static void
508 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
509 {
510 
511 	*ret_lowmem = vm_get_lowmem_size(ctx);
512 	*ret_highmem = vm_get_highmem_size(ctx);
513 }
514 
/*
 * One loader environment entry, kept on a singly-linked list.
 */
struct env {
	char *str;	/* name=value */
	SLIST_ENTRY(env) next;
};

/* Head of the environment list; new entries are inserted at the front. */
static SLIST_HEAD(envhead, env) envhead;

/*
 * Add a "name=value" string to the loader environment list.
 *
 * The string is stored by reference, not copied, so it must remain valid
 * for as long as the list is in use.  Exits with EX_OSERR when the list
 * node cannot be allocated.
 */
static void
addenv(char *str)
{
	struct env *env;

	env = malloc(sizeof(*env));
	if (env == NULL)
		err(EX_OSERR, "malloc");
	env->str = str;
	SLIST_INSERT_HEAD(&envhead, env, next);
}

/*
 * Return the environment string at list position 'num', counting from 0
 * at the head of the list (the most recently added entry), or NULL when
 * no entry exists at that position.
 */
static char *
cb_getenv(void *arg, int num)
{
	struct env *cur;
	int idx;

	idx = 0;
	SLIST_FOREACH(cur, &envhead, next) {
		if (idx == num)
			return (cur->str);
		idx++;
	}

	return (NULL);
}
547 
548 static int
549 cb_vm_set_register(void *arg, int vcpu, int reg, uint64_t val)
550 {
551 
552 	return (vm_set_register(ctx, vcpu, reg, val));
553 }
554 
555 static int
556 cb_vm_set_desc(void *arg, int vcpu, int reg, uint64_t base, u_int limit,
557     u_int access)
558 {
559 
560 	return (vm_set_desc(ctx, vcpu, reg, base, limit, access));
561 }
562 
563 static struct loader_callbacks cb = {
564 	.getc = cb_getc,
565 	.putc = cb_putc,
566 	.poll = cb_poll,
567 
568 	.open = cb_open,
569 	.close = cb_close,
570 	.isdir = cb_isdir,
571 	.read = cb_read,
572 	.readdir = cb_readdir,
573 	.seek = cb_seek,
574 	.stat = cb_stat,
575 
576 	.diskread = cb_diskread,
577 	.diskioctl = cb_diskioctl,
578 
579 	.copyin = cb_copyin,
580 	.copyout = cb_copyout,
581 	.setreg = cb_setreg,
582 	.setmsr = cb_setmsr,
583 	.setcr = cb_setcr,
584 	.setgdt = cb_setgdt,
585 	.exec = cb_exec,
586 
587 	.delay = cb_delay,
588 	.exit = cb_exit,
589 	.getmem = cb_getmem,
590 
591 	.getenv = cb_getenv,
592 
593 	/* Version 4 additions */
594 	.vm_set_register = cb_vm_set_register,
595 	.vm_set_desc = cb_vm_set_desc,
596 };
597 
598 static int
599 altcons_open(char *path)
600 {
601 	struct stat sb;
602 	int err;
603 	int fd;
604 
605 	/*
606 	 * Allow stdio to be passed in so that the same string
607 	 * can be used for the bhyveload console and bhyve com-port
608 	 * parameters
609 	 */
610 	if (!strcmp(path, "stdio"))
611 		return (0);
612 
613 	err = stat(path, &sb);
614 	if (err == 0) {
615 		if (!S_ISCHR(sb.st_mode))
616 			err = ENOTSUP;
617 		else {
618 			fd = open(path, O_RDWR | O_NONBLOCK);
619 			if (fd < 0)
620 				err = errno;
621 			else
622 				consin_fd = consout_fd = fd;
623 		}
624 	}
625 
626 	return (err);
627 }
628 
629 static int
630 disk_open(char *path)
631 {
632 	int err, fd;
633 
634 	if (ndisks >= NDISKS)
635 		return (ERANGE);
636 
637 	err = 0;
638 	fd = open(path, O_RDONLY);
639 
640 	if (fd > 0) {
641 		disk_fd[ndisks] = fd;
642 		ndisks++;
643 	} else
644 		err = errno;
645 
646 	return (err);
647 }
648 
649 static void
650 usage(void)
651 {
652 
653 	fprintf(stderr,
654 	    "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
655 	    "       %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n",
656 	    progname,
657 	    (int)strlen(progname), "");
658 	exit(1);
659 }
660 
661 int
662 main(int argc, char** argv)
663 {
664 	char *loader;
665 	void *h;
666 	void (*func)(struct loader_callbacks *, void *, int, int);
667 	uint64_t mem_size;
668 	int opt, error, need_reinit, memflags;
669 
670 	progname = basename(argv[0]);
671 
672 	loader = NULL;
673 
674 	memflags = 0;
675 	mem_size = 256 * MB;
676 
677 	consin_fd = STDIN_FILENO;
678 	consout_fd = STDOUT_FILENO;
679 
680 	while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) {
681 		switch (opt) {
682 		case 'c':
683 			error = altcons_open(optarg);
684 			if (error != 0)
685 				errx(EX_USAGE, "Could not open '%s'", optarg);
686 			break;
687 
688 		case 'd':
689 			error = disk_open(optarg);
690 			if (error != 0)
691 				errx(EX_USAGE, "Could not open '%s'", optarg);
692 			break;
693 
694 		case 'e':
695 			addenv(optarg);
696 			break;
697 
698 		case 'h':
699 			host_base = optarg;
700 			break;
701 
702 		case 'l':
703 			if (loader != NULL)
704 				errx(EX_USAGE, "-l can only be given once");
705 			loader = strdup(optarg);
706 			if (loader == NULL)
707 				err(EX_OSERR, "malloc");
708 			break;
709 
710 		case 'm':
711 			error = vm_parse_memsize(optarg, &mem_size);
712 			if (error != 0)
713 				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
714 			break;
715 		case 'C':
716 			memflags |= VM_MEM_F_INCORE;
717 			break;
718 		case 'S':
719 			memflags |= VM_MEM_F_WIRED;
720 			break;
721 		case '?':
722 			usage();
723 		}
724 	}
725 
726 	argc -= optind;
727 	argv += optind;
728 
729 	if (argc != 1)
730 		usage();
731 
732 	vmname = argv[0];
733 
734 	need_reinit = 0;
735 	error = vm_create(vmname);
736 	if (error) {
737 		if (errno != EEXIST) {
738 			perror("vm_create");
739 			exit(1);
740 		}
741 		need_reinit = 1;
742 	}
743 
744 	ctx = vm_open(vmname);
745 	if (ctx == NULL) {
746 		perror("vm_open");
747 		exit(1);
748 	}
749 
750 	if (need_reinit) {
751 		error = vm_reinit(ctx);
752 		if (error) {
753 			perror("vm_reinit");
754 			exit(1);
755 		}
756 	}
757 
758 	vm_set_memflags(ctx, memflags);
759 	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
760 	if (error) {
761 		perror("vm_setup_memory");
762 		exit(1);
763 	}
764 
765 	if (loader == NULL) {
766 		loader = strdup("/boot/userboot.so");
767 		if (loader == NULL)
768 			err(EX_OSERR, "malloc");
769 	}
770 	h = dlopen(loader, RTLD_LOCAL);
771 	if (!h) {
772 		printf("%s\n", dlerror());
773 		free(loader);
774 		return (1);
775 	}
776 	func = dlsym(h, "loader_main");
777 	if (!func) {
778 		printf("%s\n", dlerror());
779 		free(loader);
780 		return (1);
781 	}
782 
783 	tcgetattr(consout_fd, &term);
784 	oldterm = term;
785 	cfmakeraw(&term);
786 	term.c_cflag |= CLOCAL;
787 
788 	tcsetattr(consout_fd, TCSAFLUSH, &term);
789 
790 	addenv("smbios.bios.vendor=BHYVE");
791 	addenv("boot_serial=1");
792 
793 	func(&cb, NULL, USERBOOT_VERSION_4, ndisks);
794 
795 	free(loader);
796 	return (0);
797 }
798