xref: /freebsd/usr.sbin/bhyveload/bhyveload.c (revision 273c26a3c3bea87a241d6879abd4f991db180bf0)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*-
30  * Copyright (c) 2011 Google, Inc.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52  * SUCH DAMAGE.
53  *
54  * $FreeBSD$
55  */
56 
57 #include <sys/cdefs.h>
58 __FBSDID("$FreeBSD$");
59 
60 #include <sys/ioctl.h>
61 #include <sys/stat.h>
62 #include <sys/disk.h>
63 #include <sys/queue.h>
64 
65 #include <machine/specialreg.h>
66 #include <machine/vmm.h>
67 
68 #include <dirent.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <err.h>
72 #include <fcntl.h>
73 #include <getopt.h>
74 #include <libgen.h>
75 #include <limits.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <sysexits.h>
80 #include <termios.h>
81 #include <unistd.h>
82 
83 #include <vmmapi.h>
84 
85 #include "userboot.h"
86 
/* Size units used when computing the default guest memory size. */
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)

/* Virtual CPU index passed to the vmmapi register/descriptor calls. */
#define	BSP	0

/* Capacity of the disk_fd[] table. */
#define	NDISKS	32

/* Host directory prefix for the host filesystem callbacks (-h option). */
static char *host_base;

/*
 * Console terminal settings: 'term' is the raw-mode copy installed by
 * main(), 'oldterm' is the saved state that cb_exit() restores.
 */
static struct termios term, oldterm;

/* Descriptors of the disk images opened by disk_open(), and their count. */
static int disk_fd[NDISKS];
static int ndisks;

/* Console input/output descriptors used by cb_getc/cb_poll and cb_putc. */
static int consin_fd, consout_fd;

static char *vmname, *progname;

/* Handle returned by vm_open() for the guest being loaded. */
static struct vmctx *ctx;

/*
 * Values recorded by cb_setgdt(), cb_setcr() and cb_setreg() and later
 * handed to the vm_setup_freebsd_registers*() call in cb_exec().
 */
static uint64_t gdtbase, cr3, rsp;

/* Needed by the register callbacks that are defined before cb_exit(). */
static void cb_exit(void *arg, int v);
105 
106 /*
107  * Console i/o callbacks
108  */
109 
110 static void
111 cb_putc(void *arg, int ch)
112 {
113 	char c = ch;
114 
115 	(void) write(consout_fd, &c, 1);
116 }
117 
118 static int
119 cb_getc(void *arg)
120 {
121 	char c;
122 
123 	if (read(consin_fd, &c, 1) == 1)
124 		return (c);
125 	return (-1);
126 }
127 
128 static int
129 cb_poll(void *arg)
130 {
131 	int n;
132 
133 	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
134 		return (n > 0);
135 	return (0);
136 }
137 
138 /*
139  * Host filesystem i/o callbacks
140  */
141 
/*
 * Handle handed back to the loader by cb_open().  It wraps either an open
 * directory stream or an open file descriptor, selected by cf_isdir.
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir valid; zero: cf_u.fd */
	size_t cf_size;		/* st_size captured when the file was opened */
	struct stat cf_stat;	/* stat(2) result captured at open time */
	union {
		int fd;		/* regular file descriptor */
		DIR *dir;	/* directory stream */
	} cf_u;
};
151 
152 static int
153 cb_open(void *arg, const char *filename, void **hp)
154 {
155 	struct cb_file *cf;
156 	char path[PATH_MAX];
157 
158 	if (!host_base)
159 		return (ENOENT);
160 
161 	strlcpy(path, host_base, PATH_MAX);
162 	if (path[strlen(path) - 1] == '/')
163 		path[strlen(path) - 1] = 0;
164 	strlcat(path, filename, PATH_MAX);
165 	cf = malloc(sizeof(struct cb_file));
166 	if (stat(path, &cf->cf_stat) < 0) {
167 		free(cf);
168 		return (errno);
169 	}
170 
171 	cf->cf_size = cf->cf_stat.st_size;
172 	if (S_ISDIR(cf->cf_stat.st_mode)) {
173 		cf->cf_isdir = 1;
174 		cf->cf_u.dir = opendir(path);
175 		if (!cf->cf_u.dir)
176 			goto out;
177 		*hp = cf;
178 		return (0);
179 	}
180 	if (S_ISREG(cf->cf_stat.st_mode)) {
181 		cf->cf_isdir = 0;
182 		cf->cf_u.fd = open(path, O_RDONLY);
183 		if (cf->cf_u.fd < 0)
184 			goto out;
185 		*hp = cf;
186 		return (0);
187 	}
188 
189 out:
190 	free(cf);
191 	return (EINVAL);
192 }
193 
194 static int
195 cb_close(void *arg, void *h)
196 {
197 	struct cb_file *cf = h;
198 
199 	if (cf->cf_isdir)
200 		closedir(cf->cf_u.dir);
201 	else
202 		close(cf->cf_u.fd);
203 	free(cf);
204 
205 	return (0);
206 }
207 
208 static int
209 cb_isdir(void *arg, void *h)
210 {
211 	struct cb_file *cf = h;
212 
213 	return (cf->cf_isdir);
214 }
215 
216 static int
217 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
218 {
219 	struct cb_file *cf = h;
220 	ssize_t sz;
221 
222 	if (cf->cf_isdir)
223 		return (EINVAL);
224 	sz = read(cf->cf_u.fd, buf, size);
225 	if (sz < 0)
226 		return (EINVAL);
227 	*resid = size - sz;
228 	return (0);
229 }
230 
231 static int
232 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
233 	   size_t *namelen_return, char *name)
234 {
235 	struct cb_file *cf = h;
236 	struct dirent *dp;
237 
238 	if (!cf->cf_isdir)
239 		return (EINVAL);
240 
241 	dp = readdir(cf->cf_u.dir);
242 	if (!dp)
243 		return (ENOENT);
244 
245 	/*
246 	 * Note: d_namlen is in the range 0..255 and therefore less
247 	 * than PATH_MAX so we don't need to test before copying.
248 	 */
249 	*fileno_return = dp->d_fileno;
250 	*type_return = dp->d_type;
251 	*namelen_return = dp->d_namlen;
252 	memcpy(name, dp->d_name, dp->d_namlen);
253 	name[dp->d_namlen] = 0;
254 
255 	return (0);
256 }
257 
258 static int
259 cb_seek(void *arg, void *h, uint64_t offset, int whence)
260 {
261 	struct cb_file *cf = h;
262 
263 	if (cf->cf_isdir)
264 		return (EINVAL);
265 	if (lseek(cf->cf_u.fd, offset, whence) < 0)
266 		return (errno);
267 	return (0);
268 }
269 
270 static int
271 cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
272 {
273 	struct cb_file *cf = h;
274 
275 	*mode = cf->cf_stat.st_mode;
276 	*uid = cf->cf_stat.st_uid;
277 	*gid = cf->cf_stat.st_gid;
278 	*size = cf->cf_stat.st_size;
279 	return (0);
280 }
281 
282 /*
283  * Disk image i/o callbacks
284  */
285 
286 static int
287 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
288 	    size_t *resid)
289 {
290 	ssize_t n;
291 
292 	if (unit < 0 || unit >= ndisks )
293 		return (EIO);
294 	n = pread(disk_fd[unit], to, size, from);
295 	if (n < 0)
296 		return (errno);
297 	*resid = size - n;
298 	return (0);
299 }
300 
301 static int
302 cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
303 {
304 	struct stat sb;
305 
306 	if (unit < 0 || unit >= ndisks)
307 		return (EBADF);
308 
309 	switch (cmd) {
310 	case DIOCGSECTORSIZE:
311 		*(u_int *)data = 512;
312 		break;
313 	case DIOCGMEDIASIZE:
314 		if (fstat(disk_fd[unit], &sb) == 0)
315 			*(off_t *)data = sb.st_size;
316 		else
317 			return (ENOTTY);
318 		break;
319 	default:
320 		return (ENOTTY);
321 	}
322 
323 	return (0);
324 }
325 
326 /*
327  * Guest virtual machine i/o callbacks
328  */
329 static int
330 cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
331 {
332 	char *ptr;
333 
334 	to &= 0x7fffffff;
335 
336 	ptr = vm_map_gpa(ctx, to, size);
337 	if (ptr == NULL)
338 		return (EFAULT);
339 
340 	memcpy(ptr, from, size);
341 	return (0);
342 }
343 
344 static int
345 cb_copyout(void *arg, uint64_t from, void *to, size_t size)
346 {
347 	char *ptr;
348 
349 	from &= 0x7fffffff;
350 
351 	ptr = vm_map_gpa(ctx, from, size);
352 	if (ptr == NULL)
353 		return (EFAULT);
354 
355 	memcpy(to, ptr, size);
356 	return (0);
357 }
358 
359 static void
360 cb_setreg(void *arg, int r, uint64_t v)
361 {
362 	int error;
363 	enum vm_reg_name vmreg;
364 
365 	vmreg = VM_REG_LAST;
366 
367 	switch (r) {
368 	case 4:
369 		vmreg = VM_REG_GUEST_RSP;
370 		rsp = v;
371 		break;
372 	default:
373 		break;
374 	}
375 
376 	if (vmreg == VM_REG_LAST) {
377 		printf("test_setreg(%d): not implemented\n", r);
378 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
379 	}
380 
381 	error = vm_set_register(ctx, BSP, vmreg, v);
382 	if (error) {
383 		perror("vm_set_register");
384 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
385 	}
386 }
387 
388 static void
389 cb_setmsr(void *arg, int r, uint64_t v)
390 {
391 	int error;
392 	enum vm_reg_name vmreg;
393 
394 	vmreg = VM_REG_LAST;
395 
396 	switch (r) {
397 	case MSR_EFER:
398 		vmreg = VM_REG_GUEST_EFER;
399 		break;
400 	default:
401 		break;
402 	}
403 
404 	if (vmreg == VM_REG_LAST) {
405 		printf("test_setmsr(%d): not implemented\n", r);
406 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
407 	}
408 
409 	error = vm_set_register(ctx, BSP, vmreg, v);
410 	if (error) {
411 		perror("vm_set_msr");
412 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
413 	}
414 }
415 
416 static void
417 cb_setcr(void *arg, int r, uint64_t v)
418 {
419 	int error;
420 	enum vm_reg_name vmreg;
421 
422 	vmreg = VM_REG_LAST;
423 
424 	switch (r) {
425 	case 0:
426 		vmreg = VM_REG_GUEST_CR0;
427 		break;
428 	case 3:
429 		vmreg = VM_REG_GUEST_CR3;
430 		cr3 = v;
431 		break;
432 	case 4:
433 		vmreg = VM_REG_GUEST_CR4;
434 		break;
435 	default:
436 		break;
437 	}
438 
439 	if (vmreg == VM_REG_LAST) {
440 		printf("test_setcr(%d): not implemented\n", r);
441 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
442 	}
443 
444 	error = vm_set_register(ctx, BSP, vmreg, v);
445 	if (error) {
446 		perror("vm_set_cr");
447 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
448 	}
449 }
450 
451 static void
452 cb_setgdt(void *arg, uint64_t base, size_t size)
453 {
454 	int error;
455 
456 	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
457 	if (error != 0) {
458 		perror("vm_set_desc(gdt)");
459 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
460 	}
461 
462 	gdtbase = base;
463 }
464 
465 static void
466 cb_exec(void *arg, uint64_t rip)
467 {
468 	int error;
469 
470 	if (cr3 == 0)
471 		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
472 		    rsp);
473 	else
474 		error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase,
475 		    rsp);
476 	if (error) {
477 		perror("vm_setup_freebsd_registers");
478 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
479 	}
480 
481 	cb_exit(NULL, 0);
482 }
483 
484 /*
485  * Misc
486  */
487 
/*
 * Suspend execution for 'usec' microseconds via usleep(3).
 */
static void
cb_delay(void *arg, int usec)
{

	usleep(usec);
}
494 
495 static void
496 cb_exit(void *arg, int v)
497 {
498 
499 	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
500 	exit(v);
501 }
502 
503 static void
504 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
505 {
506 
507 	*ret_lowmem = vm_get_lowmem_size(ctx);
508 	*ret_highmem = vm_get_highmem_size(ctx);
509 }
510 
/*
 * One loader environment entry.  The string is referenced, not copied.
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;
};

/* Singly-linked list of environment entries, initially empty. */
static SLIST_HEAD(envhead, env) envhead = SLIST_HEAD_INITIALIZER(envhead);
517 
518 static void
519 addenv(const char *str)
520 {
521 	struct env *env;
522 
523 	env = malloc(sizeof(struct env));
524 	env->str = str;
525 	SLIST_INSERT_HEAD(&envhead, env, next);
526 }
527 
528 static const char *
529 cb_getenv(void *arg, int num)
530 {
531 	int i;
532 	struct env *env;
533 
534 	i = 0;
535 	SLIST_FOREACH(env, &envhead, next) {
536 		if (i == num)
537 			return (env->str);
538 		i++;
539 	}
540 
541 	return (NULL);
542 }
543 
544 static int
545 cb_vm_set_register(void *arg, int vcpu, int reg, uint64_t val)
546 {
547 
548 	return (vm_set_register(ctx, vcpu, reg, val));
549 }
550 
551 static int
552 cb_vm_set_desc(void *arg, int vcpu, int reg, uint64_t base, u_int limit,
553     u_int access)
554 {
555 
556 	return (vm_set_desc(ctx, vcpu, reg, base, limit, access));
557 }
558 
559 static struct loader_callbacks cb = {
560 	.getc = cb_getc,
561 	.putc = cb_putc,
562 	.poll = cb_poll,
563 
564 	.open = cb_open,
565 	.close = cb_close,
566 	.isdir = cb_isdir,
567 	.read = cb_read,
568 	.readdir = cb_readdir,
569 	.seek = cb_seek,
570 	.stat = cb_stat,
571 
572 	.diskread = cb_diskread,
573 	.diskioctl = cb_diskioctl,
574 
575 	.copyin = cb_copyin,
576 	.copyout = cb_copyout,
577 	.setreg = cb_setreg,
578 	.setmsr = cb_setmsr,
579 	.setcr = cb_setcr,
580 	.setgdt = cb_setgdt,
581 	.exec = cb_exec,
582 
583 	.delay = cb_delay,
584 	.exit = cb_exit,
585 	.getmem = cb_getmem,
586 
587 	.getenv = cb_getenv,
588 
589 	/* Version 4 additions */
590 	.vm_set_register = cb_vm_set_register,
591 	.vm_set_desc = cb_vm_set_desc,
592 };
593 
594 static int
595 altcons_open(char *path)
596 {
597 	struct stat sb;
598 	int err;
599 	int fd;
600 
601 	/*
602 	 * Allow stdio to be passed in so that the same string
603 	 * can be used for the bhyveload console and bhyve com-port
604 	 * parameters
605 	 */
606 	if (!strcmp(path, "stdio"))
607 		return (0);
608 
609 	err = stat(path, &sb);
610 	if (err == 0) {
611 		if (!S_ISCHR(sb.st_mode))
612 			err = ENOTSUP;
613 		else {
614 			fd = open(path, O_RDWR | O_NONBLOCK);
615 			if (fd < 0)
616 				err = errno;
617 			else
618 				consin_fd = consout_fd = fd;
619 		}
620 	}
621 
622 	return (err);
623 }
624 
625 static int
626 disk_open(char *path)
627 {
628 	int err, fd;
629 
630 	if (ndisks >= NDISKS)
631 		return (ERANGE);
632 
633 	err = 0;
634 	fd = open(path, O_RDONLY);
635 
636 	if (fd > 0) {
637 		disk_fd[ndisks] = fd;
638 		ndisks++;
639 	} else
640 		err = errno;
641 
642 	return (err);
643 }
644 
645 static void
646 usage(void)
647 {
648 
649 	fprintf(stderr,
650 	    "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
651 	    "       %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n",
652 	    progname,
653 	    (int)strlen(progname), "");
654 	exit(1);
655 }
656 
657 int
658 main(int argc, char** argv)
659 {
660 	char *loader;
661 	void *h;
662 	void (*func)(struct loader_callbacks *, void *, int, int);
663 	uint64_t mem_size;
664 	int opt, error, need_reinit, memflags;
665 
666 	progname = basename(argv[0]);
667 
668 	loader = NULL;
669 
670 	memflags = 0;
671 	mem_size = 256 * MB;
672 
673 	consin_fd = STDIN_FILENO;
674 	consout_fd = STDOUT_FILENO;
675 
676 	while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) {
677 		switch (opt) {
678 		case 'c':
679 			error = altcons_open(optarg);
680 			if (error != 0)
681 				errx(EX_USAGE, "Could not open '%s'", optarg);
682 			break;
683 
684 		case 'd':
685 			error = disk_open(optarg);
686 			if (error != 0)
687 				errx(EX_USAGE, "Could not open '%s'", optarg);
688 			break;
689 
690 		case 'e':
691 			addenv(optarg);
692 			break;
693 
694 		case 'h':
695 			host_base = optarg;
696 			break;
697 
698 		case 'l':
699 			if (loader != NULL)
700 				errx(EX_USAGE, "-l can only be given once");
701 			loader = strdup(optarg);
702 			if (loader == NULL)
703 				err(EX_OSERR, "malloc");
704 			break;
705 
706 		case 'm':
707 			error = vm_parse_memsize(optarg, &mem_size);
708 			if (error != 0)
709 				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
710 			break;
711 		case 'C':
712 			memflags |= VM_MEM_F_INCORE;
713 			break;
714 		case 'S':
715 			memflags |= VM_MEM_F_WIRED;
716 			break;
717 		case '?':
718 			usage();
719 		}
720 	}
721 
722 	argc -= optind;
723 	argv += optind;
724 
725 	if (argc != 1)
726 		usage();
727 
728 	vmname = argv[0];
729 
730 	need_reinit = 0;
731 	error = vm_create(vmname);
732 	if (error) {
733 		if (errno != EEXIST) {
734 			perror("vm_create");
735 			exit(1);
736 		}
737 		need_reinit = 1;
738 	}
739 
740 	ctx = vm_open(vmname);
741 	if (ctx == NULL) {
742 		perror("vm_open");
743 		exit(1);
744 	}
745 
746 	if (need_reinit) {
747 		error = vm_reinit(ctx);
748 		if (error) {
749 			perror("vm_reinit");
750 			exit(1);
751 		}
752 	}
753 
754 	vm_set_memflags(ctx, memflags);
755 	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
756 	if (error) {
757 		perror("vm_setup_memory");
758 		exit(1);
759 	}
760 
761 	if (loader == NULL) {
762 		loader = strdup("/boot/userboot.so");
763 		if (loader == NULL)
764 			err(EX_OSERR, "malloc");
765 	}
766 	h = dlopen(loader, RTLD_LOCAL);
767 	if (!h) {
768 		printf("%s\n", dlerror());
769 		free(loader);
770 		return (1);
771 	}
772 	func = dlsym(h, "loader_main");
773 	if (!func) {
774 		printf("%s\n", dlerror());
775 		free(loader);
776 		return (1);
777 	}
778 
779 	tcgetattr(consout_fd, &term);
780 	oldterm = term;
781 	cfmakeraw(&term);
782 	term.c_cflag |= CLOCAL;
783 
784 	tcsetattr(consout_fd, TCSAFLUSH, &term);
785 
786 	addenv("smbios.bios.vendor=BHYVE");
787 	addenv("boot_serial=1");
788 
789 	func(&cb, NULL, USERBOOT_VERSION_4, ndisks);
790 
791 	free(loader);
792 	return (0);
793 }
794