1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /*- 32 * Copyright (c) 2011 Google, Inc. 33 * All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 47 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 54 * SUCH DAMAGE. 55 * 56 * $FreeBSD$ 57 */ 58 59 #include <sys/cdefs.h> 60 __FBSDID("$FreeBSD$"); 61 62 #include <sys/ioctl.h> 63 #include <sys/stat.h> 64 #include <sys/disk.h> 65 #include <sys/queue.h> 66 67 #include <machine/specialreg.h> 68 #include <machine/vmm.h> 69 70 #include <dirent.h> 71 #include <dlfcn.h> 72 #include <errno.h> 73 #include <err.h> 74 #include <fcntl.h> 75 #include <getopt.h> 76 #include <libgen.h> 77 #include <limits.h> 78 #include <setjmp.h> 79 #include <stdio.h> 80 #include <stdlib.h> 81 #include <string.h> 82 #include <sysexits.h> 83 #include <termios.h> 84 #include <unistd.h> 85 86 #include <vmmapi.h> 87 88 #include "userboot.h" 89 90 #define MB (1024 * 1024UL) 91 #define GB (1024 * 1024 * 1024UL) 92 #define BSP 0 93 94 #define NDISKS 32 95 96 static char *host_base; 97 static struct termios term, oldterm; 98 static int disk_fd[NDISKS]; 99 static int ndisks; 100 static int consin_fd, consout_fd; 101 102 static int need_reinit; 103 104 static void *loader_hdl; 105 static char *loader; 106 static int explicit_loader; 107 static jmp_buf jb; 108 109 static char *vmname, *progname; 110 static struct vmctx *ctx; 111 112 static uint64_t gdtbase, cr3, rsp; 113 114 static void cb_exit(void *arg, int v); 115 116 /* 117 * Console i/o callbacks 118 */ 119 120 static void 121 cb_putc(void *arg, int ch) 122 { 123 char c = ch; 124 125 (void) write(consout_fd, &c, 1); 126 } 127 128 static int 129 cb_getc(void *arg) 130 { 131 char c; 132 133 if (read(consin_fd, &c, 1) == 1) 134 return (c); 135 return (-1); 136 } 137 138 static int 139 cb_poll(void *arg) 140 { 141 int n; 142 143 if (ioctl(consin_fd, FIONREAD, &n) >= 0) 144 return (n > 0); 145 return (0); 146 } 147 148 /* 149 * Host filesystem i/o callbacks 150 */ 151 152 struct cb_file { 153 int cf_isdir; 154 size_t cf_size; 155 struct stat cf_stat; 156 union { 157 int fd; 158 DIR *dir; 159 } cf_u; 160 }; 161 162 static int 163 cb_open(void *arg, const char *filename, void **hp) 164 { 165 struct cb_file *cf; 166 char path[PATH_MAX]; 167 168 if (!host_base) 169 return (ENOENT); 170 171 strlcpy(path, host_base, PATH_MAX); 172 if (path[strlen(path) - 1] == '/') 173 path[strlen(path) - 1] = 0; 174 strlcat(path, filename, PATH_MAX); 175 cf = malloc(sizeof(struct cb_file)); 176 if (stat(path, &cf->cf_stat) < 0) { 177 free(cf); 178 return (errno); 179 } 180 181 cf->cf_size = cf->cf_stat.st_size; 182 if (S_ISDIR(cf->cf_stat.st_mode)) { 183 cf->cf_isdir = 1; 184 cf->cf_u.dir = opendir(path); 185 if (!cf->cf_u.dir) 186 goto out; 187 *hp = cf; 188 return (0); 189 } 190 if (S_ISREG(cf->cf_stat.st_mode)) { 191 cf->cf_isdir = 0; 192 cf->cf_u.fd = open(path, O_RDONLY); 193 if (cf->cf_u.fd < 0) 194 goto out; 195 *hp = cf; 196 return (0); 197 } 198 199 out: 200 free(cf); 201 return (EINVAL); 202 } 203 204 static int 205 cb_close(void *arg, void *h) 206 { 207 struct cb_file *cf = h; 208 209 if (cf->cf_isdir) 210 closedir(cf->cf_u.dir); 211 else 212 close(cf->cf_u.fd); 213 free(cf); 214 215 return (0); 216 } 217 218 static int 219 cb_isdir(void *arg, void *h) 220 { 221 struct cb_file *cf = h; 222 223 return (cf->cf_isdir); 224 } 225 226 static int 227 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid) 228 { 229 struct cb_file *cf = h; 230 ssize_t sz; 231 232 if (cf->cf_isdir) 233 return (EINVAL); 234 sz = read(cf->cf_u.fd, buf, size); 235 if (sz < 0) 236 return (EINVAL); 237 *resid = size - sz; 238 return (0); 239 } 240 241 static int 242 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return, 243 size_t *namelen_return, char *name) 244 { 245 struct cb_file *cf = h; 246 struct dirent *dp; 247 248 if (!cf->cf_isdir) 249 return (EINVAL); 250 251 dp = readdir(cf->cf_u.dir); 252 if (!dp) 253 return (ENOENT); 254 255 /* 256 * Note: d_namlen is in the range 0..255 and therefore less 257 * than PATH_MAX so we don't need to test before copying. 258 */ 259 *fileno_return = dp->d_fileno; 260 *type_return = dp->d_type; 261 *namelen_return = dp->d_namlen; 262 memcpy(name, dp->d_name, dp->d_namlen); 263 name[dp->d_namlen] = 0; 264 265 return (0); 266 } 267 268 static int 269 cb_seek(void *arg, void *h, uint64_t offset, int whence) 270 { 271 struct cb_file *cf = h; 272 273 if (cf->cf_isdir) 274 return (EINVAL); 275 if (lseek(cf->cf_u.fd, offset, whence) < 0) 276 return (errno); 277 return (0); 278 } 279 280 static int 281 cb_stat(void *arg, void *h, struct stat *sbp) 282 { 283 struct cb_file *cf = h; 284 285 memset(sbp, 0, sizeof(struct stat)); 286 sbp->st_mode = cf->cf_stat.st_mode; 287 sbp->st_uid = cf->cf_stat.st_uid; 288 sbp->st_gid = cf->cf_stat.st_gid; 289 sbp->st_size = cf->cf_stat.st_size; 290 sbp->st_mtime = cf->cf_stat.st_mtime; 291 sbp->st_dev = cf->cf_stat.st_dev; 292 sbp->st_ino = cf->cf_stat.st_ino; 293 294 return (0); 295 } 296 297 /* 298 * Disk image i/o callbacks 299 */ 300 301 static int 302 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size, 303 size_t *resid) 304 { 305 ssize_t n; 306 307 if (unit < 0 || unit >= ndisks ) 308 return (EIO); 309 n = pread(disk_fd[unit], to, size, from); 310 if (n < 0) 311 return (errno); 312 *resid = size - n; 313 return (0); 314 } 315 316 static int 317 cb_diskioctl(void *arg, int unit, u_long cmd, void *data) 318 { 319 struct stat sb; 320 321 if (unit < 0 || unit >= ndisks) 322 return (EBADF); 323 324 switch (cmd) { 325 case DIOCGSECTORSIZE: 326 *(u_int *)data = 512; 327 break; 328 case DIOCGMEDIASIZE: 329 if (fstat(disk_fd[unit], &sb) != 0) 330 return (ENOTTY); 331 if (S_ISCHR(sb.st_mode) && 332 ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0) 333 return (ENOTTY); 334 *(off_t *)data = sb.st_size; 335 break; 336 default: 337 return (ENOTTY); 338 } 339 340 return (0); 341 } 342 343 /* 344 * Guest virtual machine i/o callbacks 345 */ 346 static int 347 cb_copyin(void *arg, const void *from, uint64_t to, size_t size) 348 { 349 char *ptr; 350 351 to &= 0x7fffffff; 352 353 ptr = vm_map_gpa(ctx, to, size); 354 if (ptr == NULL) 355 return (EFAULT); 356 357 memcpy(ptr, from, size); 358 return (0); 359 } 360 361 static int 362 cb_copyout(void *arg, uint64_t from, void *to, size_t size) 363 { 364 char *ptr; 365 366 from &= 0x7fffffff; 367 368 ptr = vm_map_gpa(ctx, from, size); 369 if (ptr == NULL) 370 return (EFAULT); 371 372 memcpy(to, ptr, size); 373 return (0); 374 } 375 376 static void 377 cb_setreg(void *arg, int r, uint64_t v) 378 { 379 int error; 380 enum vm_reg_name vmreg; 381 382 vmreg = VM_REG_LAST; 383 384 switch (r) { 385 case 4: 386 vmreg = VM_REG_GUEST_RSP; 387 rsp = v; 388 break; 389 default: 390 break; 391 } 392 393 if (vmreg == VM_REG_LAST) { 394 printf("test_setreg(%d): not implemented\n", r); 395 cb_exit(NULL, USERBOOT_EXIT_QUIT); 396 } 397 398 error = vm_set_register(ctx, BSP, vmreg, v); 399 if (error) { 400 perror("vm_set_register"); 401 cb_exit(NULL, USERBOOT_EXIT_QUIT); 402 } 403 } 404 405 static void 406 cb_setmsr(void *arg, int r, uint64_t v) 407 { 408 int error; 409 enum vm_reg_name vmreg; 410 411 vmreg = VM_REG_LAST; 412 413 switch (r) { 414 case MSR_EFER: 415 vmreg = VM_REG_GUEST_EFER; 416 break; 417 default: 418 break; 419 } 420 421 if (vmreg == VM_REG_LAST) { 422 printf("test_setmsr(%d): not implemented\n", r); 423 cb_exit(NULL, USERBOOT_EXIT_QUIT); 424 } 425 426 error = vm_set_register(ctx, BSP, vmreg, v); 427 if (error) { 428 perror("vm_set_msr"); 429 cb_exit(NULL, USERBOOT_EXIT_QUIT); 430 } 431 } 432 433 static void 434 cb_setcr(void *arg, int r, uint64_t v) 435 { 436 int error; 437 enum vm_reg_name vmreg; 438 439 vmreg = VM_REG_LAST; 440 441 switch (r) { 442 case 0: 443 vmreg = VM_REG_GUEST_CR0; 444 break; 445 case 3: 446 vmreg = VM_REG_GUEST_CR3; 447 cr3 = v; 448 break; 449 case 4: 450 vmreg = VM_REG_GUEST_CR4; 451 break; 452 default: 453 break; 454 } 455 456 if (vmreg == VM_REG_LAST) { 457 printf("test_setcr(%d): not implemented\n", r); 458 cb_exit(NULL, USERBOOT_EXIT_QUIT); 459 } 460 461 error = vm_set_register(ctx, BSP, vmreg, v); 462 if (error) { 463 perror("vm_set_cr"); 464 cb_exit(NULL, USERBOOT_EXIT_QUIT); 465 } 466 } 467 468 static void 469 cb_setgdt(void *arg, uint64_t base, size_t size) 470 { 471 int error; 472 473 error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0); 474 if (error != 0) { 475 perror("vm_set_desc(gdt)"); 476 cb_exit(NULL, USERBOOT_EXIT_QUIT); 477 } 478 479 gdtbase = base; 480 } 481 482 static void 483 cb_exec(void *arg, uint64_t rip) 484 { 485 int error; 486 487 if (cr3 == 0) 488 error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase, 489 rsp); 490 else 491 error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, 492 rsp); 493 if (error) { 494 perror("vm_setup_freebsd_registers"); 495 cb_exit(NULL, USERBOOT_EXIT_QUIT); 496 } 497 498 cb_exit(NULL, 0); 499 } 500 501 /* 502 * Misc 503 */ 504 505 static void 506 cb_delay(void *arg, int usec) 507 { 508 509 usleep(usec); 510 } 511 512 static void 513 cb_exit(void *arg, int v) 514 { 515 516 tcsetattr(consout_fd, TCSAFLUSH, &oldterm); 517 exit(v); 518 } 519 520 static void 521 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) 522 { 523 524 *ret_lowmem = vm_get_lowmem_size(ctx); 525 *ret_highmem = vm_get_highmem_size(ctx); 526 } 527 528 struct env { 529 char *str; /* name=value */ 530 SLIST_ENTRY(env) next; 531 }; 532 533 static SLIST_HEAD(envhead, env) envhead; 534 535 static void 536 addenv(char *str) 537 { 538 struct env *env; 539 540 env = malloc(sizeof(struct env)); 541 env->str = str; 542 SLIST_INSERT_HEAD(&envhead, env, next); 543 } 544 545 static char * 546 cb_getenv(void *arg, int num) 547 { 548 int i; 549 struct env *env; 550 551 i = 0; 552 SLIST_FOREACH(env, &envhead, next) { 553 if (i == num) 554 return (env->str); 555 i++; 556 } 557 558 return (NULL); 559 } 560 561 static int 562 cb_vm_set_register(void *arg, int vcpu, int reg, uint64_t val) 563 { 564 565 return (vm_set_register(ctx, vcpu, reg, val)); 566 } 567 568 static int 569 cb_vm_set_desc(void *arg, int vcpu, int reg, uint64_t base, u_int limit, 570 u_int access) 571 { 572 573 return (vm_set_desc(ctx, vcpu, reg, base, limit, access)); 574 } 575 576 static void 577 cb_swap_interpreter(void *arg, const char *interp_req) 578 { 579 580 /* 581 * If the user specified a loader but we detected a mismatch, we should 582 * not try to pivot to a different loader on them. 583 */ 584 free(loader); 585 if (explicit_loader == 1) { 586 perror("requested loader interpreter does not match guest userboot"); 587 cb_exit(NULL, 1); 588 } 589 if (interp_req == NULL || *interp_req == '\0') { 590 perror("guest failed to request an interpreter"); 591 cb_exit(NULL, 1); 592 } 593 594 if (asprintf(&loader, "/boot/userboot_%s.so", interp_req) == -1) 595 err(EX_OSERR, "malloc"); 596 need_reinit = 1; 597 longjmp(jb, 1); 598 } 599 600 static struct loader_callbacks cb = { 601 .getc = cb_getc, 602 .putc = cb_putc, 603 .poll = cb_poll, 604 605 .open = cb_open, 606 .close = cb_close, 607 .isdir = cb_isdir, 608 .read = cb_read, 609 .readdir = cb_readdir, 610 .seek = cb_seek, 611 .stat = cb_stat, 612 613 .diskread = cb_diskread, 614 .diskioctl = cb_diskioctl, 615 616 .copyin = cb_copyin, 617 .copyout = cb_copyout, 618 .setreg = cb_setreg, 619 .setmsr = cb_setmsr, 620 .setcr = cb_setcr, 621 .setgdt = cb_setgdt, 622 .exec = cb_exec, 623 624 .delay = cb_delay, 625 .exit = cb_exit, 626 .getmem = cb_getmem, 627 628 .getenv = cb_getenv, 629 630 /* Version 4 additions */ 631 .vm_set_register = cb_vm_set_register, 632 .vm_set_desc = cb_vm_set_desc, 633 634 /* Version 5 additions */ 635 .swap_interpreter = cb_swap_interpreter, 636 }; 637 638 static int 639 altcons_open(char *path) 640 { 641 struct stat sb; 642 int err; 643 int fd; 644 645 /* 646 * Allow stdio to be passed in so that the same string 647 * can be used for the bhyveload console and bhyve com-port 648 * parameters 649 */ 650 if (!strcmp(path, "stdio")) 651 return (0); 652 653 err = stat(path, &sb); 654 if (err == 0) { 655 if (!S_ISCHR(sb.st_mode)) 656 err = ENOTSUP; 657 else { 658 fd = open(path, O_RDWR | O_NONBLOCK); 659 if (fd < 0) 660 err = errno; 661 else 662 consin_fd = consout_fd = fd; 663 } 664 } 665 666 return (err); 667 } 668 669 static int 670 disk_open(char *path) 671 { 672 int fd; 673 674 if (ndisks >= NDISKS) 675 return (ERANGE); 676 677 fd = open(path, O_RDONLY); 678 if (fd < 0) 679 return (errno); 680 681 disk_fd[ndisks] = fd; 682 ndisks++; 683 684 return (0); 685 } 686 687 static void 688 usage(void) 689 { 690 691 fprintf(stderr, 692 "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n" 693 " %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n", 694 progname, 695 (int)strlen(progname), ""); 696 exit(1); 697 } 698 699 int 700 main(int argc, char** argv) 701 { 702 void (*func)(struct loader_callbacks *, void *, int, int); 703 uint64_t mem_size; 704 int opt, error, memflags; 705 706 progname = basename(argv[0]); 707 708 memflags = 0; 709 mem_size = 256 * MB; 710 711 consin_fd = STDIN_FILENO; 712 consout_fd = STDOUT_FILENO; 713 714 while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) { 715 switch (opt) { 716 case 'c': 717 error = altcons_open(optarg); 718 if (error != 0) 719 errx(EX_USAGE, "Could not open '%s'", optarg); 720 break; 721 722 case 'd': 723 error = disk_open(optarg); 724 if (error != 0) 725 errx(EX_USAGE, "Could not open '%s'", optarg); 726 break; 727 728 case 'e': 729 addenv(optarg); 730 break; 731 732 case 'h': 733 host_base = optarg; 734 break; 735 736 case 'l': 737 if (loader != NULL) 738 errx(EX_USAGE, "-l can only be given once"); 739 loader = strdup(optarg); 740 if (loader == NULL) 741 err(EX_OSERR, "malloc"); 742 explicit_loader = 1; 743 break; 744 745 case 'm': 746 error = vm_parse_memsize(optarg, &mem_size); 747 if (error != 0) 748 errx(EX_USAGE, "Invalid memsize '%s'", optarg); 749 break; 750 case 'C': 751 memflags |= VM_MEM_F_INCORE; 752 break; 753 case 'S': 754 memflags |= VM_MEM_F_WIRED; 755 break; 756 case '?': 757 usage(); 758 } 759 } 760 761 argc -= optind; 762 argv += optind; 763 764 if (argc != 1) 765 usage(); 766 767 vmname = argv[0]; 768 769 need_reinit = 0; 770 error = vm_create(vmname); 771 if (error) { 772 if (errno != EEXIST) { 773 perror("vm_create"); 774 exit(1); 775 } 776 need_reinit = 1; 777 } 778 779 ctx = vm_open(vmname); 780 if (ctx == NULL) { 781 perror("vm_open"); 782 exit(1); 783 } 784 785 /* 786 * setjmp in the case the guest wants to swap out interpreter, 787 * cb_swap_interpreter will swap out loader as appropriate and set 788 * need_reinit so that we end up in a clean state once again. 789 */ 790 setjmp(jb); 791 792 if (need_reinit) { 793 error = vm_reinit(ctx); 794 if (error) { 795 perror("vm_reinit"); 796 exit(1); 797 } 798 } 799 800 vm_set_memflags(ctx, memflags); 801 error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL); 802 if (error) { 803 perror("vm_setup_memory"); 804 exit(1); 805 } 806 807 if (loader == NULL) { 808 loader = strdup("/boot/userboot.so"); 809 if (loader == NULL) 810 err(EX_OSERR, "malloc"); 811 } 812 if (loader_hdl != NULL) 813 dlclose(loader_hdl); 814 loader_hdl = dlopen(loader, RTLD_LOCAL); 815 if (!loader_hdl) { 816 printf("%s\n", dlerror()); 817 free(loader); 818 return (1); 819 } 820 func = dlsym(loader_hdl, "loader_main"); 821 if (!func) { 822 printf("%s\n", dlerror()); 823 free(loader); 824 return (1); 825 } 826 827 tcgetattr(consout_fd, &term); 828 oldterm = term; 829 cfmakeraw(&term); 830 term.c_cflag |= CLOCAL; 831 832 tcsetattr(consout_fd, TCSAFLUSH, &term); 833 834 addenv("smbios.bios.vendor=BHYVE"); 835 addenv("boot_serial=1"); 836 837 func(&cb, NULL, USERBOOT_VERSION_5, ndisks); 838 839 free(loader); 840 return (0); 841 } 842