1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /*- 32 * Copyright (c) 2011 Google, Inc. 33 * All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 47 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 54 * SUCH DAMAGE. 55 * 56 * $FreeBSD$ 57 */ 58 59 #include <sys/cdefs.h> 60 __FBSDID("$FreeBSD$"); 61 62 #include <sys/ioctl.h> 63 #include <sys/stat.h> 64 #include <sys/disk.h> 65 #include <sys/queue.h> 66 67 #include <machine/specialreg.h> 68 #include <machine/vmm.h> 69 70 #include <dirent.h> 71 #include <dlfcn.h> 72 #include <errno.h> 73 #include <err.h> 74 #include <fcntl.h> 75 #include <getopt.h> 76 #include <libgen.h> 77 #include <limits.h> 78 #include <setjmp.h> 79 #include <stdio.h> 80 #include <stdlib.h> 81 #include <string.h> 82 #include <sysexits.h> 83 #include <termios.h> 84 #include <unistd.h> 85 86 #include <vmmapi.h> 87 88 #include "userboot.h" 89 90 #define MB (1024 * 1024UL) 91 #define GB (1024 * 1024 * 1024UL) 92 #define BSP 0 93 94 #define NDISKS 32 95 96 static char *host_base; 97 static struct termios term, oldterm; 98 static int disk_fd[NDISKS]; 99 static int ndisks; 100 static int consin_fd, consout_fd; 101 102 static int need_reinit; 103 104 static void *loader_hdl; 105 static char *loader; 106 static int explicit_loader; 107 static jmp_buf jb; 108 109 static char *vmname, *progname; 110 static struct vmctx *ctx; 111 112 static uint64_t gdtbase, cr3, rsp; 113 114 static void cb_exit(void *arg, int v); 115 116 /* 117 * Console i/o callbacks 118 */ 119 120 static void 121 cb_putc(void *arg, int ch) 122 { 123 char c = ch; 124 125 (void) write(consout_fd, &c, 1); 126 } 127 128 static int 129 cb_getc(void *arg) 130 { 131 char c; 132 133 if (read(consin_fd, &c, 1) == 1) 134 return (c); 135 return (-1); 136 } 137 138 static int 139 cb_poll(void *arg) 140 { 141 int n; 142 143 if (ioctl(consin_fd, FIONREAD, &n) >= 0) 144 return (n > 0); 145 return (0); 146 } 147 148 /* 149 * Host filesystem i/o callbacks 150 */ 151 152 struct cb_file { 153 int cf_isdir; 154 size_t cf_size; 155 struct stat cf_stat; 156 union { 157 int fd; 158 DIR *dir; 159 } cf_u; 160 }; 161 162 static int 163 cb_open(void *arg, const char *filename, void **hp) 164 { 165 struct cb_file *cf; 166 char path[PATH_MAX]; 167 168 if (!host_base) 169 return (ENOENT); 170 171 strlcpy(path, host_base, PATH_MAX); 172 if (path[strlen(path) - 1] == '/') 173 path[strlen(path) - 1] = 0; 174 strlcat(path, filename, PATH_MAX); 175 cf = malloc(sizeof(struct cb_file)); 176 if (stat(path, &cf->cf_stat) < 0) { 177 free(cf); 178 return (errno); 179 } 180 181 cf->cf_size = cf->cf_stat.st_size; 182 if (S_ISDIR(cf->cf_stat.st_mode)) { 183 cf->cf_isdir = 1; 184 cf->cf_u.dir = opendir(path); 185 if (!cf->cf_u.dir) 186 goto out; 187 *hp = cf; 188 return (0); 189 } 190 if (S_ISREG(cf->cf_stat.st_mode)) { 191 cf->cf_isdir = 0; 192 cf->cf_u.fd = open(path, O_RDONLY); 193 if (cf->cf_u.fd < 0) 194 goto out; 195 *hp = cf; 196 return (0); 197 } 198 199 out: 200 free(cf); 201 return (EINVAL); 202 } 203 204 static int 205 cb_close(void *arg, void *h) 206 { 207 struct cb_file *cf = h; 208 209 if (cf->cf_isdir) 210 closedir(cf->cf_u.dir); 211 else 212 close(cf->cf_u.fd); 213 free(cf); 214 215 return (0); 216 } 217 218 static int 219 cb_isdir(void *arg, void *h) 220 { 221 struct cb_file *cf = h; 222 223 return (cf->cf_isdir); 224 } 225 226 static int 227 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid) 228 { 229 struct cb_file *cf = h; 230 ssize_t sz; 231 232 if (cf->cf_isdir) 233 return (EINVAL); 234 sz = read(cf->cf_u.fd, buf, size); 235 if (sz < 0) 236 return (EINVAL); 237 *resid = size - sz; 238 return (0); 239 } 240 241 static int 242 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return, 243 size_t *namelen_return, char *name) 244 { 245 struct cb_file *cf = h; 246 struct dirent *dp; 247 248 if (!cf->cf_isdir) 249 return (EINVAL); 250 251 dp = readdir(cf->cf_u.dir); 252 if (!dp) 253 return (ENOENT); 254 255 /* 256 * Note: d_namlen is in the range 0..255 and therefore less 257 * than PATH_MAX so we don't need to test before copying. 258 */ 259 *fileno_return = dp->d_fileno; 260 *type_return = dp->d_type; 261 *namelen_return = dp->d_namlen; 262 memcpy(name, dp->d_name, dp->d_namlen); 263 name[dp->d_namlen] = 0; 264 265 return (0); 266 } 267 268 static int 269 cb_seek(void *arg, void *h, uint64_t offset, int whence) 270 { 271 struct cb_file *cf = h; 272 273 if (cf->cf_isdir) 274 return (EINVAL); 275 if (lseek(cf->cf_u.fd, offset, whence) < 0) 276 return (errno); 277 return (0); 278 } 279 280 static int 281 cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size) 282 { 283 struct cb_file *cf = h; 284 285 *mode = cf->cf_stat.st_mode; 286 *uid = cf->cf_stat.st_uid; 287 *gid = cf->cf_stat.st_gid; 288 *size = cf->cf_stat.st_size; 289 return (0); 290 } 291 292 /* 293 * Disk image i/o callbacks 294 */ 295 296 static int 297 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size, 298 size_t *resid) 299 { 300 ssize_t n; 301 302 if (unit < 0 || unit >= ndisks ) 303 return (EIO); 304 n = pread(disk_fd[unit], to, size, from); 305 if (n < 0) 306 return (errno); 307 *resid = size - n; 308 return (0); 309 } 310 311 static int 312 cb_diskioctl(void *arg, int unit, u_long cmd, void *data) 313 { 314 struct stat sb; 315 316 if (unit < 0 || unit >= ndisks) 317 return (EBADF); 318 319 switch (cmd) { 320 case DIOCGSECTORSIZE: 321 *(u_int *)data = 512; 322 break; 323 case DIOCGMEDIASIZE: 324 if (fstat(disk_fd[unit], &sb) != 0) 325 return (ENOTTY); 326 if (S_ISCHR(sb.st_mode) && 327 ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0) 328 return (ENOTTY); 329 *(off_t *)data = sb.st_size; 330 break; 331 default: 332 return (ENOTTY); 333 } 334 335 return (0); 336 } 337 338 /* 339 * Guest virtual machine i/o callbacks 340 */ 341 static int 342 cb_copyin(void *arg, const void *from, uint64_t to, size_t size) 343 { 344 char *ptr; 345 346 to &= 0x7fffffff; 347 348 ptr = vm_map_gpa(ctx, to, size); 349 if (ptr == NULL) 350 return (EFAULT); 351 352 memcpy(ptr, from, size); 353 return (0); 354 } 355 356 static int 357 cb_copyout(void *arg, uint64_t from, void *to, size_t size) 358 { 359 char *ptr; 360 361 from &= 0x7fffffff; 362 363 ptr = vm_map_gpa(ctx, from, size); 364 if (ptr == NULL) 365 return (EFAULT); 366 367 memcpy(to, ptr, size); 368 return (0); 369 } 370 371 static void 372 cb_setreg(void *arg, int r, uint64_t v) 373 { 374 int error; 375 enum vm_reg_name vmreg; 376 377 vmreg = VM_REG_LAST; 378 379 switch (r) { 380 case 4: 381 vmreg = VM_REG_GUEST_RSP; 382 rsp = v; 383 break; 384 default: 385 break; 386 } 387 388 if (vmreg == VM_REG_LAST) { 389 printf("test_setreg(%d): not implemented\n", r); 390 cb_exit(NULL, USERBOOT_EXIT_QUIT); 391 } 392 393 error = vm_set_register(ctx, BSP, vmreg, v); 394 if (error) { 395 perror("vm_set_register"); 396 cb_exit(NULL, USERBOOT_EXIT_QUIT); 397 } 398 } 399 400 static void 401 cb_setmsr(void *arg, int r, uint64_t v) 402 { 403 int error; 404 enum vm_reg_name vmreg; 405 406 vmreg = VM_REG_LAST; 407 408 switch (r) { 409 case MSR_EFER: 410 vmreg = VM_REG_GUEST_EFER; 411 break; 412 default: 413 break; 414 } 415 416 if (vmreg == VM_REG_LAST) { 417 printf("test_setmsr(%d): not implemented\n", r); 418 cb_exit(NULL, USERBOOT_EXIT_QUIT); 419 } 420 421 error = vm_set_register(ctx, BSP, vmreg, v); 422 if (error) { 423 perror("vm_set_msr"); 424 cb_exit(NULL, USERBOOT_EXIT_QUIT); 425 } 426 } 427 428 static void 429 cb_setcr(void *arg, int r, uint64_t v) 430 { 431 int error; 432 enum vm_reg_name vmreg; 433 434 vmreg = VM_REG_LAST; 435 436 switch (r) { 437 case 0: 438 vmreg = VM_REG_GUEST_CR0; 439 break; 440 case 3: 441 vmreg = VM_REG_GUEST_CR3; 442 cr3 = v; 443 break; 444 case 4: 445 vmreg = VM_REG_GUEST_CR4; 446 break; 447 default: 448 break; 449 } 450 451 if (vmreg == VM_REG_LAST) { 452 printf("test_setcr(%d): not implemented\n", r); 453 cb_exit(NULL, USERBOOT_EXIT_QUIT); 454 } 455 456 error = vm_set_register(ctx, BSP, vmreg, v); 457 if (error) { 458 perror("vm_set_cr"); 459 cb_exit(NULL, USERBOOT_EXIT_QUIT); 460 } 461 } 462 463 static void 464 cb_setgdt(void *arg, uint64_t base, size_t size) 465 { 466 int error; 467 468 error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0); 469 if (error != 0) { 470 perror("vm_set_desc(gdt)"); 471 cb_exit(NULL, USERBOOT_EXIT_QUIT); 472 } 473 474 gdtbase = base; 475 } 476 477 static void 478 cb_exec(void *arg, uint64_t rip) 479 { 480 int error; 481 482 if (cr3 == 0) 483 error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase, 484 rsp); 485 else 486 error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, 487 rsp); 488 if (error) { 489 perror("vm_setup_freebsd_registers"); 490 cb_exit(NULL, USERBOOT_EXIT_QUIT); 491 } 492 493 cb_exit(NULL, 0); 494 } 495 496 /* 497 * Misc 498 */ 499 500 static void 501 cb_delay(void *arg, int usec) 502 { 503 504 usleep(usec); 505 } 506 507 static void 508 cb_exit(void *arg, int v) 509 { 510 511 tcsetattr(consout_fd, TCSAFLUSH, &oldterm); 512 exit(v); 513 } 514 515 static void 516 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) 517 { 518 519 *ret_lowmem = vm_get_lowmem_size(ctx); 520 *ret_highmem = vm_get_highmem_size(ctx); 521 } 522 523 struct env { 524 char *str; /* name=value */ 525 SLIST_ENTRY(env) next; 526 }; 527 528 static SLIST_HEAD(envhead, env) envhead; 529 530 static void 531 addenv(char *str) 532 { 533 struct env *env; 534 535 env = malloc(sizeof(struct env)); 536 env->str = str; 537 SLIST_INSERT_HEAD(&envhead, env, next); 538 } 539 540 static char * 541 cb_getenv(void *arg, int num) 542 { 543 int i; 544 struct env *env; 545 546 i = 0; 547 SLIST_FOREACH(env, &envhead, next) { 548 if (i == num) 549 return (env->str); 550 i++; 551 } 552 553 return (NULL); 554 } 555 556 static int 557 cb_vm_set_register(void *arg, int vcpu, int reg, uint64_t val) 558 { 559 560 return (vm_set_register(ctx, vcpu, reg, val)); 561 } 562 563 static int 564 cb_vm_set_desc(void *arg, int vcpu, int reg, uint64_t base, u_int limit, 565 u_int access) 566 { 567 568 return (vm_set_desc(ctx, vcpu, reg, base, limit, access)); 569 } 570 571 static void 572 cb_swap_interpreter(void *arg, const char *interp_req) 573 { 574 575 /* 576 * If the user specified a loader but we detected a mismatch, we should 577 * not try to pivot to a different loader on them. 578 */ 579 free(loader); 580 if (explicit_loader == 1) { 581 perror("requested loader interpreter does not match guest userboot"); 582 cb_exit(NULL, 1); 583 } 584 if (interp_req == NULL || *interp_req == '\0') { 585 perror("guest failed to request an interpreter"); 586 cb_exit(NULL, 1); 587 } 588 589 if (asprintf(&loader, "/boot/userboot_%s.so", interp_req) == -1) 590 err(EX_OSERR, "malloc"); 591 need_reinit = 1; 592 longjmp(jb, 1); 593 } 594 595 static struct loader_callbacks cb = { 596 .getc = cb_getc, 597 .putc = cb_putc, 598 .poll = cb_poll, 599 600 .open = cb_open, 601 .close = cb_close, 602 .isdir = cb_isdir, 603 .read = cb_read, 604 .readdir = cb_readdir, 605 .seek = cb_seek, 606 .stat = cb_stat, 607 608 .diskread = cb_diskread, 609 .diskioctl = cb_diskioctl, 610 611 .copyin = cb_copyin, 612 .copyout = cb_copyout, 613 .setreg = cb_setreg, 614 .setmsr = cb_setmsr, 615 .setcr = cb_setcr, 616 .setgdt = cb_setgdt, 617 .exec = cb_exec, 618 619 .delay = cb_delay, 620 .exit = cb_exit, 621 .getmem = cb_getmem, 622 623 .getenv = cb_getenv, 624 625 /* Version 4 additions */ 626 .vm_set_register = cb_vm_set_register, 627 .vm_set_desc = cb_vm_set_desc, 628 629 /* Version 5 additions */ 630 .swap_interpreter = cb_swap_interpreter, 631 }; 632 633 static int 634 altcons_open(char *path) 635 { 636 struct stat sb; 637 int err; 638 int fd; 639 640 /* 641 * Allow stdio to be passed in so that the same string 642 * can be used for the bhyveload console and bhyve com-port 643 * parameters 644 */ 645 if (!strcmp(path, "stdio")) 646 return (0); 647 648 err = stat(path, &sb); 649 if (err == 0) { 650 if (!S_ISCHR(sb.st_mode)) 651 err = ENOTSUP; 652 else { 653 fd = open(path, O_RDWR | O_NONBLOCK); 654 if (fd < 0) 655 err = errno; 656 else 657 consin_fd = consout_fd = fd; 658 } 659 } 660 661 return (err); 662 } 663 664 static int 665 disk_open(char *path) 666 { 667 int fd; 668 669 if (ndisks >= NDISKS) 670 return (ERANGE); 671 672 fd = open(path, O_RDONLY); 673 if (fd < 0) 674 return (errno); 675 676 disk_fd[ndisks] = fd; 677 ndisks++; 678 679 return (0); 680 } 681 682 static void 683 usage(void) 684 { 685 686 fprintf(stderr, 687 "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n" 688 " %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n", 689 progname, 690 (int)strlen(progname), ""); 691 exit(1); 692 } 693 694 int 695 main(int argc, char** argv) 696 { 697 void (*func)(struct loader_callbacks *, void *, int, int); 698 uint64_t mem_size; 699 int opt, error, memflags; 700 701 progname = basename(argv[0]); 702 703 memflags = 0; 704 mem_size = 256 * MB; 705 706 consin_fd = STDIN_FILENO; 707 consout_fd = STDOUT_FILENO; 708 709 while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) { 710 switch (opt) { 711 case 'c': 712 error = altcons_open(optarg); 713 if (error != 0) 714 errx(EX_USAGE, "Could not open '%s'", optarg); 715 break; 716 717 case 'd': 718 error = disk_open(optarg); 719 if (error != 0) 720 errx(EX_USAGE, "Could not open '%s'", optarg); 721 break; 722 723 case 'e': 724 addenv(optarg); 725 break; 726 727 case 'h': 728 host_base = optarg; 729 break; 730 731 case 'l': 732 if (loader != NULL) 733 errx(EX_USAGE, "-l can only be given once"); 734 loader = strdup(optarg); 735 if (loader == NULL) 736 err(EX_OSERR, "malloc"); 737 explicit_loader = 1; 738 break; 739 740 case 'm': 741 error = vm_parse_memsize(optarg, &mem_size); 742 if (error != 0) 743 errx(EX_USAGE, "Invalid memsize '%s'", optarg); 744 break; 745 case 'C': 746 memflags |= VM_MEM_F_INCORE; 747 break; 748 case 'S': 749 memflags |= VM_MEM_F_WIRED; 750 break; 751 case '?': 752 usage(); 753 } 754 } 755 756 argc -= optind; 757 argv += optind; 758 759 if (argc != 1) 760 usage(); 761 762 vmname = argv[0]; 763 764 need_reinit = 0; 765 error = vm_create(vmname); 766 if (error) { 767 if (errno != EEXIST) { 768 perror("vm_create"); 769 exit(1); 770 } 771 need_reinit = 1; 772 } 773 774 ctx = vm_open(vmname); 775 if (ctx == NULL) { 776 perror("vm_open"); 777 exit(1); 778 } 779 780 /* 781 * setjmp in the case the guest wants to swap out interpreter, 782 * cb_swap_interpreter will swap out loader as appropriate and set 783 * need_reinit so that we end up in a clean state once again. 784 */ 785 setjmp(jb); 786 787 if (need_reinit) { 788 error = vm_reinit(ctx); 789 if (error) { 790 perror("vm_reinit"); 791 exit(1); 792 } 793 } 794 795 vm_set_memflags(ctx, memflags); 796 error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL); 797 if (error) { 798 perror("vm_setup_memory"); 799 exit(1); 800 } 801 802 if (loader == NULL) { 803 loader = strdup("/boot/userboot.so"); 804 if (loader == NULL) 805 err(EX_OSERR, "malloc"); 806 } 807 if (loader_hdl != NULL) 808 dlclose(loader_hdl); 809 loader_hdl = dlopen(loader, RTLD_LOCAL); 810 if (!loader_hdl) { 811 printf("%s\n", dlerror()); 812 free(loader); 813 return (1); 814 } 815 func = dlsym(loader_hdl, "loader_main"); 816 if (!func) { 817 printf("%s\n", dlerror()); 818 free(loader); 819 return (1); 820 } 821 822 tcgetattr(consout_fd, &term); 823 oldterm = term; 824 cfmakeraw(&term); 825 term.c_cflag |= CLOCAL; 826 827 tcsetattr(consout_fd, TCSAFLUSH, &term); 828 829 addenv("smbios.bios.vendor=BHYVE"); 830 addenv("boot_serial=1"); 831 832 func(&cb, NULL, USERBOOT_VERSION_5, ndisks); 833 834 free(loader); 835 return (0); 836 } 837