/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/mman.h>
#include <sys/time.h>

#include <machine/atomic.h>
#include <machine/segments.h>

#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
#include <errno.h>
#include <libgen.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <pthread_np.h>
#include <sysexits.h>
#include <stdbool.h>

#include <machine/vmm.h>
#ifndef WITHOUT_CAPSICUM
#include <machine/vmm_dev.h>
#endif
#include <vmmapi.h>

#include "bhyverun.h"
#include "acpi.h"
#include "atkbdc.h"
#include "inout.h"
#include "dbgport.h"
#include "fwctl.h"
#include "ioapic.h"
#include "mem.h"
#include "mevent.h"
#include "mptbl.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
#include "smbiostbl.h"
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"

#define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */

/* Byte-size helpers; main() uses MB for the default guest memory size. */
#define MB		(1024UL * 1024)
#define GB		(1024UL * MB)

/*
 * Signature shared by every VM-exit handler in the handler[] dispatch
 * table.  The handlers in this file return VMEXIT_CONTINUE or VMEXIT_ABORT.
 */
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
/* Defined outside this file; installed for VM_EXITCODE_TASK_SWITCH. */
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);

/* Name of the virtual machine: the single positional argument to main(). */
char *vmname;

/* Number of guest vcpus requested with -c (main() defaults it to 1). */
int guest_ncpus;
/* Argument of the -U option, stored as given; not interpreted in this file. */
char *guest_uuid_str;

/* Set by -H and -P respectively; consumed by fbsdrun_set_capabilities(). */
static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
/* Cleared by -W; exported through fbsdrun_virtio_msix(). */
static int virtio_msix = 1;
static int x2apic_mode = 0;	/* default is xAPIC */

/* Set by -e; passed to emulate_inout() by vmexit_inout(). */
static int strictio;
/*
 * Cleared by -w.  While set, a failed MSR emulation makes the rdmsr/wrmsr
 * handlers call vm_inject_gp() instead of ignoring the access.
 */
static int strictmsr = 1;

/* Set by -A; when non-zero main() calls acpi_build(). */
static int acpi;

/* basename(argv[0]); used only by usage(). */
static char *progname;
/* vcpu id that main() starts first ("CPU 0") and that other vcpus wait on. */
static const int BSP = 0;

/*
 * vcpus that have been added with fbsdrun_addcpu() and not yet removed by
 * fbsdrun_deletecpu().
 */
static cpuset_t cpumask;

static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);

/* Per-vcpu exit record: filled in by vm_run() from within vm_loop(). */
static struct vm_exit vmexit[VM_MAXCPU];
116 struct bhyvestats { 117 uint64_t vmexit_bogus; 118 uint64_t vmexit_reqidle; 119 uint64_t vmexit_hlt; 120 uint64_t vmexit_pause; 121 uint64_t vmexit_mtrap; 122 uint64_t vmexit_inst_emul; 123 uint64_t cpu_switch_rotate; 124 uint64_t cpu_switch_direct; 125 } stats; 126 127 struct mt_vmm_info { 128 pthread_t mt_thr; 129 struct vmctx *mt_ctx; 130 int mt_vcpu; 131 } mt_vmm_info[VM_MAXCPU]; 132 133 static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 134 135 static void 136 usage(int code) 137 { 138 139 fprintf(stderr, 140 "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 141 " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 142 " -a: local apic is in xAPIC mode (deprecated)\n" 143 " -A: create ACPI tables\n" 144 " -c: # cpus (default 1)\n" 145 " -C: include guest memory in core file\n" 146 " -e: exit on unhandled I/O access\n" 147 " -g: gdb port\n" 148 " -h: help\n" 149 " -H: vmexit from the guest on hlt\n" 150 " -l: LPC device configuration\n" 151 " -m: memory size in MB\n" 152 " -p: pin 'vcpu' to 'hostcpu'\n" 153 " -P: vmexit from the guest on pause\n" 154 " -s: <slot,driver,configinfo> PCI slot config\n" 155 " -S: guest memory cannot be swapped\n" 156 " -u: RTC keeps UTC time\n" 157 " -U: uuid\n" 158 " -w: ignore unimplemented MSRs\n" 159 " -W: force virtio to use single-vector MSI\n" 160 " -x: local apic is in x2APIC mode\n" 161 " -Y: disable MPtable generation\n", 162 progname, (int)strlen(progname), ""); 163 164 exit(code); 165 } 166 167 static int 168 pincpu_parse(const char *opt) 169 { 170 int vcpu, pcpu; 171 172 if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 173 fprintf(stderr, "invalid format: %s\n", opt); 174 return (-1); 175 } 176 177 if (vcpu < 0 || vcpu >= VM_MAXCPU) { 178 fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 179 vcpu, VM_MAXCPU - 1); 180 return (-1); 181 } 182 183 if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 184 fprintf(stderr, "hostcpu '%d' outside valid range from " 185 "0 to %d\n", pcpu, CPU_SETSIZE - 
1); 186 return (-1); 187 } 188 189 if (vcpumap[vcpu] == NULL) { 190 if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 191 perror("malloc"); 192 return (-1); 193 } 194 CPU_ZERO(vcpumap[vcpu]); 195 } 196 CPU_SET(pcpu, vcpumap[vcpu]); 197 return (0); 198 } 199 200 void 201 vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 202 int errcode) 203 { 204 struct vmctx *ctx; 205 int error, restart_instruction; 206 207 ctx = arg; 208 restart_instruction = 1; 209 210 error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 211 restart_instruction); 212 assert(error == 0); 213 } 214 215 void * 216 paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 217 { 218 219 return (vm_map_gpa(ctx, gaddr, len)); 220 } 221 222 int 223 fbsdrun_vmexit_on_pause(void) 224 { 225 226 return (guest_vmexit_on_pause); 227 } 228 229 int 230 fbsdrun_vmexit_on_hlt(void) 231 { 232 233 return (guest_vmexit_on_hlt); 234 } 235 236 int 237 fbsdrun_virtio_msix(void) 238 { 239 240 return (virtio_msix); 241 } 242 243 static void * 244 fbsdrun_start_thread(void *param) 245 { 246 char tname[MAXCOMLEN + 1]; 247 struct mt_vmm_info *mtp; 248 int vcpu; 249 250 mtp = param; 251 vcpu = mtp->mt_vcpu; 252 253 snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 254 pthread_set_name_np(mtp->mt_thr, tname); 255 256 vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 257 258 /* not reached */ 259 exit(1); 260 return (NULL); 261 } 262 263 void 264 fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 265 { 266 int error; 267 268 assert(fromcpu == BSP); 269 270 /* 271 * The 'newcpu' must be activated in the context of 'fromcpu'. If 272 * vm_activate_cpu() is delayed until newcpu's pthread starts running 273 * then vmm.ko is out-of-sync with bhyve and this can create a race 274 * with vm_suspend(). 
275 */ 276 error = vm_activate_cpu(ctx, newcpu); 277 if (error != 0) 278 err(EX_OSERR, "could not activate CPU %d", newcpu); 279 280 CPU_SET_ATOMIC(newcpu, &cpumask); 281 282 /* 283 * Set up the vmexit struct to allow execution to start 284 * at the given RIP 285 */ 286 vmexit[newcpu].rip = rip; 287 vmexit[newcpu].inst_length = 0; 288 289 mt_vmm_info[newcpu].mt_ctx = ctx; 290 mt_vmm_info[newcpu].mt_vcpu = newcpu; 291 292 error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 293 fbsdrun_start_thread, &mt_vmm_info[newcpu]); 294 assert(error == 0); 295 } 296 297 static int 298 fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 299 { 300 301 if (!CPU_ISSET(vcpu, &cpumask)) { 302 fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 303 exit(1); 304 } 305 306 CPU_CLR_ATOMIC(vcpu, &cpumask); 307 return (CPU_EMPTY(&cpumask)); 308 } 309 310 static int 311 vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 312 uint32_t eax) 313 { 314 #if BHYVE_DEBUG 315 /* 316 * put guest-driven debug here 317 */ 318 #endif 319 return (VMEXIT_CONTINUE); 320 } 321 322 static int 323 vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 324 { 325 int error; 326 int bytes, port, in, out; 327 int vcpu; 328 329 vcpu = *pvcpu; 330 331 port = vme->u.inout.port; 332 bytes = vme->u.inout.bytes; 333 in = vme->u.inout.in; 334 out = !in; 335 336 /* Extra-special case of host notifications */ 337 if (out && port == GUEST_NIO_PORT) { 338 error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 339 return (error); 340 } 341 342 error = emulate_inout(ctx, vcpu, vme, strictio); 343 if (error) { 344 fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 345 in ? "in" : "out", 346 bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), 347 port, vmexit->rip); 348 return (VMEXIT_ABORT); 349 } else { 350 return (VMEXIT_CONTINUE); 351 } 352 } 353 354 static int 355 vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 356 { 357 uint64_t val; 358 uint32_t eax, edx; 359 int error; 360 361 val = 0; 362 error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 363 if (error != 0) { 364 fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 365 vme->u.msr.code, *pvcpu); 366 if (strictmsr) { 367 vm_inject_gp(ctx, *pvcpu); 368 return (VMEXIT_CONTINUE); 369 } 370 } 371 372 eax = val; 373 error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 374 assert(error == 0); 375 376 edx = val >> 32; 377 error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 378 assert(error == 0); 379 380 return (VMEXIT_CONTINUE); 381 } 382 383 static int 384 vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 385 { 386 int error; 387 388 error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 389 if (error != 0) { 390 fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 391 vme->u.msr.code, vme->u.msr.wval, *pvcpu); 392 if (strictmsr) { 393 vm_inject_gp(ctx, *pvcpu); 394 return (VMEXIT_CONTINUE); 395 } 396 } 397 return (VMEXIT_CONTINUE); 398 } 399 400 static int 401 vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 402 { 403 404 (void)spinup_ap(ctx, *pvcpu, 405 vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 406 407 return (VMEXIT_CONTINUE); 408 } 409 410 #define DEBUG_EPT_MISCONFIG 411 #ifdef DEBUG_EPT_MISCONFIG 412 #define EXIT_REASON_EPT_MISCONFIG 49 413 #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 414 #define VMCS_IDENT(x) ((x) | 0x80000000) 415 416 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 417 static int ept_misconfig_ptenum; 418 #endif 419 420 static int 421 vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 422 { 423 424 fprintf(stderr, "vm exit[%d]\n", *pvcpu); 425 fprintf(stderr, "\treason\t\tVMX\n"); 426 
fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 427 fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 428 fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 429 fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 430 fprintf(stderr, "\tqualification\t0x%016lx\n", 431 vmexit->u.vmx.exit_qualification); 432 fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 433 fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 434 #ifdef DEBUG_EPT_MISCONFIG 435 if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 436 vm_get_register(ctx, *pvcpu, 437 VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 438 &ept_misconfig_gpa); 439 vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 440 &ept_misconfig_ptenum); 441 fprintf(stderr, "\tEPT misconfiguration:\n"); 442 fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 443 fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 444 ept_misconfig_ptenum, ept_misconfig_pte[0], 445 ept_misconfig_pte[1], ept_misconfig_pte[2], 446 ept_misconfig_pte[3]); 447 } 448 #endif /* DEBUG_EPT_MISCONFIG */ 449 return (VMEXIT_ABORT); 450 } 451 452 static int 453 vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 454 { 455 456 fprintf(stderr, "vm exit[%d]\n", *pvcpu); 457 fprintf(stderr, "\treason\t\tSVM\n"); 458 fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 459 fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 460 fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 461 fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 462 fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 463 return (VMEXIT_ABORT); 464 } 465 466 static int 467 vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 468 { 469 470 assert(vmexit->inst_length == 0); 471 472 stats.vmexit_bogus++; 473 474 return (VMEXIT_CONTINUE); 475 } 476 477 static int 478 vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 479 { 480 
481 assert(vmexit->inst_length == 0); 482 483 stats.vmexit_reqidle++; 484 485 return (VMEXIT_CONTINUE); 486 } 487 488 static int 489 vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 490 { 491 492 stats.vmexit_hlt++; 493 494 /* 495 * Just continue execution with the next instruction. We use 496 * the HLT VM exit as a way to be friendly with the host 497 * scheduler. 498 */ 499 return (VMEXIT_CONTINUE); 500 } 501 502 static int 503 vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 504 { 505 506 stats.vmexit_pause++; 507 508 return (VMEXIT_CONTINUE); 509 } 510 511 static int 512 vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 513 { 514 515 assert(vmexit->inst_length == 0); 516 517 stats.vmexit_mtrap++; 518 519 return (VMEXIT_CONTINUE); 520 } 521 522 static int 523 vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 524 { 525 int err, i; 526 struct vie *vie; 527 528 stats.vmexit_inst_emul++; 529 530 vie = &vmexit->u.inst_emul.vie; 531 err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 532 vie, &vmexit->u.inst_emul.paging); 533 534 if (err) { 535 if (err == ESRCH) { 536 fprintf(stderr, "Unhandled memory access to 0x%lx\n", 537 vmexit->u.inst_emul.gpa); 538 } 539 540 fprintf(stderr, "Failed to emulate instruction ["); 541 for (i = 0; i < vie->num_valid; i++) { 542 fprintf(stderr, "0x%02x%s", vie->inst[i], 543 i != (vie->num_valid - 1) ? 
" " : ""); 544 } 545 fprintf(stderr, "] at 0x%lx\n", vmexit->rip); 546 return (VMEXIT_ABORT); 547 } 548 549 return (VMEXIT_CONTINUE); 550 } 551 552 static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 553 static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 554 555 static int 556 vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 557 { 558 enum vm_suspend_how how; 559 560 how = vmexit->u.suspended.how; 561 562 fbsdrun_deletecpu(ctx, *pvcpu); 563 564 if (*pvcpu != BSP) { 565 pthread_mutex_lock(&resetcpu_mtx); 566 pthread_cond_signal(&resetcpu_cond); 567 pthread_mutex_unlock(&resetcpu_mtx); 568 pthread_exit(NULL); 569 } 570 571 pthread_mutex_lock(&resetcpu_mtx); 572 while (!CPU_EMPTY(&cpumask)) { 573 pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 574 } 575 pthread_mutex_unlock(&resetcpu_mtx); 576 577 switch (how) { 578 case VM_SUSPEND_RESET: 579 exit(0); 580 case VM_SUSPEND_POWEROFF: 581 exit(1); 582 case VM_SUSPEND_HALT: 583 exit(2); 584 case VM_SUSPEND_TRIPLEFAULT: 585 exit(3); 586 default: 587 fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 588 exit(100); 589 } 590 return (0); /* NOTREACHED */ 591 } 592 593 static vmexit_handler_t handler[VM_EXITCODE_MAX] = { 594 [VM_EXITCODE_INOUT] = vmexit_inout, 595 [VM_EXITCODE_INOUT_STR] = vmexit_inout, 596 [VM_EXITCODE_VMX] = vmexit_vmx, 597 [VM_EXITCODE_SVM] = vmexit_svm, 598 [VM_EXITCODE_BOGUS] = vmexit_bogus, 599 [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 600 [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 601 [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 602 [VM_EXITCODE_MTRAP] = vmexit_mtrap, 603 [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 604 [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 605 [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 606 [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 607 }; 608 609 static void 610 vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 611 { 612 int error, rc; 613 enum vm_exitcode exitcode; 614 cpuset_t active_cpus; 615 616 if (vcpumap[vcpu] != NULL) { 
617 error = pthread_setaffinity_np(pthread_self(), 618 sizeof(cpuset_t), vcpumap[vcpu]); 619 assert(error == 0); 620 } 621 622 error = vm_active_cpus(ctx, &active_cpus); 623 assert(CPU_ISSET(vcpu, &active_cpus)); 624 625 error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 626 assert(error == 0); 627 628 while (1) { 629 error = vm_run(ctx, vcpu, &vmexit[vcpu]); 630 if (error != 0) 631 break; 632 633 exitcode = vmexit[vcpu].exitcode; 634 if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 635 fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 636 exitcode); 637 exit(1); 638 } 639 640 rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 641 642 switch (rc) { 643 case VMEXIT_CONTINUE: 644 break; 645 case VMEXIT_ABORT: 646 abort(); 647 default: 648 exit(1); 649 } 650 } 651 fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 652 } 653 654 static int 655 num_vcpus_allowed(struct vmctx *ctx) 656 { 657 int tmp, error; 658 659 error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 660 661 /* 662 * The guest is allowed to spinup more than one processor only if the 663 * UNRESTRICTED_GUEST capability is available. 
664 */ 665 if (error == 0) 666 return (VM_MAXCPU); 667 else 668 return (1); 669 } 670 671 void 672 fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 673 { 674 int err, tmp; 675 676 if (fbsdrun_vmexit_on_hlt()) { 677 err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 678 if (err < 0) { 679 fprintf(stderr, "VM exit on HLT not supported\n"); 680 exit(1); 681 } 682 vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 683 if (cpu == BSP) 684 handler[VM_EXITCODE_HLT] = vmexit_hlt; 685 } 686 687 if (fbsdrun_vmexit_on_pause()) { 688 /* 689 * pause exit support required for this mode 690 */ 691 err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 692 if (err < 0) { 693 fprintf(stderr, 694 "SMP mux requested, no pause support\n"); 695 exit(1); 696 } 697 vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 698 if (cpu == BSP) 699 handler[VM_EXITCODE_PAUSE] = vmexit_pause; 700 } 701 702 if (x2apic_mode) 703 err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 704 else 705 err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 706 707 if (err) { 708 fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 709 exit(1); 710 } 711 712 vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 713 } 714 715 static struct vmctx * 716 do_open(const char *vmname) 717 { 718 struct vmctx *ctx; 719 int error; 720 bool reinit, romboot; 721 #ifndef WITHOUT_CAPSICUM 722 cap_rights_t rights; 723 const cap_ioctl_t *cmds; 724 size_t ncmds; 725 #endif 726 727 reinit = romboot = false; 728 729 if (lpc_bootrom()) 730 romboot = true; 731 732 error = vm_create(vmname); 733 if (error) { 734 if (errno == EEXIST) { 735 if (romboot) { 736 reinit = true; 737 } else { 738 /* 739 * The virtual machine has been setup by the 740 * userspace bootloader. 741 */ 742 } 743 } else { 744 perror("vm_create"); 745 exit(1); 746 } 747 } else { 748 if (!romboot) { 749 /* 750 * If the virtual machine was just created then a 751 * bootrom must be configured to boot it. 
752 */ 753 fprintf(stderr, "virtual machine cannot be booted\n"); 754 exit(1); 755 } 756 } 757 758 ctx = vm_open(vmname); 759 if (ctx == NULL) { 760 perror("vm_open"); 761 exit(1); 762 } 763 764 #ifndef WITHOUT_CAPSICUM 765 cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); 766 if (cap_rights_limit(vm_get_device_fd(ctx), &rights) == -1 && 767 errno != ENOSYS) 768 errx(EX_OSERR, "Unable to apply rights for sandbox"); 769 vm_get_ioctls(&ncmds); 770 cmds = vm_get_ioctls(NULL); 771 if (cmds == NULL) 772 errx(EX_OSERR, "out of memory"); 773 if (cap_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1 && 774 errno != ENOSYS) 775 errx(EX_OSERR, "Unable to apply rights for sandbox"); 776 free((cap_ioctl_t *)cmds); 777 #endif 778 779 if (reinit) { 780 error = vm_reinit(ctx); 781 if (error) { 782 perror("vm_reinit"); 783 exit(1); 784 } 785 } 786 return (ctx); 787 } 788 789 int 790 main(int argc, char *argv[]) 791 { 792 int c, error, gdb_port, err, bvmcons; 793 int max_vcpus, mptgen, memflags; 794 int rtc_localtime; 795 struct vmctx *ctx; 796 uint64_t rip; 797 size_t memsize; 798 char *optstr; 799 800 bvmcons = 0; 801 progname = basename(argv[0]); 802 gdb_port = 0; 803 guest_ncpus = 1; 804 memsize = 256 * MB; 805 mptgen = 1; 806 rtc_localtime = 1; 807 memflags = 0; 808 809 optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; 810 while ((c = getopt(argc, argv, optstr)) != -1) { 811 switch (c) { 812 case 'a': 813 x2apic_mode = 0; 814 break; 815 case 'A': 816 acpi = 1; 817 break; 818 case 'b': 819 bvmcons = 1; 820 break; 821 case 'p': 822 if (pincpu_parse(optarg) != 0) { 823 errx(EX_USAGE, "invalid vcpu pinning " 824 "configuration '%s'", optarg); 825 } 826 break; 827 case 'c': 828 guest_ncpus = atoi(optarg); 829 break; 830 case 'C': 831 memflags |= VM_MEM_F_INCORE; 832 break; 833 case 'g': 834 gdb_port = atoi(optarg); 835 break; 836 case 'l': 837 if (lpc_device_parse(optarg) != 0) { 838 errx(EX_USAGE, "invalid lpc device " 839 "configuration '%s'", optarg); 840 } 841 break; 842 case 's': 
843 if (pci_parse_slot(optarg) != 0) 844 exit(1); 845 else 846 break; 847 case 'S': 848 memflags |= VM_MEM_F_WIRED; 849 break; 850 case 'm': 851 error = vm_parse_memsize(optarg, &memsize); 852 if (error) 853 errx(EX_USAGE, "invalid memsize '%s'", optarg); 854 break; 855 case 'H': 856 guest_vmexit_on_hlt = 1; 857 break; 858 case 'I': 859 /* 860 * The "-I" option was used to add an ioapic to the 861 * virtual machine. 862 * 863 * An ioapic is now provided unconditionally for each 864 * virtual machine and this option is now deprecated. 865 */ 866 break; 867 case 'P': 868 guest_vmexit_on_pause = 1; 869 break; 870 case 'e': 871 strictio = 1; 872 break; 873 case 'u': 874 rtc_localtime = 0; 875 break; 876 case 'U': 877 guest_uuid_str = optarg; 878 break; 879 case 'w': 880 strictmsr = 0; 881 break; 882 case 'W': 883 virtio_msix = 0; 884 break; 885 case 'x': 886 x2apic_mode = 1; 887 break; 888 case 'Y': 889 mptgen = 0; 890 break; 891 case 'h': 892 usage(0); 893 default: 894 usage(1); 895 } 896 } 897 argc -= optind; 898 argv += optind; 899 900 if (argc != 1) 901 usage(1); 902 903 vmname = argv[0]; 904 ctx = do_open(vmname); 905 906 if (guest_ncpus < 1) { 907 fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); 908 exit(1); 909 } 910 911 max_vcpus = num_vcpus_allowed(ctx); 912 if (guest_ncpus > max_vcpus) { 913 fprintf(stderr, "%d vCPUs requested but only %d available\n", 914 guest_ncpus, max_vcpus); 915 exit(1); 916 } 917 918 fbsdrun_set_capabilities(ctx, BSP); 919 920 vm_set_memflags(ctx, memflags); 921 err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 922 if (err) { 923 fprintf(stderr, "Unable to setup memory (%d)\n", errno); 924 exit(1); 925 } 926 927 error = init_msr(); 928 if (error) { 929 fprintf(stderr, "init_msr error %d", error); 930 exit(1); 931 } 932 933 init_mem(); 934 init_inout(); 935 atkbdc_init(ctx); 936 pci_irq_init(ctx); 937 ioapic_init(ctx); 938 939 rtc_init(ctx, rtc_localtime); 940 sci_init(ctx); 941 942 /* 943 * Exit if a device emulation finds 
an error in its initilization 944 */ 945 if (init_pci(ctx) != 0) 946 exit(1); 947 948 if (gdb_port != 0) 949 init_dbgport(gdb_port); 950 951 if (bvmcons) 952 init_bvmcons(); 953 954 if (lpc_bootrom()) { 955 if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { 956 fprintf(stderr, "ROM boot failed: unrestricted guest " 957 "capability not available\n"); 958 exit(1); 959 } 960 error = vcpu_reset(ctx, BSP); 961 assert(error == 0); 962 } 963 964 error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 965 assert(error == 0); 966 967 /* 968 * build the guest tables, MP etc. 969 */ 970 if (mptgen) { 971 error = mptable_build(ctx, guest_ncpus); 972 if (error) 973 exit(1); 974 } 975 976 error = smbios_build(ctx); 977 assert(error == 0); 978 979 if (acpi) { 980 error = acpi_build(ctx, guest_ncpus); 981 assert(error == 0); 982 } 983 984 if (lpc_bootrom()) 985 fwctl_init(); 986 987 #ifndef WITHOUT_CAPSICUM 988 caph_cache_catpages(); 989 990 if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) 991 errx(EX_OSERR, "Unable to apply rights for sandbox"); 992 993 if (cap_enter() == -1 && errno != ENOSYS) 994 errx(EX_OSERR, "cap_enter() failed"); 995 #endif 996 997 /* 998 * Change the proc title to include the VM name. 999 */ 1000 setproctitle("%s", vmname); 1001 1002 /* 1003 * Add CPU 0 1004 */ 1005 fbsdrun_addcpu(ctx, BSP, BSP, rip); 1006 1007 /* 1008 * Head off to the main event dispatch loop 1009 */ 1010 mevent_dispatch(); 1011 1012 exit(1); 1013 } 1014