1e285ef8dSPeter Grehan /*- 21de7b4b8SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 31de7b4b8SPedro F. Giffuni * 4e285ef8dSPeter Grehan * Copyright (c) 2011 NetApp, Inc. 5e285ef8dSPeter Grehan * All rights reserved. 6e285ef8dSPeter Grehan * 7e285ef8dSPeter Grehan * Redistribution and use in source and binary forms, with or without 8e285ef8dSPeter Grehan * modification, are permitted provided that the following conditions 9e285ef8dSPeter Grehan * are met: 10e285ef8dSPeter Grehan * 1. Redistributions of source code must retain the above copyright 11e285ef8dSPeter Grehan * notice, this list of conditions and the following disclaimer. 12e285ef8dSPeter Grehan * 2. Redistributions in binary form must reproduce the above copyright 13e285ef8dSPeter Grehan * notice, this list of conditions and the following disclaimer in the 14e285ef8dSPeter Grehan * documentation and/or other materials provided with the distribution. 15e285ef8dSPeter Grehan * 16e285ef8dSPeter Grehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17e285ef8dSPeter Grehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18e285ef8dSPeter Grehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19e285ef8dSPeter Grehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20e285ef8dSPeter Grehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21e285ef8dSPeter Grehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22e285ef8dSPeter Grehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23e285ef8dSPeter Grehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24e285ef8dSPeter Grehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25e285ef8dSPeter Grehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26e285ef8dSPeter Grehan * SUCH DAMAGE. 27e285ef8dSPeter Grehan * 28e285ef8dSPeter Grehan * $FreeBSD$ 29e285ef8dSPeter Grehan */ 30e285ef8dSPeter Grehan 31e285ef8dSPeter Grehan #include <sys/cdefs.h> 32e285ef8dSPeter Grehan __FBSDID("$FreeBSD$"); 33e285ef8dSPeter Grehan 34e285ef8dSPeter Grehan #include <sys/types.h> 3500ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 3600ef17beSBartek Rutkowski #include <sys/capsicum.h> 3700ef17beSBartek Rutkowski #endif 38e285ef8dSPeter Grehan #include <sys/mman.h> 39483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 40483d953aSJohn Baldwin #include <sys/socket.h> 41483d953aSJohn Baldwin #include <sys/stat.h> 42483d953aSJohn Baldwin #endif 43e285ef8dSPeter Grehan #include <sys/time.h> 44483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 45483d953aSJohn Baldwin #include <sys/un.h> 46483d953aSJohn Baldwin #endif 47e285ef8dSPeter Grehan 488d56c805SYuri Pankov #include <amd64/vmm/intel/vmcs.h> 498d56c805SYuri Pankov 501c052192SNeel Natu #include <machine/atomic.h> 51e285ef8dSPeter Grehan #include <machine/segments.h> 52e285ef8dSPeter Grehan 5300ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 5400ef17beSBartek Rutkowski #include <capsicum_helpers.h> 5500ef17beSBartek Rutkowski #endif 56e285ef8dSPeter Grehan #include <stdio.h> 57e285ef8dSPeter Grehan #include <stdlib.h> 58b5331f4dSNeel Natu #include <string.h> 59200758f1SNeel Natu #include <err.h> 6000ef17beSBartek Rutkowski #include <errno.h> 61483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 62483d953aSJohn Baldwin #include <fcntl.h> 63483d953aSJohn Baldwin #endif 64e285ef8dSPeter Grehan #include <libgen.h> 65e285ef8dSPeter Grehan #include <unistd.h> 66e285ef8dSPeter Grehan #include <assert.h> 67e285ef8dSPeter Grehan #include <pthread.h> 68e285ef8dSPeter Grehan #include <pthread_np.h> 69200758f1SNeel Natu #include <sysexits.h> 709b1aa8d6SNeel Natu #include <stdbool.h> 7101d822d3SRodney W. Grimes #include <stdint.h> 72483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 73483d953aSJohn Baldwin #include <ucl.h> 74483d953aSJohn Baldwin #include <unistd.h> 75483d953aSJohn Baldwin 76483d953aSJohn Baldwin #include <libxo/xo.h> 77483d953aSJohn Baldwin #endif 78e285ef8dSPeter Grehan 79e285ef8dSPeter Grehan #include <machine/vmm.h> 8000ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 8100ef17beSBartek Rutkowski #include <machine/vmm_dev.h> 8200ef17beSBartek Rutkowski #endif 834daa95f8SConrad Meyer #include <machine/vmm_instruction_emul.h> 84e285ef8dSPeter Grehan #include <vmmapi.h> 85e285ef8dSPeter Grehan 86e285ef8dSPeter Grehan #include "bhyverun.h" 87e285ef8dSPeter Grehan #include "acpi.h" 882cf9911fSPeter Grehan #include "atkbdc.h" 89bb30b08eSConrad Meyer #include "bootrom.h" 90*621b5090SJohn Baldwin #include "config.h" 91e285ef8dSPeter Grehan #include "inout.h" 9252c39ee6SConrad Meyer #include "debug.h" 9388ac6958SPeter Grehan #include "fwctl.h" 94cd377eb3SJohn Baldwin #include "gdb.h" 953cbf3585SJohn Baldwin #include "ioapic.h" 968a68ae80SConrad Meyer #include "kernemu_dev.h" 97e285ef8dSPeter Grehan #include "mem.h" 98e285ef8dSPeter Grehan #include "mevent.h" 99e285ef8dSPeter Grehan #include "mptbl.h" 100e285ef8dSPeter Grehan #include "pci_emul.h" 101b3e9732aSJohn Baldwin #include "pci_irq.h" 102ea7f1c8cSNeel Natu #include "pci_lpc.h" 103af5bfc53STycho Nightingale #include "smbiostbl.h" 104483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 105483d953aSJohn Baldwin #include "snapshot.h" 106483d953aSJohn Baldwin #endif 107e285ef8dSPeter Grehan #include "xmsr.h" 108e285ef8dSPeter Grehan #include "spinup_ap.h" 1099d6be09fSPeter Grehan #include "rtc.h" 1109cb339ccSConrad Meyer #include "vmgenc.h" 111e285ef8dSPeter Grehan 112e285ef8dSPeter Grehan #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 113e285ef8dSPeter Grehan 114e285ef8dSPeter Grehan #define MB (1024UL * 1024) 115e285ef8dSPeter Grehan #define GB (1024UL * MB) 116e285ef8dSPeter Grehan 1178d56c805SYuri Pankov static const char * const vmx_exit_reason_desc[] = { 1188d56c805SYuri Pankov [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", 1198d56c805SYuri Pankov [EXIT_REASON_EXT_INTR] = "External interrupt", 1208d56c805SYuri Pankov [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", 1218d56c805SYuri Pankov [EXIT_REASON_INIT] = "INIT signal", 1228d56c805SYuri Pankov [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", 1238d56c805SYuri Pankov [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", 1248d56c805SYuri Pankov [EXIT_REASON_SMI] = "Other SMI", 1258d56c805SYuri Pankov [EXIT_REASON_INTR_WINDOW] = "Interrupt window", 1268d56c805SYuri Pankov [EXIT_REASON_NMI_WINDOW] = "NMI window", 1278d56c805SYuri Pankov [EXIT_REASON_TASK_SWITCH] = "Task switch", 1288d56c805SYuri Pankov [EXIT_REASON_CPUID] = "CPUID", 1298d56c805SYuri Pankov [EXIT_REASON_GETSEC] = "GETSEC", 1308d56c805SYuri Pankov [EXIT_REASON_HLT] = "HLT", 1318d56c805SYuri Pankov [EXIT_REASON_INVD] = "INVD", 1328d56c805SYuri Pankov [EXIT_REASON_INVLPG] = "INVLPG", 1338d56c805SYuri Pankov [EXIT_REASON_RDPMC] = "RDPMC", 1348d56c805SYuri Pankov [EXIT_REASON_RDTSC] = "RDTSC", 1358d56c805SYuri Pankov [EXIT_REASON_RSM] = "RSM", 1368d56c805SYuri Pankov [EXIT_REASON_VMCALL] = "VMCALL", 1378d56c805SYuri Pankov [EXIT_REASON_VMCLEAR] = "VMCLEAR", 1388d56c805SYuri Pankov [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", 1398d56c805SYuri Pankov [EXIT_REASON_VMPTRLD] = "VMPTRLD", 1408d56c805SYuri Pankov [EXIT_REASON_VMPTRST] = "VMPTRST", 1418d56c805SYuri Pankov [EXIT_REASON_VMREAD] = "VMREAD", 1428d56c805SYuri Pankov [EXIT_REASON_VMRESUME] = "VMRESUME", 1438d56c805SYuri Pankov [EXIT_REASON_VMWRITE] = "VMWRITE", 1448d56c805SYuri Pankov [EXIT_REASON_VMXOFF] = "VMXOFF", 1458d56c805SYuri Pankov [EXIT_REASON_VMXON] = "VMXON", 1468d56c805SYuri Pankov [EXIT_REASON_CR_ACCESS] = "Control-register accesses", 1478d56c805SYuri Pankov [EXIT_REASON_DR_ACCESS] = "MOV DR", 1488d56c805SYuri Pankov [EXIT_REASON_INOUT] = "I/O instruction", 1498d56c805SYuri Pankov [EXIT_REASON_RDMSR] = "RDMSR", 1508d56c805SYuri Pankov [EXIT_REASON_WRMSR] = "WRMSR", 1518d56c805SYuri Pankov [EXIT_REASON_INVAL_VMCS] = 1528d56c805SYuri Pankov "VM-entry failure due to invalid guest state", 1538d56c805SYuri Pankov [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", 1548d56c805SYuri Pankov [EXIT_REASON_MWAIT] = "MWAIT", 1558d56c805SYuri Pankov [EXIT_REASON_MTF] = "Monitor trap flag", 1568d56c805SYuri Pankov [EXIT_REASON_MONITOR] = "MONITOR", 1578d56c805SYuri Pankov [EXIT_REASON_PAUSE] = "PAUSE", 1588d56c805SYuri Pankov [EXIT_REASON_MCE_DURING_ENTRY] = 1598d56c805SYuri Pankov "VM-entry failure due to machine-check event", 1608d56c805SYuri Pankov [EXIT_REASON_TPR] = "TPR below threshold", 1618d56c805SYuri Pankov [EXIT_REASON_APIC_ACCESS] = "APIC access", 1628d56c805SYuri Pankov [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", 1638d56c805SYuri Pankov [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", 1648d56c805SYuri Pankov [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", 1658d56c805SYuri Pankov [EXIT_REASON_EPT_FAULT] = "EPT violation", 1668d56c805SYuri Pankov [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", 1678d56c805SYuri Pankov [EXIT_REASON_INVEPT] = "INVEPT", 1688d56c805SYuri Pankov [EXIT_REASON_RDTSCP] = "RDTSCP", 1698d56c805SYuri Pankov [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", 1708d56c805SYuri Pankov [EXIT_REASON_INVVPID] = "INVVPID", 1718d56c805SYuri Pankov [EXIT_REASON_WBINVD] = "WBINVD", 1728d56c805SYuri Pankov [EXIT_REASON_XSETBV] = "XSETBV", 1738d56c805SYuri Pankov [EXIT_REASON_APIC_WRITE] = "APIC write", 1748d56c805SYuri Pankov [EXIT_REASON_RDRAND] = "RDRAND", 1758d56c805SYuri Pankov [EXIT_REASON_INVPCID] = "INVPCID", 1768d56c805SYuri Pankov [EXIT_REASON_VMFUNC] = "VMFUNC", 1778d56c805SYuri Pankov [EXIT_REASON_ENCLS] = "ENCLS", 1788d56c805SYuri Pankov [EXIT_REASON_RDSEED] = "RDSEED", 1798d56c805SYuri Pankov [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", 1808d56c805SYuri Pankov [EXIT_REASON_XSAVES] = "XSAVES", 1818d56c805SYuri Pankov [EXIT_REASON_XRSTORS] = "XRSTORS" 1828d56c805SYuri Pankov }; 1838d56c805SYuri Pankov 184e285ef8dSPeter Grehan typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 1853d5444c8SNeel Natu extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); 186e285ef8dSPeter Grehan 187e285ef8dSPeter Grehan int guest_ncpus; 18801d822d3SRodney W. Grimes uint16_t cores, maxcpus, sockets, threads; 18901d822d3SRodney W. Grimes 190332eff95SVincenzo Maffione int raw_stdio = 0; 191332eff95SVincenzo Maffione 192e285ef8dSPeter Grehan static char *progname; 193e285ef8dSPeter Grehan static const int BSP = 0; 194e285ef8dSPeter Grehan 1950826d045SNeel Natu static cpuset_t cpumask; 196e285ef8dSPeter Grehan 197e285ef8dSPeter Grehan static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 198e285ef8dSPeter Grehan 199d37f2adbSNeel Natu static struct vm_exit vmexit[VM_MAXCPU]; 200e285ef8dSPeter Grehan 20194c3b3bfSPeter Grehan struct bhyvestats { 202e285ef8dSPeter Grehan uint64_t vmexit_bogus; 203248e6799SNeel Natu uint64_t vmexit_reqidle; 204e285ef8dSPeter Grehan uint64_t vmexit_hlt; 205e285ef8dSPeter Grehan uint64_t vmexit_pause; 206e285ef8dSPeter Grehan uint64_t vmexit_mtrap; 207318224bbSNeel Natu uint64_t vmexit_inst_emul; 208e285ef8dSPeter Grehan uint64_t cpu_switch_rotate; 209e285ef8dSPeter Grehan uint64_t cpu_switch_direct; 210e285ef8dSPeter Grehan } stats; 211e285ef8dSPeter Grehan 212e285ef8dSPeter Grehan struct mt_vmm_info { 213e285ef8dSPeter Grehan pthread_t mt_thr; 214e285ef8dSPeter Grehan struct vmctx *mt_ctx; 215e285ef8dSPeter Grehan int mt_vcpu; 216e285ef8dSPeter Grehan } mt_vmm_info[VM_MAXCPU]; 217e285ef8dSPeter Grehan 2189b6155a2SNeel Natu static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 2199b6155a2SNeel Natu 220e285ef8dSPeter Grehan static void 221e285ef8dSPeter Grehan usage(int code) 222e285ef8dSPeter Grehan { 223e285ef8dSPeter Grehan 224e285ef8dSPeter Grehan fprintf(stderr, 225c4df8cbfSRobert Wing "Usage: %s [-aehuwxACDHPSWY]\n" 22601d822d3SRodney W. Grimes " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" 227*621b5090SJohn Baldwin " %*s [-k <file>] [-l <lpc>] [-m mem] [-o <var>=<value>]\n" 228*621b5090SJohn Baldwin " %*s [-p vcpu:hostcpu] [-s <pci>] [-U uuid] [<vm>]\n" 22952e5c8a2SNeel Natu " -a: local apic is in xAPIC mode (deprecated)\n" 2305749449dSJohn Baldwin " -A: create ACPI tables\n" 231d96ee3e0SRodney W. Grimes " -c: number of cpus and/or topology specification\n" 2320dd10c00SNeel Natu " -C: include guest memory in core file\n" 2330a1016f9SPawel Biernacki " -D: destroy on power-off\n" 234b5331f4dSNeel Natu " -e: exit on unhandled I/O access\n" 235e285ef8dSPeter Grehan " -h: help\n" 236cde1f5b8SJohn Baldwin " -H: vmexit from the guest on hlt\n" 237*621b5090SJohn Baldwin " -k: key=value flat config file\n" 238ea7f1c8cSNeel Natu " -l: LPC device configuration\n" 2392cf9911fSPeter Grehan " -m: memory size in MB\n" 240*621b5090SJohn Baldwin " -o: set config 'var' to 'value'\n" 241*621b5090SJohn Baldwin " -p: pin 'vcpu' to 'hostcpu'\n" 242*621b5090SJohn Baldwin " -P: vmexit from the guest on pause\n" 243483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 244483d953aSJohn Baldwin " -r: path to checkpoint file\n" 245483d953aSJohn Baldwin #endif 246cde1f5b8SJohn Baldwin " -s: <slot,driver,configinfo> PCI slot config\n" 2479b1aa8d6SNeel Natu " -S: guest memory cannot be swapped\n" 248c9747678SNeel Natu " -u: RTC keeps UTC time\n" 249cde1f5b8SJohn Baldwin " -U: uuid\n" 25052e5c8a2SNeel Natu " -w: ignore unimplemented MSRs\n" 251cde1f5b8SJohn Baldwin " -W: force virtio to use single-vector MSI\n" 252af5bfc53STycho Nightingale " -x: local apic is in x2APIC mode\n" 253cde1f5b8SJohn Baldwin " -Y: disable MPtable generation\n", 2544edc7f41SMarcelo Araujo progname, (int)strlen(progname), "", (int)strlen(progname), "", 2554edc7f41SMarcelo Araujo (int)strlen(progname), ""); 25694c3b3bfSPeter Grehan 257e285ef8dSPeter Grehan exit(code); 258e285ef8dSPeter Grehan } 259e285ef8dSPeter Grehan 26001d822d3SRodney W. Grimes /* 26101d822d3SRodney W. Grimes * XXX This parser is known to have the following issues: 262*621b5090SJohn Baldwin * 1. It accepts null key=value tokens ",," as setting "cpus" to an 263*621b5090SJohn Baldwin * empty string. 26401d822d3SRodney W. Grimes * 26501d822d3SRodney W. Grimes * The acceptance of a null specification ('-c ""') is by design to match the 26601d822d3SRodney W. Grimes * manual page syntax specification, this results in a topology of 1 vCPU. 26701d822d3SRodney W. Grimes */ 26801d822d3SRodney W. Grimes static int 26901d822d3SRodney W. Grimes topology_parse(const char *opt) 27001d822d3SRodney W. Grimes { 27101d822d3SRodney W. Grimes char *cp, *str; 27201d822d3SRodney W. Grimes 273*621b5090SJohn Baldwin if (*opt == '\0') { 274*621b5090SJohn Baldwin set_config_value("sockets", "1"); 275*621b5090SJohn Baldwin set_config_value("cores", "1"); 276*621b5090SJohn Baldwin set_config_value("threads", "1"); 277*621b5090SJohn Baldwin set_config_value("cpus", "1"); 278*621b5090SJohn Baldwin return (0); 279*621b5090SJohn Baldwin } 280*621b5090SJohn Baldwin 28101d822d3SRodney W. Grimes str = strdup(opt); 282635a2c89SMarcelo Araujo if (str == NULL) 283*621b5090SJohn Baldwin errx(4, "Failed to allocate memory"); 28401d822d3SRodney W. Grimes 28501d822d3SRodney W. Grimes while ((cp = strsep(&str, ",")) != NULL) { 286*621b5090SJohn Baldwin if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) 287*621b5090SJohn Baldwin set_config_value("cpus", cp + strlen("cpus=")); 288*621b5090SJohn Baldwin else if (strncmp(cp, "sockets=", strlen("sockets=")) == 0) 289*621b5090SJohn Baldwin set_config_value("sockets", cp + strlen("sockets=")); 290*621b5090SJohn Baldwin else if (strncmp(cp, "cores=", strlen("cores=")) == 0) 291*621b5090SJohn Baldwin set_config_value("cores", cp + strlen("cores=")); 292*621b5090SJohn Baldwin else if (strncmp(cp, "threads=", strlen("threads=")) == 0) 293*621b5090SJohn Baldwin set_config_value("threads", cp + strlen("threads=")); 29401d822d3SRodney W. Grimes #ifdef notyet /* Do not expose this until vmm.ko implements it */ 295*621b5090SJohn Baldwin else if (strncmp(cp, "maxcpus=", strlen("maxcpus=")) == 0) 296*621b5090SJohn Baldwin set_config_value("maxcpus", cp + strlen("maxcpus=")); 29701d822d3SRodney W. Grimes #endif 298*621b5090SJohn Baldwin else if (strchr(cp, '=') != NULL) 299*621b5090SJohn Baldwin goto out; 30001d822d3SRodney W. Grimes else 301*621b5090SJohn Baldwin set_config_value("cpus", cp); 30201d822d3SRodney W. Grimes } 303ea089f8cSMarcelo Araujo free(str); 30401d822d3SRodney W. Grimes return (0); 305ea089f8cSMarcelo Araujo 306ea089f8cSMarcelo Araujo out: 307ea089f8cSMarcelo Araujo free(str); 308ea089f8cSMarcelo Araujo return (-1); 30901d822d3SRodney W. Grimes } 31001d822d3SRodney W. Grimes 3119b6155a2SNeel Natu static int 312*621b5090SJohn Baldwin parse_int_value(const char *key, const char *value, int minval, int maxval) 313*621b5090SJohn Baldwin { 314*621b5090SJohn Baldwin char *cp; 315*621b5090SJohn Baldwin long lval; 316*621b5090SJohn Baldwin 317*621b5090SJohn Baldwin errno = 0; 318*621b5090SJohn Baldwin lval = strtol(value, &cp, 0); 319*621b5090SJohn Baldwin if (errno != 0 || *cp != '\0' || cp == value || lval < minval || 320*621b5090SJohn Baldwin lval > maxval) 321*621b5090SJohn Baldwin errx(4, "Invalid value for %s: '%s'", key, value); 322*621b5090SJohn Baldwin return (lval); 323*621b5090SJohn Baldwin } 324*621b5090SJohn Baldwin 325*621b5090SJohn Baldwin /* 326*621b5090SJohn Baldwin * Set the sockets, cores, threads, and guest_cpus variables based on 327*621b5090SJohn Baldwin * the configured topology. 328*621b5090SJohn Baldwin * 329*621b5090SJohn Baldwin * The limits of UINT16_MAX are due to the types passed to 330*621b5090SJohn Baldwin * vm_set_topology(). vmm.ko may enforce tighter limits. 331*621b5090SJohn Baldwin */ 332*621b5090SJohn Baldwin static void 333*621b5090SJohn Baldwin calc_topolopgy(void) 334*621b5090SJohn Baldwin { 335*621b5090SJohn Baldwin const char *value; 336*621b5090SJohn Baldwin bool explicit_cpus; 337*621b5090SJohn Baldwin uint64_t ncpus; 338*621b5090SJohn Baldwin 339*621b5090SJohn Baldwin value = get_config_value("cpus"); 340*621b5090SJohn Baldwin if (value != NULL) { 341*621b5090SJohn Baldwin guest_ncpus = parse_int_value("cpus", value, 1, UINT16_MAX); 342*621b5090SJohn Baldwin explicit_cpus = true; 343*621b5090SJohn Baldwin } else { 344*621b5090SJohn Baldwin guest_ncpus = 1; 345*621b5090SJohn Baldwin explicit_cpus = false; 346*621b5090SJohn Baldwin } 347*621b5090SJohn Baldwin value = get_config_value("cores"); 348*621b5090SJohn Baldwin if (value != NULL) 349*621b5090SJohn Baldwin cores = parse_int_value("cores", value, 1, UINT16_MAX); 350*621b5090SJohn Baldwin else 351*621b5090SJohn Baldwin cores = 1; 352*621b5090SJohn Baldwin value = get_config_value("threads"); 353*621b5090SJohn Baldwin if (value != NULL) 354*621b5090SJohn Baldwin threads = parse_int_value("threads", value, 1, UINT16_MAX); 355*621b5090SJohn Baldwin else 356*621b5090SJohn Baldwin threads = 1; 357*621b5090SJohn Baldwin value = get_config_value("sockets"); 358*621b5090SJohn Baldwin if (value != NULL) 359*621b5090SJohn Baldwin sockets = parse_int_value("sockets", value, 1, UINT16_MAX); 360*621b5090SJohn Baldwin else 361*621b5090SJohn Baldwin sockets = guest_ncpus; 362*621b5090SJohn Baldwin 363*621b5090SJohn Baldwin /* 364*621b5090SJohn Baldwin * Compute sockets * cores * threads avoiding overflow. The 365*621b5090SJohn Baldwin * range check above insures these are 16 bit values. 366*621b5090SJohn Baldwin */ 367*621b5090SJohn Baldwin ncpus = (uint64_t)sockets * cores * threads; 368*621b5090SJohn Baldwin if (ncpus > UINT16_MAX) 369*621b5090SJohn Baldwin errx(4, "Computed number of vCPUs too high: %ju", 370*621b5090SJohn Baldwin (uintmax_t)ncpus); 371*621b5090SJohn Baldwin 372*621b5090SJohn Baldwin if (explicit_cpus) { 373*621b5090SJohn Baldwin if (guest_ncpus != ncpus) 374*621b5090SJohn Baldwin errx(4, "Topology (%d sockets, %d cores, %d threads) " 375*621b5090SJohn Baldwin "does not match %d vCPUs", sockets, cores, threads, 376*621b5090SJohn Baldwin guest_ncpus); 377*621b5090SJohn Baldwin } else 378*621b5090SJohn Baldwin guest_ncpus = ncpus; 379*621b5090SJohn Baldwin } 380*621b5090SJohn Baldwin 381*621b5090SJohn Baldwin static int 3829b6155a2SNeel Natu pincpu_parse(const char *opt) 3839b6155a2SNeel Natu { 384*621b5090SJohn Baldwin const char *value; 385*621b5090SJohn Baldwin char *newval; 386*621b5090SJohn Baldwin char key[16]; 3879b6155a2SNeel Natu int vcpu, pcpu; 3889b6155a2SNeel Natu 3899b6155a2SNeel Natu if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 3909b6155a2SNeel Natu fprintf(stderr, "invalid format: %s\n", opt); 3919b6155a2SNeel Natu return (-1); 3929b6155a2SNeel Natu } 3939b6155a2SNeel Natu 3949b6155a2SNeel Natu if (vcpu < 0 || vcpu >= VM_MAXCPU) { 3959b6155a2SNeel Natu fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 3969b6155a2SNeel Natu vcpu, VM_MAXCPU - 1); 3979b6155a2SNeel Natu return (-1); 3989b6155a2SNeel Natu } 3999b6155a2SNeel Natu 4009b6155a2SNeel Natu if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 4019b6155a2SNeel Natu fprintf(stderr, "hostcpu '%d' outside valid range from " 4029b6155a2SNeel Natu "0 to %d\n", pcpu, CPU_SETSIZE - 1); 4039b6155a2SNeel Natu return (-1); 4049b6155a2SNeel Natu } 4059b6155a2SNeel Natu 406*621b5090SJohn Baldwin snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); 407*621b5090SJohn Baldwin value = get_config_value(key); 408*621b5090SJohn Baldwin 409*621b5090SJohn Baldwin if (asprintf(&newval, "%s%s%d", value != NULL ? value : "", 410*621b5090SJohn Baldwin value != NULL ? "," : "", pcpu) == -1) { 411*621b5090SJohn Baldwin perror("failed to build new cpuset string"); 4129b6155a2SNeel Natu return (-1); 4139b6155a2SNeel Natu } 414*621b5090SJohn Baldwin 415*621b5090SJohn Baldwin set_config_value(key, newval); 416*621b5090SJohn Baldwin free(newval); 417*621b5090SJohn Baldwin return (0); 418*621b5090SJohn Baldwin } 419*621b5090SJohn Baldwin 420*621b5090SJohn Baldwin static void 421*621b5090SJohn Baldwin parse_cpuset(int vcpu, const char *list, cpuset_t *set) 422*621b5090SJohn Baldwin { 423*621b5090SJohn Baldwin char *cp, *token; 424*621b5090SJohn Baldwin int pcpu, start; 425*621b5090SJohn Baldwin 426*621b5090SJohn Baldwin CPU_ZERO(set); 427*621b5090SJohn Baldwin start = -1; 428*621b5090SJohn Baldwin token = __DECONST(char *, list); 429*621b5090SJohn Baldwin for (;;) { 430*621b5090SJohn Baldwin pcpu = strtoul(token, &cp, 0); 431*621b5090SJohn Baldwin if (cp == token) 432*621b5090SJohn Baldwin errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); 433*621b5090SJohn Baldwin if (pcpu < 0 || pcpu >= CPU_SETSIZE) 434*621b5090SJohn Baldwin errx(4, "hostcpu '%d' outside valid range from 0 to %d", 435*621b5090SJohn Baldwin pcpu, CPU_SETSIZE - 1); 436*621b5090SJohn Baldwin switch (*cp) { 437*621b5090SJohn Baldwin case ',': 438*621b5090SJohn Baldwin case '\0': 439*621b5090SJohn Baldwin if (start >= 0) { 440*621b5090SJohn Baldwin if (start > pcpu) 441*621b5090SJohn Baldwin errx(4, "Invalid hostcpu range %d-%d", 442*621b5090SJohn Baldwin start, pcpu); 443*621b5090SJohn Baldwin while (start < pcpu) { 444*621b5090SJohn Baldwin CPU_SET(start, vcpumap[vcpu]); 445*621b5090SJohn Baldwin start++; 446*621b5090SJohn Baldwin } 447*621b5090SJohn Baldwin start = -1; 4489b6155a2SNeel Natu } 4499b6155a2SNeel Natu CPU_SET(pcpu, vcpumap[vcpu]); 450*621b5090SJohn Baldwin break; 451*621b5090SJohn Baldwin case '-': 452*621b5090SJohn Baldwin if (start >= 0) 453*621b5090SJohn Baldwin errx(4, "invalid cpuset for vcpu %d: '%s'", 454*621b5090SJohn Baldwin vcpu, list); 455*621b5090SJohn Baldwin start = pcpu; 456*621b5090SJohn Baldwin break; 457*621b5090SJohn Baldwin default: 458*621b5090SJohn Baldwin errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); 459*621b5090SJohn Baldwin } 460*621b5090SJohn Baldwin if (*cp == '\0') 461*621b5090SJohn Baldwin break; 462*621b5090SJohn Baldwin token = cp + 1; 463*621b5090SJohn Baldwin } 464*621b5090SJohn Baldwin } 465*621b5090SJohn Baldwin 466*621b5090SJohn Baldwin static void 467*621b5090SJohn Baldwin build_vcpumaps(void) 468*621b5090SJohn Baldwin { 469*621b5090SJohn Baldwin char key[16]; 470*621b5090SJohn Baldwin const char *value; 471*621b5090SJohn Baldwin int vcpu; 472*621b5090SJohn Baldwin 473*621b5090SJohn Baldwin for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { 474*621b5090SJohn Baldwin snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); 475*621b5090SJohn Baldwin value = get_config_value(key); 476*621b5090SJohn Baldwin if (value == NULL) 477*621b5090SJohn Baldwin continue; 478*621b5090SJohn Baldwin vcpumap[vcpu] = malloc(sizeof(cpuset_t)); 479*621b5090SJohn Baldwin if (vcpumap[vcpu] == NULL) 480*621b5090SJohn Baldwin err(4, "Failed to allocate cpuset for vcpu %d", vcpu); 481*621b5090SJohn Baldwin parse_cpuset(vcpu, value, vcpumap[vcpu]); 482*621b5090SJohn Baldwin } 4839b6155a2SNeel Natu } 4849b6155a2SNeel Natu 485d37f2adbSNeel Natu void 486d37f2adbSNeel Natu vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 487d37f2adbSNeel Natu int errcode) 488d37f2adbSNeel Natu { 489d37f2adbSNeel Natu struct vmctx *ctx; 490d087a399SNeel Natu int error, restart_instruction; 491d37f2adbSNeel Natu 492d37f2adbSNeel Natu ctx = arg; 493d087a399SNeel Natu restart_instruction = 1; 494d37f2adbSNeel Natu 495d087a399SNeel Natu error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 496d087a399SNeel Natu restart_instruction); 497d087a399SNeel Natu assert(error == 0); 498d37f2adbSNeel Natu } 499d37f2adbSNeel Natu 500e285ef8dSPeter Grehan void * 501b060ba50SNeel Natu paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 502e285ef8dSPeter Grehan { 503e285ef8dSPeter Grehan 504b060ba50SNeel Natu return (vm_map_gpa(ctx, gaddr, len)); 505e285ef8dSPeter Grehan } 506e285ef8dSPeter Grehan 507483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 508483d953aSJohn Baldwin uintptr_t 509483d953aSJohn Baldwin paddr_host2guest(struct vmctx *ctx, void *addr) 510483d953aSJohn Baldwin { 511483d953aSJohn Baldwin return (vm_rev_map_gpa(ctx, addr)); 512483d953aSJohn Baldwin } 513483d953aSJohn Baldwin #endif 514483d953aSJohn Baldwin 515e285ef8dSPeter Grehan int 516062b878fSPeter Grehan fbsdrun_virtio_msix(void) 517062b878fSPeter Grehan { 518062b878fSPeter Grehan 519*621b5090SJohn Baldwin return (get_config_bool_default("virtio_msix", true)); 520062b878fSPeter Grehan } 521062b878fSPeter Grehan 522e285ef8dSPeter Grehan static void * 523e285ef8dSPeter Grehan fbsdrun_start_thread(void *param) 524e285ef8dSPeter Grehan { 525e285ef8dSPeter Grehan char tname[MAXCOMLEN + 1]; 526e285ef8dSPeter Grehan struct mt_vmm_info *mtp; 527e285ef8dSPeter Grehan int vcpu; 528e285ef8dSPeter Grehan 529e285ef8dSPeter Grehan mtp = param; 530e285ef8dSPeter Grehan vcpu = mtp->mt_vcpu; 531e285ef8dSPeter Grehan 5327f5487acSPeter Grehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 533e285ef8dSPeter Grehan pthread_set_name_np(mtp->mt_thr, tname); 534e285ef8dSPeter Grehan 535483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 536483d953aSJohn Baldwin checkpoint_cpu_add(vcpu); 537483d953aSJohn Baldwin #endif 538cd377eb3SJohn Baldwin gdb_cpu_add(vcpu); 539cd377eb3SJohn Baldwin 540e285ef8dSPeter Grehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 541e285ef8dSPeter Grehan 542e285ef8dSPeter Grehan /* not reached */ 543e285ef8dSPeter Grehan exit(1); 544e285ef8dSPeter Grehan return (NULL); 545e285ef8dSPeter Grehan } 546e285ef8dSPeter Grehan 547e285ef8dSPeter Grehan void 5480826d045SNeel Natu fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 549e285ef8dSPeter Grehan { 550e285ef8dSPeter Grehan int error; 551e285ef8dSPeter Grehan 5520826d045SNeel Natu assert(fromcpu == BSP); 553e285ef8dSPeter Grehan 55495ebc360SNeel Natu /* 55595ebc360SNeel Natu * The 'newcpu' must be activated in the context of 'fromcpu'. If 55695ebc360SNeel Natu * vm_activate_cpu() is delayed until newcpu's pthread starts running 55795ebc360SNeel Natu * then vmm.ko is out-of-sync with bhyve and this can create a race 55895ebc360SNeel Natu * with vm_suspend(). 55995ebc360SNeel Natu */ 56095ebc360SNeel Natu error = vm_activate_cpu(ctx, newcpu); 56168dd37f7SEnji Cooper if (error != 0) 56268dd37f7SEnji Cooper err(EX_OSERR, "could not activate CPU %d", newcpu); 56395ebc360SNeel Natu 5640826d045SNeel Natu CPU_SET_ATOMIC(newcpu, &cpumask); 565e285ef8dSPeter Grehan 566e285ef8dSPeter Grehan /* 567e285ef8dSPeter Grehan * Set up the vmexit struct to allow execution to start 568e285ef8dSPeter Grehan * at the given RIP 569e285ef8dSPeter Grehan */ 5700826d045SNeel Natu vmexit[newcpu].rip = rip; 5710826d045SNeel Natu vmexit[newcpu].inst_length = 0; 572e285ef8dSPeter Grehan 5730826d045SNeel Natu mt_vmm_info[newcpu].mt_ctx = ctx; 5740826d045SNeel Natu mt_vmm_info[newcpu].mt_vcpu = newcpu; 575e285ef8dSPeter Grehan 5760826d045SNeel Natu error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 5770826d045SNeel Natu fbsdrun_start_thread, &mt_vmm_info[newcpu]); 578e285ef8dSPeter Grehan assert(error == 0); 579e285ef8dSPeter Grehan } 580e285ef8dSPeter Grehan 581e285ef8dSPeter Grehan static int 5821c052192SNeel Natu fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 5831c052192SNeel Natu { 5841c052192SNeel Natu 5850826d045SNeel Natu if (!CPU_ISSET(vcpu, &cpumask)) { 58606782425SNeel Natu fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 587989e062bSMarcelo Araujo exit(4); 5881c052192SNeel Natu } 5891c052192SNeel Natu 5900826d045SNeel Natu CPU_CLR_ATOMIC(vcpu, &cpumask); 5910826d045SNeel Natu return (CPU_EMPTY(&cpumask)); 5921c052192SNeel Natu } 5931c052192SNeel Natu 5941c052192SNeel Natu static int 595e285ef8dSPeter Grehan vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 596e285ef8dSPeter Grehan uint32_t eax) 597e285ef8dSPeter Grehan { 59894c3b3bfSPeter Grehan #if BHYVE_DEBUG 59994c3b3bfSPeter Grehan /* 60094c3b3bfSPeter Grehan * put guest-driven debug here 60194c3b3bfSPeter Grehan */ 602e285ef8dSPeter Grehan #endif 603e285ef8dSPeter Grehan return (VMEXIT_CONTINUE); 604e285ef8dSPeter Grehan } 605e285ef8dSPeter Grehan 606e285ef8dSPeter Grehan static int 607e285ef8dSPeter Grehan vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 608e285ef8dSPeter Grehan { 609e285ef8dSPeter Grehan int error; 6103ec1cff5SMarcelo Araujo int bytes, port, in, out; 611e285ef8dSPeter Grehan int vcpu; 612e285ef8dSPeter Grehan 613e285ef8dSPeter Grehan vcpu = *pvcpu; 614e285ef8dSPeter Grehan 615e285ef8dSPeter Grehan port = vme->u.inout.port; 616e285ef8dSPeter Grehan bytes = vme->u.inout.bytes; 617e285ef8dSPeter Grehan in = vme->u.inout.in; 618e285ef8dSPeter Grehan out = !in; 619e285ef8dSPeter Grehan 620e285ef8dSPeter Grehan /* Extra-special case of host notifications */ 621d17b5104SNeel Natu if (out && port == GUEST_NIO_PORT) { 622d17b5104SNeel Natu error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 623d17b5104SNeel Natu return (error); 624d17b5104SNeel Natu } 625e285ef8dSPeter Grehan 626*621b5090SJohn Baldwin error = emulate_inout(ctx, vcpu, vme); 627afd5e8baSNeel Natu if (error) { 6283b65fbe4STycho Nightingale fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 6293b65fbe4STycho Nightingale in ? "in" : "out", 6303b65fbe4STycho Nightingale bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), 6313b65fbe4STycho Nightingale port, vmexit->rip); 632ee2dbd02SNeel Natu return (VMEXIT_ABORT); 633afd5e8baSNeel Natu } else { 634afd5e8baSNeel Natu return (VMEXIT_CONTINUE); 635e285ef8dSPeter Grehan } 636e285ef8dSPeter Grehan } 637e285ef8dSPeter Grehan 638e285ef8dSPeter Grehan static int 639e285ef8dSPeter Grehan vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 640e285ef8dSPeter Grehan { 641851d84f1SNeel Natu uint64_t val; 642851d84f1SNeel Natu uint32_t eax, edx; 643851d84f1SNeel Natu int error; 644851d84f1SNeel Natu 645851d84f1SNeel Natu val = 0; 646851d84f1SNeel Natu error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 647851d84f1SNeel Natu if (error != 0) { 648851d84f1SNeel Natu fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 649851d84f1SNeel Natu vme->u.msr.code, *pvcpu); 650*621b5090SJohn Baldwin if (get_config_bool("x86.strictmsr")) { 651d37f2adbSNeel Natu vm_inject_gp(ctx, *pvcpu); 652d087a399SNeel Natu return (VMEXIT_CONTINUE); 653dc506506SNeel Natu } 654e285ef8dSPeter Grehan } 655e285ef8dSPeter Grehan 656851d84f1SNeel Natu eax = val; 657851d84f1SNeel Natu error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 658851d84f1SNeel Natu assert(error == 0); 659851d84f1SNeel Natu 660851d84f1SNeel Natu edx = val >> 32; 661851d84f1SNeel Natu error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 662851d84f1SNeel Natu assert(error == 0); 663851d84f1SNeel Natu 664851d84f1SNeel Natu return (VMEXIT_CONTINUE); 665851d84f1SNeel Natu } 666851d84f1SNeel Natu 667e285ef8dSPeter Grehan static int 668e285ef8dSPeter Grehan vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 669e285ef8dSPeter Grehan { 670851d84f1SNeel Natu int error; 671e285ef8dSPeter Grehan 672851d84f1SNeel Natu error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 673851d84f1SNeel Natu if (error != 0) { 674851d84f1SNeel Natu fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 675851d84f1SNeel Natu vme->u.msr.code, vme->u.msr.wval, *pvcpu); 676*621b5090SJohn Baldwin if (get_config_bool("x86.strictmsr")) { 677d37f2adbSNeel Natu vm_inject_gp(ctx, *pvcpu); 678d087a399SNeel Natu return (VMEXIT_CONTINUE); 679dc506506SNeel Natu } 680851d84f1SNeel Natu } 681851d84f1SNeel Natu return (VMEXIT_CONTINUE); 682e285ef8dSPeter Grehan } 683e285ef8dSPeter Grehan 684e285ef8dSPeter Grehan static int 685e285ef8dSPeter Grehan vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 686e285ef8dSPeter Grehan { 687e285ef8dSPeter Grehan 6889c9eaf63SEnji Cooper (void)spinup_ap(ctx, *pvcpu, 689e285ef8dSPeter Grehan vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 690e285ef8dSPeter Grehan 6919c9eaf63SEnji Cooper return (VMEXIT_CONTINUE); 692e285ef8dSPeter Grehan } 693e285ef8dSPeter Grehan 69464fe7235SNeel Natu #define DEBUG_EPT_MISCONFIG 69564fe7235SNeel Natu #ifdef DEBUG_EPT_MISCONFIG 69664fe7235SNeel Natu #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 69764fe7235SNeel Natu 69864fe7235SNeel Natu static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 69964fe7235SNeel Natu static int ept_misconfig_ptenum; 70064fe7235SNeel Natu #endif 70164fe7235SNeel Natu 7028d56c805SYuri Pankov static const char * 7038d56c805SYuri Pankov vmexit_vmx_desc(uint32_t exit_reason) 7048d56c805SYuri Pankov { 7058d56c805SYuri Pankov 7068d56c805SYuri Pankov if (exit_reason >= nitems(vmx_exit_reason_desc) || 7078d56c805SYuri Pankov vmx_exit_reason_desc[exit_reason] == NULL) 7088d56c805SYuri Pankov return ("Unknown"); 7098d56c805SYuri Pankov return (vmx_exit_reason_desc[exit_reason]); 7108d56c805SYuri Pankov } 7118d56c805SYuri Pankov 712e285ef8dSPeter Grehan static int 713e285ef8dSPeter Grehan vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 714e285ef8dSPeter Grehan { 715e285ef8dSPeter Grehan 716e285ef8dSPeter Grehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 717e285ef8dSPeter Grehan fprintf(stderr, "\treason\t\tVMX\n"); 718e285ef8dSPeter Grehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 719e285ef8dSPeter Grehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 7200492757cSNeel Natu fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 7218d56c805SYuri Pankov fprintf(stderr, "\texit_reason\t%u (%s)\n", vmexit->u.vmx.exit_reason, 7228d56c805SYuri Pankov vmexit_vmx_desc(vmexit->u.vmx.exit_reason)); 723e285ef8dSPeter Grehan fprintf(stderr, "\tqualification\t0x%016lx\n", 724e285ef8dSPeter Grehan vmexit->u.vmx.exit_qualification); 7250492757cSNeel Natu fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 7260492757cSNeel Natu fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 72764fe7235SNeel Natu #ifdef DEBUG_EPT_MISCONFIG 72864fe7235SNeel Natu if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 72964fe7235SNeel Natu vm_get_register(ctx, *pvcpu, 73064fe7235SNeel Natu VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 73164fe7235SNeel Natu &ept_misconfig_gpa); 73264fe7235SNeel Natu vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 73364fe7235SNeel Natu &ept_misconfig_ptenum); 73464fe7235SNeel Natu fprintf(stderr, "\tEPT misconfiguration:\n"); 73564fe7235SNeel Natu fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 73664fe7235SNeel Natu fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 73764fe7235SNeel Natu ept_misconfig_ptenum, ept_misconfig_pte[0], 73864fe7235SNeel Natu ept_misconfig_pte[1], ept_misconfig_pte[2], 73964fe7235SNeel Natu ept_misconfig_pte[3]); 74064fe7235SNeel Natu } 74164fe7235SNeel Natu #endif /* DEBUG_EPT_MISCONFIG */ 742e285ef8dSPeter Grehan return (VMEXIT_ABORT); 743e285ef8dSPeter Grehan } 744e285ef8dSPeter Grehan 745e285ef8dSPeter Grehan static int 746bbadcde4SNeel Natu vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 747bbadcde4SNeel Natu { 748bbadcde4SNeel Natu 749bbadcde4SNeel Natu fprintf(stderr, "vm exit[%d]\n", *pvcpu); 750bbadcde4SNeel Natu fprintf(stderr, "\treason\t\tSVM\n"); 751bbadcde4SNeel Natu fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 752bbadcde4SNeel Natu fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 753bbadcde4SNeel Natu fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 754bbadcde4SNeel Natu fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 755bbadcde4SNeel Natu fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 756bbadcde4SNeel Natu return (VMEXIT_ABORT); 757bbadcde4SNeel Natu } 758bbadcde4SNeel Natu 759bbadcde4SNeel Natu static int 760e285ef8dSPeter Grehan vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 761e285ef8dSPeter Grehan { 76294c3b3bfSPeter Grehan 763d087a399SNeel Natu assert(vmexit->inst_length == 0); 764d087a399SNeel Natu 765e285ef8dSPeter Grehan stats.vmexit_bogus++; 766e285ef8dSPeter Grehan 767d087a399SNeel Natu return (VMEXIT_CONTINUE); 768e285ef8dSPeter Grehan } 769e285ef8dSPeter Grehan 770e285ef8dSPeter Grehan static int 771248e6799SNeel Natu vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 772248e6799SNeel Natu { 773248e6799SNeel Natu 774248e6799SNeel Natu assert(vmexit->inst_length == 0); 775248e6799SNeel Natu 776248e6799SNeel Natu stats.vmexit_reqidle++; 777248e6799SNeel Natu 778248e6799SNeel Natu return (VMEXIT_CONTINUE); 779248e6799SNeel Natu } 780248e6799SNeel Natu 781248e6799SNeel Natu static int 782e285ef8dSPeter Grehan vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 783e285ef8dSPeter Grehan { 78494c3b3bfSPeter Grehan 785e285ef8dSPeter Grehan stats.vmexit_hlt++; 78694c3b3bfSPeter Grehan 787e285ef8dSPeter Grehan /* 788e285ef8dSPeter Grehan * Just continue execution with the next instruction. We use 789e285ef8dSPeter Grehan * the HLT VM exit as a way to be friendly with the host 790e285ef8dSPeter Grehan * scheduler. 791e285ef8dSPeter Grehan */ 792e285ef8dSPeter Grehan return (VMEXIT_CONTINUE); 793e285ef8dSPeter Grehan } 794e285ef8dSPeter Grehan 795e285ef8dSPeter Grehan static int 796e285ef8dSPeter Grehan vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 797e285ef8dSPeter Grehan { 79894c3b3bfSPeter Grehan 799e285ef8dSPeter Grehan stats.vmexit_pause++; 800e285ef8dSPeter Grehan 801e285ef8dSPeter Grehan return (VMEXIT_CONTINUE); 802e285ef8dSPeter Grehan } 803e285ef8dSPeter Grehan 804e285ef8dSPeter Grehan static int 805e285ef8dSPeter Grehan vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 806e285ef8dSPeter Grehan { 80794c3b3bfSPeter Grehan 808d087a399SNeel Natu assert(vmexit->inst_length == 0); 809d087a399SNeel Natu 810e285ef8dSPeter Grehan stats.vmexit_mtrap++; 811e285ef8dSPeter Grehan 812483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 813483d953aSJohn Baldwin checkpoint_cpu_suspend(*pvcpu); 814483d953aSJohn Baldwin #endif 815cd377eb3SJohn Baldwin gdb_cpu_mtrap(*pvcpu); 816483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 817483d953aSJohn Baldwin checkpoint_cpu_resume(*pvcpu); 818483d953aSJohn Baldwin #endif 819483d953aSJohn Baldwin 820d087a399SNeel Natu return (VMEXIT_CONTINUE); 821e285ef8dSPeter Grehan } 822e285ef8dSPeter Grehan 823e285ef8dSPeter Grehan static int 824318224bbSNeel Natu vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 825e285ef8dSPeter Grehan { 8264daa95f8SConrad Meyer int err, i, cs_d; 827703e4974STycho Nightingale struct vie *vie; 8284daa95f8SConrad Meyer enum vm_cpu_mode mode; 829703e4974STycho Nightingale 830318224bbSNeel Natu stats.vmexit_inst_emul++; 831e285ef8dSPeter Grehan 832703e4974STycho Nightingale vie = &vmexit->u.inst_emul.vie; 8334daa95f8SConrad Meyer if (!vie->decoded) { 8344daa95f8SConrad Meyer /* 8354daa95f8SConrad Meyer * Attempt to decode in userspace as a fallback. This allows 8364daa95f8SConrad Meyer * updating instruction decode in bhyve without rebooting the 8374daa95f8SConrad Meyer * kernel (rapid prototyping), albeit with much slower 8384daa95f8SConrad Meyer * emulation. 8394daa95f8SConrad Meyer */ 8404daa95f8SConrad Meyer vie_restart(vie); 8414daa95f8SConrad Meyer mode = vmexit->u.inst_emul.paging.cpu_mode; 8424daa95f8SConrad Meyer cs_d = vmexit->u.inst_emul.cs_d; 843887d46efSPeter Grehan if (vmm_decode_instruction(mode, cs_d, vie) != 0) 844887d46efSPeter Grehan goto fail; 845887d46efSPeter Grehan if (vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RIP, 846887d46efSPeter Grehan vmexit->rip + vie->num_processed) != 0) 847887d46efSPeter Grehan goto fail; 8484daa95f8SConrad Meyer } 8494daa95f8SConrad Meyer 850318224bbSNeel Natu err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 851703e4974STycho Nightingale vie, &vmexit->u.inst_emul.paging); 852e285ef8dSPeter Grehan 853e285ef8dSPeter Grehan if (err) { 854703e4974STycho Nightingale if (err == ESRCH) { 85552c39ee6SConrad Meyer EPRINTLN("Unhandled memory access to 0x%lx\n", 856318224bbSNeel Natu vmexit->u.inst_emul.gpa); 857e285ef8dSPeter Grehan } 858887d46efSPeter Grehan goto fail; 859887d46efSPeter Grehan } 860e285ef8dSPeter Grehan 861887d46efSPeter Grehan return (VMEXIT_CONTINUE); 862887d46efSPeter Grehan 863887d46efSPeter Grehan fail: 86452c39ee6SConrad Meyer fprintf(stderr, "Failed to emulate instruction sequence [ "); 86552c39ee6SConrad Meyer for (i = 0; i < vie->num_valid; i++) 86652c39ee6SConrad Meyer fprintf(stderr, "%02x", vie->inst[i]); 86752c39ee6SConrad Meyer FPRINTLN(stderr, " ] at 0x%lx", vmexit->rip); 868e285ef8dSPeter Grehan return (VMEXIT_ABORT); 869e285ef8dSPeter Grehan } 870e285ef8dSPeter Grehan 871b15a09c0SNeel Natu static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 872b15a09c0SNeel Natu static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 873b15a09c0SNeel Natu 874b15a09c0SNeel Natu static int 875b15a09c0SNeel Natu vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 876b15a09c0SNeel Natu { 877f0fdcfe2SNeel Natu enum vm_suspend_how how; 878b15a09c0SNeel Natu 879f0fdcfe2SNeel Natu how = vmexit->u.suspended.how; 880b15a09c0SNeel Natu 881b15a09c0SNeel Natu fbsdrun_deletecpu(ctx, *pvcpu); 882b15a09c0SNeel Natu 883f0fdcfe2SNeel Natu if (*pvcpu != BSP) { 884b15a09c0SNeel Natu pthread_mutex_lock(&resetcpu_mtx); 885b15a09c0SNeel Natu pthread_cond_signal(&resetcpu_cond); 886b15a09c0SNeel Natu pthread_mutex_unlock(&resetcpu_mtx); 887b15a09c0SNeel Natu pthread_exit(NULL); 888b15a09c0SNeel Natu } 889b15a09c0SNeel Natu 890b15a09c0SNeel Natu pthread_mutex_lock(&resetcpu_mtx); 891b15a09c0SNeel Natu while (!CPU_EMPTY(&cpumask)) { 892b15a09c0SNeel Natu pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 893b15a09c0SNeel Natu } 894b15a09c0SNeel Natu pthread_mutex_unlock(&resetcpu_mtx); 895f0fdcfe2SNeel Natu 896e50ce2aaSNeel Natu switch (how) { 897e50ce2aaSNeel Natu case VM_SUSPEND_RESET: 898b15a09c0SNeel Natu exit(0); 899e50ce2aaSNeel Natu case VM_SUSPEND_POWEROFF: 900*621b5090SJohn Baldwin if (get_config_bool_default("destroy_on_poweroff", false)) 9010a1016f9SPawel Biernacki vm_destroy(ctx); 902f0fdcfe2SNeel Natu exit(1); 903e50ce2aaSNeel Natu case VM_SUSPEND_HALT: 904e50ce2aaSNeel Natu exit(2); 905091d4532SNeel Natu case VM_SUSPEND_TRIPLEFAULT: 906091d4532SNeel Natu exit(3); 907e50ce2aaSNeel Natu default: 908e50ce2aaSNeel Natu fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 909e50ce2aaSNeel Natu exit(100); 910e50ce2aaSNeel Natu } 911f0fdcfe2SNeel Natu return (0); /* NOTREACHED */ 912b15a09c0SNeel Natu } 913b15a09c0SNeel Natu 914cd377eb3SJohn Baldwin static int 915cd377eb3SJohn Baldwin vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 916cd377eb3SJohn Baldwin { 917cd377eb3SJohn Baldwin 918483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 919483d953aSJohn Baldwin checkpoint_cpu_suspend(*pvcpu); 920483d953aSJohn Baldwin #endif 921cd377eb3SJohn Baldwin gdb_cpu_suspend(*pvcpu); 922483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 923483d953aSJohn Baldwin checkpoint_cpu_resume(*pvcpu); 924483d953aSJohn Baldwin #endif 925cd377eb3SJohn Baldwin return (VMEXIT_CONTINUE); 926cd377eb3SJohn Baldwin } 927cd377eb3SJohn Baldwin 928cbd03a9dSJohn Baldwin static int 929cbd03a9dSJohn Baldwin vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 930cbd03a9dSJohn Baldwin { 931cbd03a9dSJohn Baldwin 932cbd03a9dSJohn Baldwin gdb_cpu_breakpoint(*pvcpu, vmexit); 933cbd03a9dSJohn Baldwin return (VMEXIT_CONTINUE); 934cbd03a9dSJohn Baldwin } 935cbd03a9dSJohn Baldwin 936e285ef8dSPeter Grehan static vmexit_handler_t handler[VM_EXITCODE_MAX] = { 937e285ef8dSPeter Grehan [VM_EXITCODE_INOUT] = vmexit_inout, 938d17b5104SNeel Natu [VM_EXITCODE_INOUT_STR] = vmexit_inout, 939e285ef8dSPeter Grehan [VM_EXITCODE_VMX] = vmexit_vmx, 940bbadcde4SNeel Natu [VM_EXITCODE_SVM] = vmexit_svm, 941e285ef8dSPeter Grehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 942248e6799SNeel Natu [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 943e285ef8dSPeter Grehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 944e285ef8dSPeter Grehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 945e285ef8dSPeter Grehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 946318224bbSNeel Natu [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 947e285ef8dSPeter Grehan [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 9483d5444c8SNeel Natu [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 9493d5444c8SNeel Natu [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 950cd377eb3SJohn Baldwin [VM_EXITCODE_DEBUG] = vmexit_debug, 951cbd03a9dSJohn Baldwin [VM_EXITCODE_BPT] = vmexit_breakpoint, 952e285ef8dSPeter Grehan }; 953e285ef8dSPeter Grehan 954e285ef8dSPeter Grehan static void 955d087a399SNeel Natu vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 956e285ef8dSPeter Grehan { 957cc398e21SBjoern A. Zeeb int error, rc; 9588b271170SPeter Grehan enum vm_exitcode exitcode; 95995ebc360SNeel Natu cpuset_t active_cpus; 960e285ef8dSPeter Grehan 9619b6155a2SNeel Natu if (vcpumap[vcpu] != NULL) { 962485b3300SNeel Natu error = pthread_setaffinity_np(pthread_self(), 9639b6155a2SNeel Natu sizeof(cpuset_t), vcpumap[vcpu]); 964e285ef8dSPeter Grehan assert(error == 0); 965e285ef8dSPeter Grehan } 966e285ef8dSPeter Grehan 96795ebc360SNeel Natu error = vm_active_cpus(ctx, &active_cpus); 96895ebc360SNeel Natu assert(CPU_ISSET(vcpu, &active_cpus)); 96995ebc360SNeel Natu 970d087a399SNeel Natu error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 971d087a399SNeel Natu assert(error == 0); 972d087a399SNeel Natu 973e285ef8dSPeter Grehan while (1) { 974d087a399SNeel Natu error = vm_run(ctx, vcpu, &vmexit[vcpu]); 975f80330a8SNeel Natu if (error != 0) 976e285ef8dSPeter Grehan break; 977e285ef8dSPeter Grehan 9788b271170SPeter Grehan exitcode = vmexit[vcpu].exitcode; 9798b271170SPeter Grehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 9808b271170SPeter Grehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 9818b271170SPeter Grehan exitcode); 982989e062bSMarcelo Araujo exit(4); 9838b271170SPeter Grehan } 9848b271170SPeter Grehan 9858b271170SPeter Grehan rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 9868b271170SPeter Grehan 987e285ef8dSPeter Grehan switch (rc) { 988e285ef8dSPeter Grehan case VMEXIT_CONTINUE: 989e285ef8dSPeter Grehan break; 990ee2dbd02SNeel Natu case VMEXIT_ABORT: 991ee2dbd02SNeel Natu abort(); 992e285ef8dSPeter Grehan default: 993989e062bSMarcelo Araujo exit(4); 994e285ef8dSPeter Grehan } 995e285ef8dSPeter Grehan } 996e285ef8dSPeter Grehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 997e285ef8dSPeter Grehan } 998e285ef8dSPeter Grehan 9995f0677d3SNeel Natu static int 10005f0677d3SNeel Natu num_vcpus_allowed(struct vmctx *ctx) 10015f0677d3SNeel Natu { 10025f0677d3SNeel Natu int tmp, error; 10035f0677d3SNeel Natu 10045f0677d3SNeel Natu error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 10055f0677d3SNeel Natu 10065f0677d3SNeel Natu /* 10075f0677d3SNeel Natu * The guest is allowed to spinup more than one processor only if the 10085f0677d3SNeel Natu * UNRESTRICTED_GUEST capability is available. 10095f0677d3SNeel Natu */ 10105f0677d3SNeel Natu if (error == 0) 10115f0677d3SNeel Natu return (VM_MAXCPU); 10125f0677d3SNeel Natu else 10135f0677d3SNeel Natu return (1); 10145f0677d3SNeel Natu } 1015e285ef8dSPeter Grehan 101649cc03daSNeel Natu void 101749cc03daSNeel Natu fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 101849cc03daSNeel Natu { 101949cc03daSNeel Natu int err, tmp; 102049cc03daSNeel Natu 1021*621b5090SJohn Baldwin if (get_config_bool_default("x86.vmexit_on_hlt", false)) { 102249cc03daSNeel Natu err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 102349cc03daSNeel Natu if (err < 0) { 102449cc03daSNeel Natu fprintf(stderr, "VM exit on HLT not supported\n"); 1025989e062bSMarcelo Araujo exit(4); 102649cc03daSNeel Natu } 102749cc03daSNeel Natu vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 102849cc03daSNeel Natu if (cpu == BSP) 102949cc03daSNeel Natu handler[VM_EXITCODE_HLT] = vmexit_hlt; 103049cc03daSNeel Natu } 103149cc03daSNeel Natu 1032*621b5090SJohn Baldwin if (get_config_bool_default("x86.vmexit_on_pause", false)) { 103349cc03daSNeel Natu /* 103449cc03daSNeel Natu * pause exit support required for this mode 103549cc03daSNeel Natu */ 103649cc03daSNeel Natu err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 103749cc03daSNeel Natu if (err < 0) { 103849cc03daSNeel Natu fprintf(stderr, 103949cc03daSNeel Natu "SMP mux requested, no pause support\n"); 1040989e062bSMarcelo Araujo exit(4); 104149cc03daSNeel Natu } 104249cc03daSNeel Natu vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 104349cc03daSNeel Natu if (cpu == BSP) 104449cc03daSNeel Natu handler[VM_EXITCODE_PAUSE] = vmexit_pause; 104549cc03daSNeel Natu } 104649cc03daSNeel Natu 1047*621b5090SJohn Baldwin if (get_config_bool_default("x86.x2apic", false)) 104849cc03daSNeel Natu err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 104952e5c8a2SNeel Natu else 105052e5c8a2SNeel Natu err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 105149cc03daSNeel Natu 105249cc03daSNeel Natu if (err) { 105349cc03daSNeel Natu fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 1054989e062bSMarcelo Araujo exit(4); 105549cc03daSNeel Natu } 105649cc03daSNeel Natu 105749cc03daSNeel Natu vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 105849cc03daSNeel Natu } 105949cc03daSNeel Natu 10609b1aa8d6SNeel Natu static struct vmctx * 10619b1aa8d6SNeel Natu do_open(const char *vmname) 10629b1aa8d6SNeel Natu { 10639b1aa8d6SNeel Natu struct vmctx *ctx; 10649b1aa8d6SNeel Natu int error; 10659b1aa8d6SNeel Natu bool reinit, romboot; 106600ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 106700ef17beSBartek Rutkowski cap_rights_t rights; 106800ef17beSBartek Rutkowski const cap_ioctl_t *cmds; 106900ef17beSBartek Rutkowski size_t ncmds; 107000ef17beSBartek Rutkowski #endif 10719b1aa8d6SNeel Natu 10729b1aa8d6SNeel Natu reinit = romboot = false; 10739b1aa8d6SNeel Natu 10749b1aa8d6SNeel Natu if (lpc_bootrom()) 10759b1aa8d6SNeel Natu romboot = true; 10769b1aa8d6SNeel Natu 10779b1aa8d6SNeel Natu error = vm_create(vmname); 10789b1aa8d6SNeel Natu if (error) { 10799b1aa8d6SNeel Natu if (errno == EEXIST) { 10809b1aa8d6SNeel Natu if (romboot) { 10819b1aa8d6SNeel Natu reinit = true; 10829b1aa8d6SNeel Natu } else { 10839b1aa8d6SNeel Natu /* 10849b1aa8d6SNeel Natu * The virtual machine has been setup by the 10859b1aa8d6SNeel Natu * userspace bootloader. 10869b1aa8d6SNeel Natu */ 10879b1aa8d6SNeel Natu } 10889b1aa8d6SNeel Natu } else { 10899b1aa8d6SNeel Natu perror("vm_create"); 1090989e062bSMarcelo Araujo exit(4); 10919b1aa8d6SNeel Natu } 10929b1aa8d6SNeel Natu } else { 10939b1aa8d6SNeel Natu if (!romboot) { 10949b1aa8d6SNeel Natu /* 10959b1aa8d6SNeel Natu * If the virtual machine was just created then a 10969b1aa8d6SNeel Natu * bootrom must be configured to boot it. 10979b1aa8d6SNeel Natu */ 10989b1aa8d6SNeel Natu fprintf(stderr, "virtual machine cannot be booted\n"); 1099989e062bSMarcelo Araujo exit(4); 11009b1aa8d6SNeel Natu } 11019b1aa8d6SNeel Natu } 11029b1aa8d6SNeel Natu 11039b1aa8d6SNeel Natu ctx = vm_open(vmname); 11049b1aa8d6SNeel Natu if (ctx == NULL) { 11059b1aa8d6SNeel Natu perror("vm_open"); 1106989e062bSMarcelo Araujo exit(4); 11079b1aa8d6SNeel Natu } 11089b1aa8d6SNeel Natu 110900ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 111000ef17beSBartek Rutkowski cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); 1111abfa3c39SMarcelo Araujo if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1) 111200ef17beSBartek Rutkowski errx(EX_OSERR, "Unable to apply rights for sandbox"); 111300ef17beSBartek Rutkowski vm_get_ioctls(&ncmds); 111400ef17beSBartek Rutkowski cmds = vm_get_ioctls(NULL); 111500ef17beSBartek Rutkowski if (cmds == NULL) 111600ef17beSBartek Rutkowski errx(EX_OSERR, "out of memory"); 1117abfa3c39SMarcelo Araujo if (caph_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1) 111800ef17beSBartek Rutkowski errx(EX_OSERR, "Unable to apply rights for sandbox"); 111900ef17beSBartek Rutkowski free((cap_ioctl_t *)cmds); 112000ef17beSBartek Rutkowski #endif 112100ef17beSBartek Rutkowski 11229b1aa8d6SNeel Natu if (reinit) { 11239b1aa8d6SNeel Natu error = vm_reinit(ctx); 11249b1aa8d6SNeel Natu if (error) { 11259b1aa8d6SNeel Natu perror("vm_reinit"); 1126989e062bSMarcelo Araujo exit(4); 11279b1aa8d6SNeel Natu } 11289b1aa8d6SNeel Natu } 112901d822d3SRodney W. Grimes error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); 113001d822d3SRodney W. Grimes if (error) 113101d822d3SRodney W. Grimes errx(EX_OSERR, "vm_set_topology"); 11329b1aa8d6SNeel Natu return (ctx); 11339b1aa8d6SNeel Natu } 11349b1aa8d6SNeel Natu 1135483d953aSJohn Baldwin void 1136483d953aSJohn Baldwin spinup_vcpu(struct vmctx *ctx, int vcpu) 1137483d953aSJohn Baldwin { 1138483d953aSJohn Baldwin int error; 1139483d953aSJohn Baldwin uint64_t rip; 1140483d953aSJohn Baldwin 1141483d953aSJohn Baldwin error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); 1142483d953aSJohn Baldwin assert(error == 0); 1143483d953aSJohn Baldwin 1144483d953aSJohn Baldwin fbsdrun_set_capabilities(ctx, vcpu); 1145483d953aSJohn Baldwin error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); 1146483d953aSJohn Baldwin assert(error == 0); 1147483d953aSJohn Baldwin 1148483d953aSJohn Baldwin fbsdrun_addcpu(ctx, BSP, vcpu, rip); 1149483d953aSJohn Baldwin } 1150483d953aSJohn Baldwin 1151*621b5090SJohn Baldwin static bool 1152*621b5090SJohn Baldwin parse_config_option(const char *option) 1153*621b5090SJohn Baldwin { 1154*621b5090SJohn Baldwin const char *value; 1155*621b5090SJohn Baldwin char *path; 1156*621b5090SJohn Baldwin 1157*621b5090SJohn Baldwin value = strchr(option, '='); 1158*621b5090SJohn Baldwin if (value == NULL || value[1] == '\0') 1159*621b5090SJohn Baldwin return (false); 1160*621b5090SJohn Baldwin path = strndup(option, value - option); 1161*621b5090SJohn Baldwin if (path == NULL) 1162*621b5090SJohn Baldwin err(4, "Failed to allocate memory"); 1163*621b5090SJohn Baldwin set_config_value(path, value + 1); 1164*621b5090SJohn Baldwin return (true); 1165*621b5090SJohn Baldwin } 1166*621b5090SJohn Baldwin 1167*621b5090SJohn Baldwin static void 1168*621b5090SJohn Baldwin parse_simple_config_file(const char *path) 1169*621b5090SJohn Baldwin { 1170*621b5090SJohn Baldwin FILE *fp; 1171*621b5090SJohn Baldwin char *line, *cp; 1172*621b5090SJohn Baldwin size_t linecap; 1173*621b5090SJohn Baldwin unsigned int lineno; 1174*621b5090SJohn Baldwin 1175*621b5090SJohn Baldwin fp = fopen(path, "r"); 1176*621b5090SJohn Baldwin if (fp == NULL) 1177*621b5090SJohn Baldwin err(4, "Failed to open configuration file %s", path); 1178*621b5090SJohn Baldwin line = NULL; 1179*621b5090SJohn Baldwin linecap = 0; 1180*621b5090SJohn Baldwin lineno = 1; 1181*621b5090SJohn Baldwin for (lineno = 1; getline(&line, &linecap, fp) > 0; lineno++) { 1182*621b5090SJohn Baldwin if (*line == '#' || *line == '\n') 1183*621b5090SJohn Baldwin continue; 1184*621b5090SJohn Baldwin cp = strchr(line, '\n'); 1185*621b5090SJohn Baldwin if (cp != NULL) 1186*621b5090SJohn Baldwin *cp = '\0'; 1187*621b5090SJohn Baldwin if (!parse_config_option(line)) 1188*621b5090SJohn Baldwin errx(4, "%s line %u: invalid config option '%s'", path, 1189*621b5090SJohn Baldwin lineno, line); 1190*621b5090SJohn Baldwin } 1191*621b5090SJohn Baldwin free(line); 1192*621b5090SJohn Baldwin fclose(fp); 1193*621b5090SJohn Baldwin } 1194*621b5090SJohn Baldwin 1195*621b5090SJohn Baldwin static void 1196*621b5090SJohn Baldwin set_defaults(void) 1197*621b5090SJohn Baldwin { 1198*621b5090SJohn Baldwin 1199*621b5090SJohn Baldwin set_config_bool("acpi_tables", false); 1200*621b5090SJohn Baldwin set_config_value("memory.size", "256M"); 1201*621b5090SJohn Baldwin set_config_bool("x86.strictmsr", true); 1202*621b5090SJohn Baldwin } 1203*621b5090SJohn Baldwin 1204e285ef8dSPeter Grehan int 1205e285ef8dSPeter Grehan main(int argc, char *argv[]) 1206e285ef8dSPeter Grehan { 1207c4df8cbfSRobert Wing int c, error, err; 1208*621b5090SJohn Baldwin int max_vcpus, memflags; 1209e285ef8dSPeter Grehan struct vmctx *ctx; 1210e285ef8dSPeter Grehan uint64_t rip; 1211b060ba50SNeel Natu size_t memsize; 1212*621b5090SJohn Baldwin const char *value, *vmname; 12139b1aa8d6SNeel Natu char *optstr; 1214483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1215483d953aSJohn Baldwin char *restore_file; 1216483d953aSJohn Baldwin struct restore_state rstate; 1217483d953aSJohn Baldwin int vcpu; 1218483d953aSJohn Baldwin 1219483d953aSJohn Baldwin restore_file = NULL; 1220483d953aSJohn Baldwin #endif 1221e285ef8dSPeter Grehan 1222*621b5090SJohn Baldwin init_config(); 1223*621b5090SJohn Baldwin set_defaults(); 1224e285ef8dSPeter Grehan progname = basename(argv[0]); 1225e285ef8dSPeter Grehan 1226483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1227*621b5090SJohn Baldwin optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:r:"; 1228483d953aSJohn Baldwin #else 1229*621b5090SJohn Baldwin optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:"; 1230483d953aSJohn Baldwin #endif 12319b1aa8d6SNeel Natu while ((c = getopt(argc, argv, optstr)) != -1) { 1232e285ef8dSPeter Grehan switch (c) { 1233e285ef8dSPeter Grehan case 'a': 1234*621b5090SJohn Baldwin set_config_bool("x86.x2apic", false); 1235e285ef8dSPeter Grehan break; 1236e285ef8dSPeter Grehan case 'A': 1237*621b5090SJohn Baldwin set_config_bool("acpi_tables", true); 1238e285ef8dSPeter Grehan break; 12390a1016f9SPawel Biernacki case 'D': 1240*621b5090SJohn Baldwin set_config_bool("destroy_on_poweroff", true); 12410a1016f9SPawel Biernacki break; 1242e285ef8dSPeter Grehan case 'p': 12439b6155a2SNeel Natu if (pincpu_parse(optarg) != 0) { 12449b6155a2SNeel Natu errx(EX_USAGE, "invalid vcpu pinning " 12459b6155a2SNeel Natu "configuration '%s'", optarg); 12469b6155a2SNeel Natu } 1247e285ef8dSPeter Grehan break; 1248e285ef8dSPeter Grehan case 'c': 124901d822d3SRodney W. Grimes if (topology_parse(optarg) != 0) { 125001d822d3SRodney W. Grimes errx(EX_USAGE, "invalid cpu topology " 125101d822d3SRodney W. Grimes "'%s'", optarg); 125201d822d3SRodney W. Grimes } 1253e285ef8dSPeter Grehan break; 12540dd10c00SNeel Natu case 'C': 1255*621b5090SJohn Baldwin set_config_bool("memory.guest_in_core", true); 12560dd10c00SNeel Natu break; 1257cd377eb3SJohn Baldwin case 'G': 1258cd377eb3SJohn Baldwin if (optarg[0] == 'w') { 1259*621b5090SJohn Baldwin set_config_bool("gdb.wait", true); 1260cd377eb3SJohn Baldwin optarg++; 1261cd377eb3SJohn Baldwin } 1262*621b5090SJohn Baldwin set_config_value("gdb.port", optarg); 1263*621b5090SJohn Baldwin break; 1264*621b5090SJohn Baldwin case 'k': 1265*621b5090SJohn Baldwin parse_simple_config_file(optarg); 1266e285ef8dSPeter Grehan break; 1267ea7f1c8cSNeel Natu case 'l': 1268657d2158SMarcelo Araujo if (strncmp(optarg, "help", strlen(optarg)) == 0) { 1269657d2158SMarcelo Araujo lpc_print_supported_devices(); 1270657d2158SMarcelo Araujo exit(0); 1271657d2158SMarcelo Araujo } else if (lpc_device_parse(optarg) != 0) { 1272ea7f1c8cSNeel Natu errx(EX_USAGE, "invalid lpc device " 1273ea7f1c8cSNeel Natu "configuration '%s'", optarg); 1274ea7f1c8cSNeel Natu } 1275ea7f1c8cSNeel Natu break; 1276483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1277483d953aSJohn Baldwin case 'r': 1278483d953aSJohn Baldwin restore_file = optarg; 1279483d953aSJohn Baldwin break; 1280483d953aSJohn Baldwin #endif 1281e285ef8dSPeter Grehan case 's': 1282657d2158SMarcelo Araujo if (strncmp(optarg, "help", strlen(optarg)) == 0) { 1283657d2158SMarcelo Araujo pci_print_supported_devices(); 1284657d2158SMarcelo Araujo exit(0); 1285657d2158SMarcelo Araujo } else if (pci_parse_slot(optarg) != 0) 1286989e062bSMarcelo Araujo exit(4); 1287b05c77ffSNeel Natu else 1288e285ef8dSPeter Grehan break; 12899b1aa8d6SNeel Natu case 'S': 1290*621b5090SJohn Baldwin set_config_bool("memory.wired", true); 12919b1aa8d6SNeel Natu break; 1292e285ef8dSPeter Grehan case 'm': 1293*621b5090SJohn Baldwin set_config_value("memory.size", optarg); 1294*621b5090SJohn Baldwin break; 1295*621b5090SJohn Baldwin case 'o': 1296*621b5090SJohn Baldwin if (!parse_config_option(optarg)) 1297*621b5090SJohn Baldwin errx(EX_USAGE, "invalid configuration option '%s'", optarg); 1298e285ef8dSPeter Grehan break; 1299e285ef8dSPeter Grehan case 'H': 1300*621b5090SJohn Baldwin set_config_bool("x86.vmexit_on_hlt", true); 1301e285ef8dSPeter Grehan break; 1302e285ef8dSPeter Grehan case 'I': 1303a1a4cbeaSNeel Natu /* 1304a1a4cbeaSNeel Natu * The "-I" option was used to add an ioapic to the 1305a1a4cbeaSNeel Natu * virtual machine. 1306a1a4cbeaSNeel Natu * 1307a1a4cbeaSNeel Natu * An ioapic is now provided unconditionally for each 1308a1a4cbeaSNeel Natu * virtual machine and this option is now deprecated. 1309a1a4cbeaSNeel Natu */ 1310e285ef8dSPeter Grehan break; 1311e285ef8dSPeter Grehan case 'P': 1312*621b5090SJohn Baldwin set_config_bool("x86.vmexit_on_pause", true); 1313e285ef8dSPeter Grehan break; 1314e285ef8dSPeter Grehan case 'e': 1315*621b5090SJohn Baldwin set_config_bool("x86.strictio", true); 1316e285ef8dSPeter Grehan break; 1317c9747678SNeel Natu case 'u': 1318*621b5090SJohn Baldwin set_config_bool("rtc.use_localtime", false); 1319c9747678SNeel Natu break; 1320af5bfc53STycho Nightingale case 'U': 1321*621b5090SJohn Baldwin set_config_value("uuid", optarg); 1322af5bfc53STycho Nightingale break; 1323851d84f1SNeel Natu case 'w': 1324*621b5090SJohn Baldwin set_config_bool("x86.strictmsr", false); 1325851d84f1SNeel Natu break; 1326062b878fSPeter Grehan case 'W': 1327*621b5090SJohn Baldwin set_config_bool("virtio_msix", false); 1328062b878fSPeter Grehan break; 132952e5c8a2SNeel Natu case 'x': 1330*621b5090SJohn Baldwin set_config_bool("x86.x2apic", true); 133152e5c8a2SNeel Natu break; 1332b100acf2SNeel Natu case 'Y': 1333*621b5090SJohn Baldwin set_config_bool("x86.mptable", false); 1334b100acf2SNeel Natu break; 1335e285ef8dSPeter Grehan case 'h': 1336e285ef8dSPeter Grehan usage(0); 1337e285ef8dSPeter Grehan default: 1338e285ef8dSPeter Grehan usage(1); 1339e285ef8dSPeter Grehan } 1340e285ef8dSPeter Grehan } 1341e285ef8dSPeter Grehan argc -= optind; 1342e285ef8dSPeter Grehan argv += optind; 1343e285ef8dSPeter Grehan 1344*621b5090SJohn Baldwin if (argc > 1) 1345483d953aSJohn Baldwin usage(1); 1346483d953aSJohn Baldwin 1347*621b5090SJohn Baldwin #ifdef BHYVE_SNAPSHOT 1348483d953aSJohn Baldwin if (restore_file != NULL) { 1349483d953aSJohn Baldwin error = load_restore_file(restore_file, &rstate); 1350483d953aSJohn Baldwin if (error) { 1351483d953aSJohn Baldwin fprintf(stderr, "Failed to read checkpoint info from " 1352483d953aSJohn Baldwin "file: '%s'.\n", restore_file); 1353483d953aSJohn Baldwin exit(1); 1354483d953aSJohn Baldwin } 1355483d953aSJohn Baldwin vmname = lookup_vmname(&rstate); 1356*621b5090SJohn Baldwin if (vmname != NULL) 1357*621b5090SJohn Baldwin set_config_value("name", vmname); 1358483d953aSJohn Baldwin } 1359*621b5090SJohn Baldwin #endif 1360*621b5090SJohn Baldwin 1361*621b5090SJohn Baldwin if (argc == 1) 1362*621b5090SJohn Baldwin set_config_value("name", argv[0]); 1363*621b5090SJohn Baldwin 1364*621b5090SJohn Baldwin vmname = get_config_value("name"); 1365*621b5090SJohn Baldwin if (vmname == NULL) 1366e285ef8dSPeter Grehan usage(1); 1367e285ef8dSPeter Grehan 1368*621b5090SJohn Baldwin if (get_config_bool_default("config.dump", false)) { 1369*621b5090SJohn Baldwin dump_config(); 1370*621b5090SJohn Baldwin exit(1); 1371*621b5090SJohn Baldwin } 1372*621b5090SJohn Baldwin 1373*621b5090SJohn Baldwin calc_topolopgy(); 1374*621b5090SJohn Baldwin build_vcpumaps(); 1375*621b5090SJohn Baldwin 1376*621b5090SJohn Baldwin value = get_config_value("memory.size"); 1377*621b5090SJohn Baldwin error = vm_parse_memsize(value, &memsize); 1378*621b5090SJohn Baldwin if (error) 1379*621b5090SJohn Baldwin errx(EX_USAGE, "invalid memsize '%s'", value); 1380*621b5090SJohn Baldwin 13819b1aa8d6SNeel Natu ctx = do_open(vmname); 1382e285ef8dSPeter Grehan 1383483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1384483d953aSJohn Baldwin if (restore_file != NULL) { 1385483d953aSJohn Baldwin guest_ncpus = lookup_guest_ncpus(&rstate); 1386483d953aSJohn Baldwin memflags = lookup_memflags(&rstate); 1387483d953aSJohn Baldwin memsize = lookup_memsize(&rstate); 1388483d953aSJohn Baldwin } 1389483d953aSJohn Baldwin 1390483d953aSJohn Baldwin if (guest_ncpus < 1) { 1391483d953aSJohn Baldwin fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); 1392483d953aSJohn Baldwin exit(1); 1393483d953aSJohn Baldwin } 1394483d953aSJohn Baldwin #endif 1395483d953aSJohn Baldwin 13965f0677d3SNeel Natu max_vcpus = num_vcpus_allowed(ctx); 13975f0677d3SNeel Natu if (guest_ncpus > max_vcpus) { 13985f0677d3SNeel Natu fprintf(stderr, "%d vCPUs requested but only %d available\n", 13995f0677d3SNeel Natu guest_ncpus, max_vcpus); 1400989e062bSMarcelo Araujo exit(4); 14015f0677d3SNeel Natu } 14025f0677d3SNeel Natu 140349cc03daSNeel Natu fbsdrun_set_capabilities(ctx, BSP); 1404e285ef8dSPeter Grehan 1405*621b5090SJohn Baldwin memflags = 0; 1406*621b5090SJohn Baldwin if (get_config_bool_default("memory.wired", false)) 1407*621b5090SJohn Baldwin memflags |= VM_MEM_F_WIRED; 1408*621b5090SJohn Baldwin if (get_config_bool_default("memory.guest_in_core", false)) 1409*621b5090SJohn Baldwin memflags |= VM_MEM_F_INCORE; 14109b1aa8d6SNeel Natu vm_set_memflags(ctx, memflags); 1411b060ba50SNeel Natu err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 1412b060ba50SNeel Natu if (err) { 14139b1aa8d6SNeel Natu fprintf(stderr, "Unable to setup memory (%d)\n", errno); 1414989e062bSMarcelo Araujo exit(4); 1415e285ef8dSPeter Grehan } 1416e285ef8dSPeter Grehan 1417c3498942SNeel Natu error = init_msr(); 1418c3498942SNeel Natu if (error) { 1419c3498942SNeel Natu fprintf(stderr, "init_msr error %d", error); 1420989e062bSMarcelo Araujo exit(4); 1421c3498942SNeel Natu } 1422c3498942SNeel Natu 14230e2ca4e6SNeel Natu init_mem(); 1424e285ef8dSPeter Grehan init_inout(); 14258a68ae80SConrad Meyer kernemu_dev_init(); 1426bb30b08eSConrad Meyer init_bootrom(ctx); 14272cf9911fSPeter Grehan atkbdc_init(ctx); 1428b3e9732aSJohn Baldwin pci_irq_init(ctx); 14293cbf3585SJohn Baldwin ioapic_init(ctx); 1430a38e2a64SPeter Grehan 1431*621b5090SJohn Baldwin rtc_init(ctx); 1432b3e9732aSJohn Baldwin sci_init(ctx); 14339d6be09fSPeter Grehan 1434a38e2a64SPeter Grehan /* 143528323addSBryan Drewery * Exit if a device emulation finds an error in its initilization 1436a38e2a64SPeter Grehan */ 1437989e062bSMarcelo Araujo if (init_pci(ctx) != 0) { 1438989e062bSMarcelo Araujo perror("device emulation initialization error"); 1439989e062bSMarcelo Araujo exit(4); 1440989e062bSMarcelo Araujo } 1441a38e2a64SPeter Grehan 14429cb339ccSConrad Meyer /* 14439cb339ccSConrad Meyer * Initialize after PCI, to allow a bootrom file to reserve the high 14449cb339ccSConrad Meyer * region. 14459cb339ccSConrad Meyer */ 1446*621b5090SJohn Baldwin if (get_config_bool("acpi_tables")) 14479cb339ccSConrad Meyer vmgenc_init(ctx); 14489cb339ccSConrad Meyer 1449*621b5090SJohn Baldwin value = get_config_value("gdb.port"); 1450*621b5090SJohn Baldwin if (value != NULL) 1451*621b5090SJohn Baldwin init_gdb(ctx, atoi(value), get_config_bool_default("gdb.wait", 1452*621b5090SJohn Baldwin false)); 1453e285ef8dSPeter Grehan 14549b1aa8d6SNeel Natu if (lpc_bootrom()) { 14559b1aa8d6SNeel Natu if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { 14569b1aa8d6SNeel Natu fprintf(stderr, "ROM boot failed: unrestricted guest " 14579b1aa8d6SNeel Natu "capability not available\n"); 1458989e062bSMarcelo Araujo exit(4); 14599b1aa8d6SNeel Natu } 14609b1aa8d6SNeel Natu error = vcpu_reset(ctx, BSP); 14619b1aa8d6SNeel Natu assert(error == 0); 14629b1aa8d6SNeel Natu } 14639b1aa8d6SNeel Natu 1464483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1465483d953aSJohn Baldwin if (restore_file != NULL) { 1466483d953aSJohn Baldwin fprintf(stdout, "Pausing pci devs...\r\n"); 1467483d953aSJohn Baldwin if (vm_pause_user_devs(ctx) != 0) { 1468483d953aSJohn Baldwin fprintf(stderr, "Failed to pause PCI device state.\n"); 1469483d953aSJohn Baldwin exit(1); 1470483d953aSJohn Baldwin } 1471483d953aSJohn Baldwin 1472483d953aSJohn Baldwin fprintf(stdout, "Restoring vm mem...\r\n"); 1473483d953aSJohn Baldwin if (restore_vm_mem(ctx, &rstate) != 0) { 1474483d953aSJohn Baldwin fprintf(stderr, "Failed to restore VM memory.\n"); 1475483d953aSJohn Baldwin exit(1); 1476483d953aSJohn Baldwin } 1477483d953aSJohn Baldwin 1478483d953aSJohn Baldwin fprintf(stdout, "Restoring pci devs...\r\n"); 1479483d953aSJohn Baldwin if (vm_restore_user_devs(ctx, &rstate) != 0) { 1480483d953aSJohn Baldwin fprintf(stderr, "Failed to restore PCI device state.\n"); 1481483d953aSJohn Baldwin exit(1); 1482483d953aSJohn Baldwin } 1483483d953aSJohn Baldwin 1484483d953aSJohn Baldwin fprintf(stdout, "Restoring kernel structs...\r\n"); 1485483d953aSJohn Baldwin if (vm_restore_kern_structs(ctx, &rstate) != 0) { 1486483d953aSJohn Baldwin fprintf(stderr, "Failed to restore kernel structs.\n"); 1487483d953aSJohn Baldwin exit(1); 1488483d953aSJohn Baldwin } 1489483d953aSJohn Baldwin 1490483d953aSJohn Baldwin fprintf(stdout, "Resuming pci devs...\r\n"); 1491483d953aSJohn Baldwin if (vm_resume_user_devs(ctx) != 0) { 1492483d953aSJohn Baldwin fprintf(stderr, "Failed to resume PCI device state.\n"); 1493483d953aSJohn Baldwin exit(1); 1494483d953aSJohn Baldwin } 1495483d953aSJohn Baldwin } 1496483d953aSJohn Baldwin #endif 1497483d953aSJohn Baldwin 1498e285ef8dSPeter Grehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 1499e285ef8dSPeter Grehan assert(error == 0); 1500e285ef8dSPeter Grehan 1501e285ef8dSPeter Grehan /* 1502e285ef8dSPeter Grehan * build the guest tables, MP etc. 1503e285ef8dSPeter Grehan */ 1504*621b5090SJohn Baldwin if (get_config_bool_default("x86.mptable", true)) { 1505b100acf2SNeel Natu error = mptable_build(ctx, guest_ncpus); 1506989e062bSMarcelo Araujo if (error) { 1507989e062bSMarcelo Araujo perror("error to build the guest tables"); 1508989e062bSMarcelo Araujo exit(4); 1509989e062bSMarcelo Araujo } 1510b100acf2SNeel Natu } 1511e285ef8dSPeter Grehan 1512af5bfc53STycho Nightingale error = smbios_build(ctx); 1513af5bfc53STycho Nightingale assert(error == 0); 1514af5bfc53STycho Nightingale 1515*621b5090SJohn Baldwin if (get_config_bool("acpi_tables")) { 1516a1a4cbeaSNeel Natu error = acpi_build(ctx, guest_ncpus); 1517e285ef8dSPeter Grehan assert(error == 0); 1518e285ef8dSPeter Grehan } 1519e285ef8dSPeter Grehan 152088ac6958SPeter Grehan if (lpc_bootrom()) 152188ac6958SPeter Grehan fwctl_init(); 152288ac6958SPeter Grehan 1523dcbebe85SMariusz Zaborski /* 1524dcbebe85SMariusz Zaborski * Change the proc title to include the VM name. 1525dcbebe85SMariusz Zaborski */ 1526dcbebe85SMariusz Zaborski setproctitle("%s", vmname); 1527dcbebe85SMariusz Zaborski 152800ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 152900ef17beSBartek Rutkowski caph_cache_catpages(); 153000ef17beSBartek Rutkowski 153100ef17beSBartek Rutkowski if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) 153200ef17beSBartek Rutkowski errx(EX_OSERR, "Unable to apply rights for sandbox"); 153300ef17beSBartek Rutkowski 15347672a014SMariusz Zaborski if (caph_enter() == -1) 153500ef17beSBartek Rutkowski errx(EX_OSERR, "cap_enter() failed"); 153600ef17beSBartek Rutkowski #endif 153700ef17beSBartek Rutkowski 1538483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1539483d953aSJohn Baldwin if (restore_file != NULL) 1540483d953aSJohn Baldwin destroy_restore_state(&rstate); 1541483d953aSJohn Baldwin 1542483d953aSJohn Baldwin /* 1543483d953aSJohn Baldwin * checkpointing thread for communication with bhyvectl 1544483d953aSJohn Baldwin */ 1545483d953aSJohn Baldwin if (init_checkpoint_thread(ctx) < 0) 1546483d953aSJohn Baldwin printf("Failed to start checkpoint thread!\r\n"); 1547483d953aSJohn Baldwin 1548483d953aSJohn Baldwin if (restore_file != NULL) 1549483d953aSJohn Baldwin vm_restore_time(ctx); 1550483d953aSJohn Baldwin #endif 1551483d953aSJohn Baldwin 1552e285ef8dSPeter Grehan /* 1553e285ef8dSPeter Grehan * Add CPU 0 1554e285ef8dSPeter Grehan */ 15550826d045SNeel Natu fbsdrun_addcpu(ctx, BSP, BSP, rip); 1556e285ef8dSPeter Grehan 1557483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1558483d953aSJohn Baldwin /* 1559483d953aSJohn Baldwin * If we restore a VM, start all vCPUs now (including APs), otherwise, 1560483d953aSJohn Baldwin * let the guest OS to spin them up later via vmexits. 1561483d953aSJohn Baldwin */ 1562483d953aSJohn Baldwin if (restore_file != NULL) { 1563483d953aSJohn Baldwin for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { 1564483d953aSJohn Baldwin if (vcpu == BSP) 1565483d953aSJohn Baldwin continue; 1566483d953aSJohn Baldwin 1567483d953aSJohn Baldwin fprintf(stdout, "spinning up vcpu no %d...\r\n", vcpu); 1568483d953aSJohn Baldwin spinup_vcpu(ctx, vcpu); 1569483d953aSJohn Baldwin } 1570483d953aSJohn Baldwin } 1571483d953aSJohn Baldwin #endif 1572483d953aSJohn Baldwin 1573e285ef8dSPeter Grehan /* 1574e285ef8dSPeter Grehan * Head off to the main event dispatch loop 1575e285ef8dSPeter Grehan */ 1576e285ef8dSPeter Grehan mevent_dispatch(); 1577e285ef8dSPeter Grehan 1578989e062bSMarcelo Araujo exit(4); 1579e285ef8dSPeter Grehan } 1580