/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/cpuset.h>

#ifdef __FreeBSD__
#include <amd64/vmm/intel/vmcs.h>
#else
#include <intel/vmcs.h>
#endif

#include <machine/atomic.h>
#include <machine/segments.h>

#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
#include <errno.h>
#include <libgen.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>
#include <sysexits.h>
#include <stdbool.h>
#include <stdint.h>

#include <machine/vmm.h>
#ifndef WITHOUT_CAPSICUM
#include <machine/vmm_dev.h>
#endif
#include <vmmapi.h>
88bf21cd93STycho Nightingale 89bf21cd93STycho Nightingale #include "bhyverun.h" 90bf21cd93STycho Nightingale #include "acpi.h" 91bf21cd93STycho Nightingale #include "atkbdc.h" 92bf21cd93STycho Nightingale #include "console.h" 93154972afSPatrick Mooney #include "bootrom.h" 942b948146SAndy Fiddaman #include "config.h" 95bf21cd93STycho Nightingale #include "inout.h" 96154972afSPatrick Mooney #include "debug.h" 974c87aefeSPatrick Mooney #include "fwctl.h" 984c87aefeSPatrick Mooney #include "gdb.h" 99bf21cd93STycho Nightingale #include "ioapic.h" 100154972afSPatrick Mooney #include "kernemu_dev.h" 101bf21cd93STycho Nightingale #include "mem.h" 102bf21cd93STycho Nightingale #include "mevent.h" 103bf21cd93STycho Nightingale #include "mptbl.h" 104bf21cd93STycho Nightingale #include "pci_emul.h" 105bf21cd93STycho Nightingale #include "pci_irq.h" 106bf21cd93STycho Nightingale #include "pci_lpc.h" 107bf21cd93STycho Nightingale #include "smbiostbl.h" 108bf21cd93STycho Nightingale #include "xmsr.h" 109bf21cd93STycho Nightingale #include "spinup_ap.h" 110bf21cd93STycho Nightingale #include "rfb.h" 111bf21cd93STycho Nightingale #include "rtc.h" 112bf21cd93STycho Nightingale #include "vga.h" 113154972afSPatrick Mooney #include "vmgenc.h" 1142817ebc2SAndy Fiddaman #ifndef __FreeBSD__ 1152817ebc2SAndy Fiddaman #include "privileges.h" 1162817ebc2SAndy Fiddaman #endif 117bf21cd93STycho Nightingale 118bf21cd93STycho Nightingale #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 119bf21cd93STycho Nightingale 120bf21cd93STycho Nightingale #define MB (1024UL * 1024) 121bf21cd93STycho Nightingale #define GB (1024UL * MB) 122bf21cd93STycho Nightingale 1234c87aefeSPatrick Mooney static const char * const vmx_exit_reason_desc[] = { 1244c87aefeSPatrick Mooney [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", 1254c87aefeSPatrick Mooney [EXIT_REASON_EXT_INTR] = "External interrupt", 1264c87aefeSPatrick Mooney [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", 
1274c87aefeSPatrick Mooney [EXIT_REASON_INIT] = "INIT signal", 1284c87aefeSPatrick Mooney [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", 1294c87aefeSPatrick Mooney [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", 1304c87aefeSPatrick Mooney [EXIT_REASON_SMI] = "Other SMI", 1314c87aefeSPatrick Mooney [EXIT_REASON_INTR_WINDOW] = "Interrupt window", 1324c87aefeSPatrick Mooney [EXIT_REASON_NMI_WINDOW] = "NMI window", 1334c87aefeSPatrick Mooney [EXIT_REASON_TASK_SWITCH] = "Task switch", 1344c87aefeSPatrick Mooney [EXIT_REASON_CPUID] = "CPUID", 1354c87aefeSPatrick Mooney [EXIT_REASON_GETSEC] = "GETSEC", 1364c87aefeSPatrick Mooney [EXIT_REASON_HLT] = "HLT", 1374c87aefeSPatrick Mooney [EXIT_REASON_INVD] = "INVD", 1384c87aefeSPatrick Mooney [EXIT_REASON_INVLPG] = "INVLPG", 1394c87aefeSPatrick Mooney [EXIT_REASON_RDPMC] = "RDPMC", 1404c87aefeSPatrick Mooney [EXIT_REASON_RDTSC] = "RDTSC", 1414c87aefeSPatrick Mooney [EXIT_REASON_RSM] = "RSM", 1424c87aefeSPatrick Mooney [EXIT_REASON_VMCALL] = "VMCALL", 1434c87aefeSPatrick Mooney [EXIT_REASON_VMCLEAR] = "VMCLEAR", 1444c87aefeSPatrick Mooney [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", 1454c87aefeSPatrick Mooney [EXIT_REASON_VMPTRLD] = "VMPTRLD", 1464c87aefeSPatrick Mooney [EXIT_REASON_VMPTRST] = "VMPTRST", 1474c87aefeSPatrick Mooney [EXIT_REASON_VMREAD] = "VMREAD", 1484c87aefeSPatrick Mooney [EXIT_REASON_VMRESUME] = "VMRESUME", 1494c87aefeSPatrick Mooney [EXIT_REASON_VMWRITE] = "VMWRITE", 1504c87aefeSPatrick Mooney [EXIT_REASON_VMXOFF] = "VMXOFF", 1514c87aefeSPatrick Mooney [EXIT_REASON_VMXON] = "VMXON", 1524c87aefeSPatrick Mooney [EXIT_REASON_CR_ACCESS] = "Control-register accesses", 1534c87aefeSPatrick Mooney [EXIT_REASON_DR_ACCESS] = "MOV DR", 1544c87aefeSPatrick Mooney [EXIT_REASON_INOUT] = "I/O instruction", 1554c87aefeSPatrick Mooney [EXIT_REASON_RDMSR] = "RDMSR", 1564c87aefeSPatrick Mooney [EXIT_REASON_WRMSR] = "WRMSR", 1574c87aefeSPatrick Mooney [EXIT_REASON_INVAL_VMCS] = 1584c87aefeSPatrick Mooney "VM-entry failure 
due to invalid guest state", 1594c87aefeSPatrick Mooney [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", 1604c87aefeSPatrick Mooney [EXIT_REASON_MWAIT] = "MWAIT", 1614c87aefeSPatrick Mooney [EXIT_REASON_MTF] = "Monitor trap flag", 1624c87aefeSPatrick Mooney [EXIT_REASON_MONITOR] = "MONITOR", 1634c87aefeSPatrick Mooney [EXIT_REASON_PAUSE] = "PAUSE", 1644c87aefeSPatrick Mooney [EXIT_REASON_MCE_DURING_ENTRY] = 1654c87aefeSPatrick Mooney "VM-entry failure due to machine-check event", 1664c87aefeSPatrick Mooney [EXIT_REASON_TPR] = "TPR below threshold", 1674c87aefeSPatrick Mooney [EXIT_REASON_APIC_ACCESS] = "APIC access", 1684c87aefeSPatrick Mooney [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", 1694c87aefeSPatrick Mooney [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", 1704c87aefeSPatrick Mooney [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", 1714c87aefeSPatrick Mooney [EXIT_REASON_EPT_FAULT] = "EPT violation", 1724c87aefeSPatrick Mooney [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", 1734c87aefeSPatrick Mooney [EXIT_REASON_INVEPT] = "INVEPT", 1744c87aefeSPatrick Mooney [EXIT_REASON_RDTSCP] = "RDTSCP", 1754c87aefeSPatrick Mooney [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", 1764c87aefeSPatrick Mooney [EXIT_REASON_INVVPID] = "INVVPID", 1774c87aefeSPatrick Mooney [EXIT_REASON_WBINVD] = "WBINVD", 1784c87aefeSPatrick Mooney [EXIT_REASON_XSETBV] = "XSETBV", 1794c87aefeSPatrick Mooney [EXIT_REASON_APIC_WRITE] = "APIC write", 1804c87aefeSPatrick Mooney [EXIT_REASON_RDRAND] = "RDRAND", 1814c87aefeSPatrick Mooney [EXIT_REASON_INVPCID] = "INVPCID", 1824c87aefeSPatrick Mooney [EXIT_REASON_VMFUNC] = "VMFUNC", 1834c87aefeSPatrick Mooney [EXIT_REASON_ENCLS] = "ENCLS", 1844c87aefeSPatrick Mooney [EXIT_REASON_RDSEED] = "RDSEED", 1854c87aefeSPatrick Mooney [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", 1864c87aefeSPatrick Mooney [EXIT_REASON_XSAVES] = "XSAVES", 1874c87aefeSPatrick Mooney [EXIT_REASON_XRSTORS] = "XRSTORS" 
1884c87aefeSPatrick Mooney }; 1894c87aefeSPatrick Mooney 190bf21cd93STycho Nightingale typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 1914c87aefeSPatrick Mooney extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); 192bf21cd93STycho Nightingale 193bf21cd93STycho Nightingale int guest_ncpus; 1944c87aefeSPatrick Mooney uint16_t cores, maxcpus, sockets, threads; 1954c87aefeSPatrick Mooney 196154972afSPatrick Mooney int raw_stdio = 0; 197154972afSPatrick Mooney 198bf21cd93STycho Nightingale static char *progname; 199bf21cd93STycho Nightingale static const int BSP = 0; 200bf21cd93STycho Nightingale 201bf21cd93STycho Nightingale static cpuset_t cpumask; 202bf21cd93STycho Nightingale 203bf21cd93STycho Nightingale static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 204bf21cd93STycho Nightingale 205bf21cd93STycho Nightingale static struct vm_exit vmexit[VM_MAXCPU]; 206e0c0d44eSPatrick Mooney static struct vm_entry vmentry[VM_MAXCPU]; 207bf21cd93STycho Nightingale 208bf21cd93STycho Nightingale struct bhyvestats { 209bf21cd93STycho Nightingale uint64_t vmexit_bogus; 2104c87aefeSPatrick Mooney uint64_t vmexit_reqidle; 211bf21cd93STycho Nightingale uint64_t vmexit_hlt; 212bf21cd93STycho Nightingale uint64_t vmexit_pause; 213bf21cd93STycho Nightingale uint64_t vmexit_mtrap; 214e0c0d44eSPatrick Mooney uint64_t vmexit_mmio; 215e0c0d44eSPatrick Mooney uint64_t vmexit_inout; 216bf21cd93STycho Nightingale uint64_t cpu_switch_rotate; 217bf21cd93STycho Nightingale uint64_t cpu_switch_direct; 218e0c0d44eSPatrick Mooney uint64_t mmio_unhandled; 219bf21cd93STycho Nightingale } stats; 220bf21cd93STycho Nightingale 221bf21cd93STycho Nightingale struct mt_vmm_info { 222bf21cd93STycho Nightingale pthread_t mt_thr; 223bf21cd93STycho Nightingale struct vmctx *mt_ctx; 224bf21cd93STycho Nightingale int mt_vcpu; 225e0c0d44eSPatrick Mooney uint64_t mt_startrip; 226bf21cd93STycho Nightingale } mt_vmm_info[VM_MAXCPU]; 227bf21cd93STycho 
Nightingale 2284c87aefeSPatrick Mooney #ifdef __FreeBSD__ 2294c87aefeSPatrick Mooney static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 2304c87aefeSPatrick Mooney #endif 2314c87aefeSPatrick Mooney 232bf21cd93STycho Nightingale static void 233bf21cd93STycho Nightingale usage(int code) 234bf21cd93STycho Nightingale { 235bf21cd93STycho Nightingale 2364c87aefeSPatrick Mooney fprintf(stderr, 2376960cd89SAndy Fiddaman #ifdef __FreeBSD__ 238b0de25cbSAndy Fiddaman "Usage: %s [-AaCDeHhPSuWwxY]\n" 2396960cd89SAndy Fiddaman #else 240*2c4fdd8fSAndy Fiddaman "Usage: %s [-aCDdeHhPSuWwxY]\n" 2416960cd89SAndy Fiddaman #endif 2424c87aefeSPatrick Mooney " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" 243bf21cd93STycho Nightingale #ifdef __FreeBSD__ 2446dc98349SAndy Fiddaman " %*s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n" 245b0de25cbSAndy Fiddaman " %*s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n" 246b0de25cbSAndy Fiddaman 247*2c4fdd8fSAndy Fiddaman " -A: create ACPI tables\n" 248bf21cd93STycho Nightingale #else 2496dc98349SAndy Fiddaman " %*s [-k <config_file>] [-l <lpc>] [-m mem] [-o <var>=<value>]\n" 250b0de25cbSAndy Fiddaman " %*s [-s <pci>] [-U uuid] vmname\n" 251bf21cd93STycho Nightingale #endif 252b0de25cbSAndy Fiddaman " -a: local apic is in xAPIC mode (deprecated)\n" 2534c87aefeSPatrick Mooney " -C: include guest memory in core file\n" 254b0de25cbSAndy Fiddaman " -c: number of cpus and/or topology specification\n" 255b0de25cbSAndy Fiddaman " -D: destroy on power-off\n" 2569c3024a3SHans Rosenfeld #ifndef __FreeBSD__ 2579c3024a3SHans Rosenfeld " -d: suspend cpu at boot\n" 2589c3024a3SHans Rosenfeld #endif 2594c87aefeSPatrick Mooney " -e: exit on unhandled I/O access\n" 2604c87aefeSPatrick Mooney " -H: vmexit from the guest on hlt\n" 261b0de25cbSAndy Fiddaman " -h: help\n" 2622b948146SAndy Fiddaman " -k: key=value flat config file\n" 2634c87aefeSPatrick Mooney " -l: LPC device configuration\n" 2644c87aefeSPatrick Mooney " -m: 
memory size\n" 2652b948146SAndy Fiddaman " -o: set config 'var' to 'value'\n" 266b0de25cbSAndy Fiddaman " -P: vmexit from the guest on pause\n" 2674c87aefeSPatrick Mooney #ifdef __FreeBSD__ 2684c87aefeSPatrick Mooney " -p: pin 'vcpu' to 'hostcpu'\n" 2694c87aefeSPatrick Mooney #endif 2704c87aefeSPatrick Mooney " -S: guest memory cannot be swapped\n" 271b0de25cbSAndy Fiddaman " -s: <slot,driver,configinfo> PCI slot config\n" 2724c87aefeSPatrick Mooney " -U: uuid\n" 273b0de25cbSAndy Fiddaman " -u: RTC keeps UTC time\n" 2744c87aefeSPatrick Mooney " -W: force virtio to use single-vector MSI\n" 275b0de25cbSAndy Fiddaman " -w: ignore unimplemented MSRs\n" 2764c87aefeSPatrick Mooney " -x: local apic is in x2APIC mode\n" 2774c87aefeSPatrick Mooney " -Y: disable MPtable generation\n", 2784c87aefeSPatrick Mooney progname, (int)strlen(progname), "", (int)strlen(progname), "", 2794c87aefeSPatrick Mooney (int)strlen(progname), ""); 280bf21cd93STycho Nightingale 281bf21cd93STycho Nightingale exit(code); 282bf21cd93STycho Nightingale } 283bf21cd93STycho Nightingale 2844c87aefeSPatrick Mooney /* 2854c87aefeSPatrick Mooney * XXX This parser is known to have the following issues: 2862b948146SAndy Fiddaman * 1. It accepts null key=value tokens ",," as setting "cpus" to an 2872b948146SAndy Fiddaman * empty string. 2884c87aefeSPatrick Mooney * 2894c87aefeSPatrick Mooney * The acceptance of a null specification ('-c ""') is by design to match the 2904c87aefeSPatrick Mooney * manual page syntax specification, this results in a topology of 1 vCPU. 
2914c87aefeSPatrick Mooney */ 2924c87aefeSPatrick Mooney static int 2934c87aefeSPatrick Mooney topology_parse(const char *opt) 2944c87aefeSPatrick Mooney { 2954c87aefeSPatrick Mooney char *cp, *str; 2964c87aefeSPatrick Mooney 2972b948146SAndy Fiddaman if (*opt == '\0') { 2982b948146SAndy Fiddaman set_config_value("sockets", "1"); 2992b948146SAndy Fiddaman set_config_value("cores", "1"); 3002b948146SAndy Fiddaman set_config_value("threads", "1"); 3012b948146SAndy Fiddaman set_config_value("cpus", "1"); 3022b948146SAndy Fiddaman return (0); 3032b948146SAndy Fiddaman } 3042b948146SAndy Fiddaman 3054c87aefeSPatrick Mooney str = strdup(opt); 3064c87aefeSPatrick Mooney if (str == NULL) 3072b948146SAndy Fiddaman errx(4, "Failed to allocate memory"); 3084c87aefeSPatrick Mooney 3094c87aefeSPatrick Mooney while ((cp = strsep(&str, ",")) != NULL) { 3102b948146SAndy Fiddaman if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) 3112b948146SAndy Fiddaman set_config_value("cpus", cp + strlen("cpus=")); 3122b948146SAndy Fiddaman else if (strncmp(cp, "sockets=", strlen("sockets=")) == 0) 3132b948146SAndy Fiddaman set_config_value("sockets", cp + strlen("sockets=")); 3142b948146SAndy Fiddaman else if (strncmp(cp, "cores=", strlen("cores=")) == 0) 3152b948146SAndy Fiddaman set_config_value("cores", cp + strlen("cores=")); 3162b948146SAndy Fiddaman else if (strncmp(cp, "threads=", strlen("threads=")) == 0) 3172b948146SAndy Fiddaman set_config_value("threads", cp + strlen("threads=")); 3184c87aefeSPatrick Mooney #ifdef notyet /* Do not expose this until vmm.ko implements it */ 3192b948146SAndy Fiddaman else if (strncmp(cp, "maxcpus=", strlen("maxcpus=")) == 0) 3202b948146SAndy Fiddaman set_config_value("maxcpus", cp + strlen("maxcpus=")); 3214c87aefeSPatrick Mooney #endif 3222b948146SAndy Fiddaman else if (strchr(cp, '=') != NULL) 3232b948146SAndy Fiddaman goto out; 3244c87aefeSPatrick Mooney else 3252b948146SAndy Fiddaman set_config_value("cpus", cp); 3264c87aefeSPatrick Mooney } 
3274c87aefeSPatrick Mooney free(str); 3284c87aefeSPatrick Mooney return (0); 3294c87aefeSPatrick Mooney 3304c87aefeSPatrick Mooney out: 3314c87aefeSPatrick Mooney free(str); 3324c87aefeSPatrick Mooney return (-1); 3334c87aefeSPatrick Mooney } 3344c87aefeSPatrick Mooney 3352b948146SAndy Fiddaman static int 3362b948146SAndy Fiddaman parse_int_value(const char *key, const char *value, int minval, int maxval) 3372b948146SAndy Fiddaman { 3382b948146SAndy Fiddaman char *cp; 3392b948146SAndy Fiddaman long lval; 3402b948146SAndy Fiddaman 3412b948146SAndy Fiddaman errno = 0; 3422b948146SAndy Fiddaman lval = strtol(value, &cp, 0); 3432b948146SAndy Fiddaman if (errno != 0 || *cp != '\0' || cp == value || lval < minval || 3442b948146SAndy Fiddaman lval > maxval) 3452b948146SAndy Fiddaman errx(4, "Invalid value for %s: '%s'", key, value); 3462b948146SAndy Fiddaman return (lval); 3472b948146SAndy Fiddaman } 3482b948146SAndy Fiddaman 3492b948146SAndy Fiddaman /* 3502b948146SAndy Fiddaman * Set the sockets, cores, threads, and guest_cpus variables based on 3512b948146SAndy Fiddaman * the configured topology. 3522b948146SAndy Fiddaman * 3532b948146SAndy Fiddaman * The limits of UINT16_MAX are due to the types passed to 3542b948146SAndy Fiddaman * vm_set_topology(). vmm.ko may enforce tighter limits. 
3552b948146SAndy Fiddaman */ 3562b948146SAndy Fiddaman static void 3572b948146SAndy Fiddaman calc_topolopgy(void) 3582b948146SAndy Fiddaman { 3592b948146SAndy Fiddaman const char *value; 3602b948146SAndy Fiddaman bool explicit_cpus; 3612b948146SAndy Fiddaman uint64_t ncpus; 3622b948146SAndy Fiddaman 3632b948146SAndy Fiddaman value = get_config_value("cpus"); 3642b948146SAndy Fiddaman if (value != NULL) { 3652b948146SAndy Fiddaman guest_ncpus = parse_int_value("cpus", value, 1, UINT16_MAX); 3662b948146SAndy Fiddaman explicit_cpus = true; 3672b948146SAndy Fiddaman } else { 3682b948146SAndy Fiddaman guest_ncpus = 1; 3692b948146SAndy Fiddaman explicit_cpus = false; 3702b948146SAndy Fiddaman } 3712b948146SAndy Fiddaman value = get_config_value("cores"); 3722b948146SAndy Fiddaman if (value != NULL) 3732b948146SAndy Fiddaman cores = parse_int_value("cores", value, 1, UINT16_MAX); 3742b948146SAndy Fiddaman else 3752b948146SAndy Fiddaman cores = 1; 3762b948146SAndy Fiddaman value = get_config_value("threads"); 3772b948146SAndy Fiddaman if (value != NULL) 3782b948146SAndy Fiddaman threads = parse_int_value("threads", value, 1, UINT16_MAX); 3792b948146SAndy Fiddaman else 3802b948146SAndy Fiddaman threads = 1; 3812b948146SAndy Fiddaman value = get_config_value("sockets"); 3822b948146SAndy Fiddaman if (value != NULL) 3832b948146SAndy Fiddaman sockets = parse_int_value("sockets", value, 1, UINT16_MAX); 3842b948146SAndy Fiddaman else 3852b948146SAndy Fiddaman sockets = guest_ncpus; 3862b948146SAndy Fiddaman 3872b948146SAndy Fiddaman /* 3882b948146SAndy Fiddaman * Compute sockets * cores * threads avoiding overflow. The 3892b948146SAndy Fiddaman * range check above insures these are 16 bit values. 
3902b948146SAndy Fiddaman */ 3912b948146SAndy Fiddaman ncpus = (uint64_t)sockets * cores * threads; 3922b948146SAndy Fiddaman if (ncpus > UINT16_MAX) 3932b948146SAndy Fiddaman errx(4, "Computed number of vCPUs too high: %ju", 3942b948146SAndy Fiddaman (uintmax_t)ncpus); 3952b948146SAndy Fiddaman 3962b948146SAndy Fiddaman if (explicit_cpus) { 3972b948146SAndy Fiddaman if (guest_ncpus != ncpus) 3982b948146SAndy Fiddaman errx(4, "Topology (%d sockets, %d cores, %d threads) " 3992b948146SAndy Fiddaman "does not match %d vCPUs", sockets, cores, threads, 4002b948146SAndy Fiddaman guest_ncpus); 4012b948146SAndy Fiddaman } else 4022b948146SAndy Fiddaman guest_ncpus = ncpus; 4032b948146SAndy Fiddaman } 4042b948146SAndy Fiddaman 4054c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 4064c87aefeSPatrick Mooney /* 4074c87aefeSPatrick Mooney * 11-stable capsicum helpers 4084c87aefeSPatrick Mooney */ 4094c87aefeSPatrick Mooney static void 4104c87aefeSPatrick Mooney bhyve_caph_cache_catpages(void) 4114c87aefeSPatrick Mooney { 4124c87aefeSPatrick Mooney 4134c87aefeSPatrick Mooney (void)catopen("libc", NL_CAT_LOCALE); 4144c87aefeSPatrick Mooney } 4154c87aefeSPatrick Mooney 4164c87aefeSPatrick Mooney static int 4174c87aefeSPatrick Mooney bhyve_caph_limit_stdoe(void) 4184c87aefeSPatrick Mooney { 4194c87aefeSPatrick Mooney cap_rights_t rights; 4204c87aefeSPatrick Mooney unsigned long cmds[] = { TIOCGETA, TIOCGWINSZ }; 4214c87aefeSPatrick Mooney int i, fds[] = { STDOUT_FILENO, STDERR_FILENO }; 4224c87aefeSPatrick Mooney 4234c87aefeSPatrick Mooney cap_rights_init(&rights, CAP_FCNTL, CAP_FSTAT, CAP_IOCTL); 4244c87aefeSPatrick Mooney cap_rights_set(&rights, CAP_WRITE); 4254c87aefeSPatrick Mooney 4264c87aefeSPatrick Mooney for (i = 0; i < nitems(fds); i++) { 4274c87aefeSPatrick Mooney if (cap_rights_limit(fds[i], &rights) < 0 && errno != ENOSYS) 4284c87aefeSPatrick Mooney return (-1); 4294c87aefeSPatrick Mooney 4304c87aefeSPatrick Mooney if (cap_ioctls_limit(fds[i], cmds, nitems(cmds)) < 0 && 
errno != ENOSYS) 4314c87aefeSPatrick Mooney return (-1); 4324c87aefeSPatrick Mooney 4334c87aefeSPatrick Mooney if (cap_fcntls_limit(fds[i], CAP_FCNTL_GETFL) < 0 && errno != ENOSYS) 4344c87aefeSPatrick Mooney return (-1); 4354c87aefeSPatrick Mooney } 4364c87aefeSPatrick Mooney 4374c87aefeSPatrick Mooney return (0); 4384c87aefeSPatrick Mooney } 4394c87aefeSPatrick Mooney 4404c87aefeSPatrick Mooney #endif 4414c87aefeSPatrick Mooney 4424c87aefeSPatrick Mooney #ifdef __FreeBSD__ 4434c87aefeSPatrick Mooney static int 4444c87aefeSPatrick Mooney pincpu_parse(const char *opt) 4454c87aefeSPatrick Mooney { 4464c87aefeSPatrick Mooney int vcpu, pcpu; 4474c87aefeSPatrick Mooney 4484c87aefeSPatrick Mooney if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 4494c87aefeSPatrick Mooney fprintf(stderr, "invalid format: %s\n", opt); 4504c87aefeSPatrick Mooney return (-1); 4514c87aefeSPatrick Mooney } 4524c87aefeSPatrick Mooney 4534c87aefeSPatrick Mooney if (vcpu < 0 || vcpu >= VM_MAXCPU) { 4544c87aefeSPatrick Mooney fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 4554c87aefeSPatrick Mooney vcpu, VM_MAXCPU - 1); 4564c87aefeSPatrick Mooney return (-1); 4574c87aefeSPatrick Mooney } 4584c87aefeSPatrick Mooney 4594c87aefeSPatrick Mooney if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 4604c87aefeSPatrick Mooney fprintf(stderr, "hostcpu '%d' outside valid range from " 4614c87aefeSPatrick Mooney "0 to %d\n", pcpu, CPU_SETSIZE - 1); 4624c87aefeSPatrick Mooney return (-1); 4634c87aefeSPatrick Mooney } 4644c87aefeSPatrick Mooney 4652b948146SAndy Fiddaman snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); 4662b948146SAndy Fiddaman value = get_config_value(key); 4672b948146SAndy Fiddaman 4682b948146SAndy Fiddaman if (asprintf(&newval, "%s%s%d", value != NULL ? value : "", 4692b948146SAndy Fiddaman value != NULL ? 
"," : "", pcpu) == -1) { 4702b948146SAndy Fiddaman perror("failed to build new cpuset string"); 4714c87aefeSPatrick Mooney return (-1); 4724c87aefeSPatrick Mooney } 4732b948146SAndy Fiddaman 4742b948146SAndy Fiddaman set_config_value(key, newval); 4752b948146SAndy Fiddaman free(newval); 4762b948146SAndy Fiddaman return (0); 4772b948146SAndy Fiddaman } 4782b948146SAndy Fiddaman 4792b948146SAndy Fiddaman static void 4802b948146SAndy Fiddaman parse_cpuset(int vcpu, const char *list, cpuset_t *set) 4812b948146SAndy Fiddaman { 4822b948146SAndy Fiddaman char *cp, *token; 4832b948146SAndy Fiddaman int pcpu, start; 4842b948146SAndy Fiddaman 4852b948146SAndy Fiddaman CPU_ZERO(set); 4862b948146SAndy Fiddaman start = -1; 4872b948146SAndy Fiddaman token = __DECONST(char *, list); 4882b948146SAndy Fiddaman for (;;) { 4892b948146SAndy Fiddaman pcpu = strtoul(token, &cp, 0); 4902b948146SAndy Fiddaman if (cp == token) 4912b948146SAndy Fiddaman errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); 4922b948146SAndy Fiddaman if (pcpu < 0 || pcpu >= CPU_SETSIZE) 4932b948146SAndy Fiddaman errx(4, "hostcpu '%d' outside valid range from 0 to %d", 4942b948146SAndy Fiddaman pcpu, CPU_SETSIZE - 1); 4952b948146SAndy Fiddaman switch (*cp) { 4962b948146SAndy Fiddaman case ',': 4972b948146SAndy Fiddaman case '\0': 4982b948146SAndy Fiddaman if (start >= 0) { 4992b948146SAndy Fiddaman if (start > pcpu) 5002b948146SAndy Fiddaman errx(4, "Invalid hostcpu range %d-%d", 5012b948146SAndy Fiddaman start, pcpu); 5022b948146SAndy Fiddaman while (start < pcpu) { 5032b948146SAndy Fiddaman CPU_SET(start, vcpumap[vcpu]); 5042b948146SAndy Fiddaman start++; 5052b948146SAndy Fiddaman } 5062b948146SAndy Fiddaman start = -1; 5074c87aefeSPatrick Mooney } 5084c87aefeSPatrick Mooney CPU_SET(pcpu, vcpumap[vcpu]); 5092b948146SAndy Fiddaman break; 5102b948146SAndy Fiddaman case '-': 5112b948146SAndy Fiddaman if (start >= 0) 5122b948146SAndy Fiddaman errx(4, "invalid cpuset for vcpu %d: '%s'", 5132b948146SAndy 
Fiddaman vcpu, list); 5142b948146SAndy Fiddaman start = pcpu; 5152b948146SAndy Fiddaman break; 5162b948146SAndy Fiddaman default: 5172b948146SAndy Fiddaman errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); 5182b948146SAndy Fiddaman } 5192b948146SAndy Fiddaman if (*cp == '\0') 5202b948146SAndy Fiddaman break; 5212b948146SAndy Fiddaman token = cp + 1; 5222b948146SAndy Fiddaman } 5232b948146SAndy Fiddaman } 5242b948146SAndy Fiddaman 5252b948146SAndy Fiddaman static void 5262b948146SAndy Fiddaman build_vcpumaps(void) 5272b948146SAndy Fiddaman { 5282b948146SAndy Fiddaman char key[16]; 5292b948146SAndy Fiddaman const char *value; 5302b948146SAndy Fiddaman int vcpu; 5312b948146SAndy Fiddaman 5322b948146SAndy Fiddaman for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { 5332b948146SAndy Fiddaman snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); 5342b948146SAndy Fiddaman value = get_config_value(key); 5352b948146SAndy Fiddaman if (value == NULL) 5362b948146SAndy Fiddaman continue; 5372b948146SAndy Fiddaman vcpumap[vcpu] = malloc(sizeof(cpuset_t)); 5382b948146SAndy Fiddaman if (vcpumap[vcpu] == NULL) 5392b948146SAndy Fiddaman err(4, "Failed to allocate cpuset for vcpu %d", vcpu); 5402b948146SAndy Fiddaman parse_cpuset(vcpu, value, vcpumap[vcpu]); 5412b948146SAndy Fiddaman } 5424c87aefeSPatrick Mooney } 5434c87aefeSPatrick Mooney 544bf21cd93STycho Nightingale void 545bf21cd93STycho Nightingale vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 546bf21cd93STycho Nightingale int errcode) 547bf21cd93STycho Nightingale { 548bf21cd93STycho Nightingale struct vmctx *ctx; 549bf21cd93STycho Nightingale int error, restart_instruction; 550bf21cd93STycho Nightingale 551bf21cd93STycho Nightingale ctx = arg; 552bf21cd93STycho Nightingale restart_instruction = 1; 553bf21cd93STycho Nightingale 554bf21cd93STycho Nightingale error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 555bf21cd93STycho Nightingale restart_instruction); 556bf21cd93STycho Nightingale 
assert(error == 0); 557bf21cd93STycho Nightingale } 558b58b977eSPatrick Mooney #endif /* __FreeBSD__ */ 559bf21cd93STycho Nightingale 560bf21cd93STycho Nightingale void * 561bf21cd93STycho Nightingale paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 562bf21cd93STycho Nightingale { 563bf21cd93STycho Nightingale 564bf21cd93STycho Nightingale return (vm_map_gpa(ctx, gaddr, len)); 565bf21cd93STycho Nightingale } 566bf21cd93STycho Nightingale 567bf21cd93STycho Nightingale int 568bf21cd93STycho Nightingale fbsdrun_virtio_msix(void) 569bf21cd93STycho Nightingale { 570bf21cd93STycho Nightingale 5712b948146SAndy Fiddaman return (get_config_bool_default("virtio_msix", true)); 572bf21cd93STycho Nightingale } 573bf21cd93STycho Nightingale 574bf21cd93STycho Nightingale static void * 575bf21cd93STycho Nightingale fbsdrun_start_thread(void *param) 576bf21cd93STycho Nightingale { 577bf21cd93STycho Nightingale char tname[MAXCOMLEN + 1]; 578bf21cd93STycho Nightingale struct mt_vmm_info *mtp; 579bf21cd93STycho Nightingale int vcpu; 580bf21cd93STycho Nightingale 581bf21cd93STycho Nightingale mtp = param; 582bf21cd93STycho Nightingale vcpu = mtp->mt_vcpu; 583bf21cd93STycho Nightingale 584bf21cd93STycho Nightingale snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 585bf21cd93STycho Nightingale pthread_set_name_np(mtp->mt_thr, tname); 586bf21cd93STycho Nightingale 5874c87aefeSPatrick Mooney gdb_cpu_add(vcpu); 5884c87aefeSPatrick Mooney 589e0c0d44eSPatrick Mooney vm_loop(mtp->mt_ctx, vcpu, mtp->mt_startrip); 590bf21cd93STycho Nightingale 591bf21cd93STycho Nightingale /* not reached */ 592bf21cd93STycho Nightingale exit(1); 593bf21cd93STycho Nightingale return (NULL); 594bf21cd93STycho Nightingale } 595bf21cd93STycho Nightingale 5969c3024a3SHans Rosenfeld #ifdef __FreeBSD__ 597bf21cd93STycho Nightingale void 598bf21cd93STycho Nightingale fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 5999c3024a3SHans Rosenfeld #else 6009c3024a3SHans Rosenfeld void 
6012606939dSPatrick Mooney fbsdrun_addcpu(struct vmctx *ctx, int newcpu, uint64_t rip, bool suspend) 6029c3024a3SHans Rosenfeld #endif 603bf21cd93STycho Nightingale { 604bf21cd93STycho Nightingale int error; 605bf21cd93STycho Nightingale 6062606939dSPatrick Mooney #ifdef __FreeBSD__ 607bf21cd93STycho Nightingale assert(fromcpu == BSP); 6082606939dSPatrick Mooney #endif 609bf21cd93STycho Nightingale 610bf21cd93STycho Nightingale /* 611bf21cd93STycho Nightingale * The 'newcpu' must be activated in the context of 'fromcpu'. If 612bf21cd93STycho Nightingale * vm_activate_cpu() is delayed until newcpu's pthread starts running 613bf21cd93STycho Nightingale * then vmm.ko is out-of-sync with bhyve and this can create a race 614bf21cd93STycho Nightingale * with vm_suspend(). 615bf21cd93STycho Nightingale */ 616bf21cd93STycho Nightingale error = vm_activate_cpu(ctx, newcpu); 6174c87aefeSPatrick Mooney if (error != 0) 6184c87aefeSPatrick Mooney err(EX_OSERR, "could not activate CPU %d", newcpu); 619bf21cd93STycho Nightingale 620bf21cd93STycho Nightingale CPU_SET_ATOMIC(newcpu, &cpumask); 621bf21cd93STycho Nightingale 6229c3024a3SHans Rosenfeld #ifndef __FreeBSD__ 6239c3024a3SHans Rosenfeld if (suspend) 6249c3024a3SHans Rosenfeld (void) vm_suspend_cpu(ctx, newcpu); 6259c3024a3SHans Rosenfeld #endif 6269c3024a3SHans Rosenfeld 627bf21cd93STycho Nightingale /* 628bf21cd93STycho Nightingale * Set up the vmexit struct to allow execution to start 629bf21cd93STycho Nightingale * at the given RIP 630bf21cd93STycho Nightingale */ 631bf21cd93STycho Nightingale mt_vmm_info[newcpu].mt_ctx = ctx; 632bf21cd93STycho Nightingale mt_vmm_info[newcpu].mt_vcpu = newcpu; 633e0c0d44eSPatrick Mooney mt_vmm_info[newcpu].mt_startrip = rip; 634bf21cd93STycho Nightingale 635bf21cd93STycho Nightingale error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 636bf21cd93STycho Nightingale fbsdrun_start_thread, &mt_vmm_info[newcpu]); 637bf21cd93STycho Nightingale assert(error == 0); 638bf21cd93STycho 
Nightingale } 639bf21cd93STycho Nightingale 640bf21cd93STycho Nightingale static int 6414c87aefeSPatrick Mooney fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 6424c87aefeSPatrick Mooney { 6434c87aefeSPatrick Mooney 6444c87aefeSPatrick Mooney if (!CPU_ISSET(vcpu, &cpumask)) { 6454c87aefeSPatrick Mooney fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 6464c87aefeSPatrick Mooney exit(4); 6474c87aefeSPatrick Mooney } 6484c87aefeSPatrick Mooney 6494c87aefeSPatrick Mooney CPU_CLR_ATOMIC(vcpu, &cpumask); 6504c87aefeSPatrick Mooney return (CPU_EMPTY(&cpumask)); 6514c87aefeSPatrick Mooney } 6524c87aefeSPatrick Mooney 653e0c0d44eSPatrick Mooney static void 654e0c0d44eSPatrick Mooney vmentry_mmio_read(int vcpu, uint64_t gpa, uint8_t bytes, uint64_t data) 655e0c0d44eSPatrick Mooney { 656e0c0d44eSPatrick Mooney struct vm_entry *entry = &vmentry[vcpu]; 657e0c0d44eSPatrick Mooney struct vm_mmio *mmio = &entry->u.mmio; 658e0c0d44eSPatrick Mooney 659e0c0d44eSPatrick Mooney assert(entry->cmd == VEC_DEFAULT); 660e0c0d44eSPatrick Mooney 6612606939dSPatrick Mooney entry->cmd = VEC_FULFILL_MMIO; 662e0c0d44eSPatrick Mooney mmio->bytes = bytes; 663e0c0d44eSPatrick Mooney mmio->read = 1; 664e0c0d44eSPatrick Mooney mmio->gpa = gpa; 665e0c0d44eSPatrick Mooney mmio->data = data; 666e0c0d44eSPatrick Mooney } 667e0c0d44eSPatrick Mooney 668e0c0d44eSPatrick Mooney static void 669e0c0d44eSPatrick Mooney vmentry_mmio_write(int vcpu, uint64_t gpa, uint8_t bytes) 670e0c0d44eSPatrick Mooney { 671e0c0d44eSPatrick Mooney struct vm_entry *entry = &vmentry[vcpu]; 672e0c0d44eSPatrick Mooney struct vm_mmio *mmio = &entry->u.mmio; 673e0c0d44eSPatrick Mooney 674e0c0d44eSPatrick Mooney assert(entry->cmd == VEC_DEFAULT); 675e0c0d44eSPatrick Mooney 6762606939dSPatrick Mooney entry->cmd = VEC_FULFILL_MMIO; 677e0c0d44eSPatrick Mooney mmio->bytes = bytes; 678e0c0d44eSPatrick Mooney mmio->read = 0; 679e0c0d44eSPatrick Mooney mmio->gpa = gpa; 680e0c0d44eSPatrick Mooney mmio->data = 0; 
681e0c0d44eSPatrick Mooney } 682e0c0d44eSPatrick Mooney 683e0c0d44eSPatrick Mooney static void 684e0c0d44eSPatrick Mooney vmentry_inout_read(int vcpu, uint16_t port, uint8_t bytes, uint32_t data) 685e0c0d44eSPatrick Mooney { 686e0c0d44eSPatrick Mooney struct vm_entry *entry = &vmentry[vcpu]; 687e0c0d44eSPatrick Mooney struct vm_inout *inout = &entry->u.inout; 688e0c0d44eSPatrick Mooney 689e0c0d44eSPatrick Mooney assert(entry->cmd == VEC_DEFAULT); 690e0c0d44eSPatrick Mooney 6912606939dSPatrick Mooney entry->cmd = VEC_FULFILL_INOUT; 692e0c0d44eSPatrick Mooney inout->bytes = bytes; 693e0c0d44eSPatrick Mooney inout->flags = INOUT_IN; 694e0c0d44eSPatrick Mooney inout->port = port; 695e0c0d44eSPatrick Mooney inout->eax = data; 696e0c0d44eSPatrick Mooney } 697e0c0d44eSPatrick Mooney 698e0c0d44eSPatrick Mooney static void 699e0c0d44eSPatrick Mooney vmentry_inout_write(int vcpu, uint16_t port, uint8_t bytes) 700e0c0d44eSPatrick Mooney { 701e0c0d44eSPatrick Mooney struct vm_entry *entry = &vmentry[vcpu]; 702e0c0d44eSPatrick Mooney struct vm_inout *inout = &entry->u.inout; 703e0c0d44eSPatrick Mooney 704e0c0d44eSPatrick Mooney assert(entry->cmd == VEC_DEFAULT); 705e0c0d44eSPatrick Mooney 7062606939dSPatrick Mooney entry->cmd = VEC_FULFILL_INOUT; 707e0c0d44eSPatrick Mooney inout->bytes = bytes; 708e0c0d44eSPatrick Mooney inout->flags = 0; 709e0c0d44eSPatrick Mooney inout->port = port; 710e0c0d44eSPatrick Mooney inout->eax = 0; 711e0c0d44eSPatrick Mooney } 712e0c0d44eSPatrick Mooney 7134c87aefeSPatrick Mooney static int 714bf21cd93STycho Nightingale vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 715bf21cd93STycho Nightingale uint32_t eax) 716bf21cd93STycho Nightingale { 717bf21cd93STycho Nightingale #if BHYVE_DEBUG 718bf21cd93STycho Nightingale /* 719bf21cd93STycho Nightingale * put guest-driven debug here 720bf21cd93STycho Nightingale */ 721bf21cd93STycho Nightingale #endif 722bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 723bf21cd93STycho 
Nightingale } 724bf21cd93STycho Nightingale 725bf21cd93STycho Nightingale static int 726bf21cd93STycho Nightingale vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 727bf21cd93STycho Nightingale { 728bf21cd93STycho Nightingale int error; 729bf21cd93STycho Nightingale int vcpu; 730e0c0d44eSPatrick Mooney struct vm_inout inout; 731e0c0d44eSPatrick Mooney bool in; 732e0c0d44eSPatrick Mooney uint8_t bytes; 733e0c0d44eSPatrick Mooney 734e0c0d44eSPatrick Mooney stats.vmexit_inout++; 735bf21cd93STycho Nightingale 736bf21cd93STycho Nightingale vcpu = *pvcpu; 737e0c0d44eSPatrick Mooney inout = vme->u.inout; 738e0c0d44eSPatrick Mooney in = (inout.flags & INOUT_IN) != 0; 739e0c0d44eSPatrick Mooney bytes = inout.bytes; 740bf21cd93STycho Nightingale 741bf21cd93STycho Nightingale /* Extra-special case of host notifications */ 742e0c0d44eSPatrick Mooney if (!in && inout.port == GUEST_NIO_PORT) { 743e0c0d44eSPatrick Mooney error = vmexit_handle_notify(ctx, vme, pvcpu, inout.eax); 744e0c0d44eSPatrick Mooney vmentry_inout_write(vcpu, inout.port, bytes); 745bf21cd93STycho Nightingale return (error); 746bf21cd93STycho Nightingale } 747bf21cd93STycho Nightingale 7482b948146SAndy Fiddaman error = emulate_inout(ctx, vcpu, &inout); 749bf21cd93STycho Nightingale if (error) { 750bf21cd93STycho Nightingale fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 751bf21cd93STycho Nightingale in ? "in" : "out", 752bf21cd93STycho Nightingale bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), 753e0c0d44eSPatrick Mooney inout.port, vmexit->rip); 754bf21cd93STycho Nightingale return (VMEXIT_ABORT); 755bf21cd93STycho Nightingale } else { 756e0c0d44eSPatrick Mooney /* 757e0c0d44eSPatrick Mooney * Communicate the status of the inout operation back to the 758e0c0d44eSPatrick Mooney * in-kernel instruction emulation. 
759e0c0d44eSPatrick Mooney */ 760e0c0d44eSPatrick Mooney if (in) { 761e0c0d44eSPatrick Mooney vmentry_inout_read(vcpu, inout.port, bytes, inout.eax); 762e0c0d44eSPatrick Mooney } else { 763e0c0d44eSPatrick Mooney vmentry_inout_write(vcpu, inout.port, bytes); 764e0c0d44eSPatrick Mooney } 765bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 766bf21cd93STycho Nightingale } 767bf21cd93STycho Nightingale } 768bf21cd93STycho Nightingale 769bf21cd93STycho Nightingale static int 770bf21cd93STycho Nightingale vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 771bf21cd93STycho Nightingale { 772bf21cd93STycho Nightingale uint64_t val; 773bf21cd93STycho Nightingale uint32_t eax, edx; 774bf21cd93STycho Nightingale int error; 775bf21cd93STycho Nightingale 776bf21cd93STycho Nightingale val = 0; 777bf21cd93STycho Nightingale error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 778bf21cd93STycho Nightingale if (error != 0) { 779bf21cd93STycho Nightingale fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 780bf21cd93STycho Nightingale vme->u.msr.code, *pvcpu); 7812b948146SAndy Fiddaman if (get_config_bool("x86.strictmsr")) { 782bf21cd93STycho Nightingale vm_inject_gp(ctx, *pvcpu); 783bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 784bf21cd93STycho Nightingale } 785bf21cd93STycho Nightingale } 786bf21cd93STycho Nightingale 787bf21cd93STycho Nightingale eax = val; 788bf21cd93STycho Nightingale error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 789bf21cd93STycho Nightingale assert(error == 0); 790bf21cd93STycho Nightingale 791bf21cd93STycho Nightingale edx = val >> 32; 792bf21cd93STycho Nightingale error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 793bf21cd93STycho Nightingale assert(error == 0); 794bf21cd93STycho Nightingale 795bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 796bf21cd93STycho Nightingale } 797bf21cd93STycho Nightingale 798bf21cd93STycho Nightingale static int 799bf21cd93STycho Nightingale vmexit_wrmsr(struct 
vmctx *ctx, struct vm_exit *vme, int *pvcpu) 800bf21cd93STycho Nightingale { 801bf21cd93STycho Nightingale int error; 802bf21cd93STycho Nightingale 803bf21cd93STycho Nightingale error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 804bf21cd93STycho Nightingale if (error != 0) { 805bf21cd93STycho Nightingale fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 806bf21cd93STycho Nightingale vme->u.msr.code, vme->u.msr.wval, *pvcpu); 8072b948146SAndy Fiddaman if (get_config_bool("x86.strictmsr")) { 808bf21cd93STycho Nightingale vm_inject_gp(ctx, *pvcpu); 809bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 810bf21cd93STycho Nightingale } 811bf21cd93STycho Nightingale } 812bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 813bf21cd93STycho Nightingale } 814bf21cd93STycho Nightingale 8152606939dSPatrick Mooney #ifdef __FreeBSD__ 816bf21cd93STycho Nightingale static int 817bf21cd93STycho Nightingale vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 818bf21cd93STycho Nightingale { 819bf21cd93STycho Nightingale 8204c87aefeSPatrick Mooney (void)spinup_ap(ctx, *pvcpu, 821bf21cd93STycho Nightingale vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 822bf21cd93STycho Nightingale 8234c87aefeSPatrick Mooney return (VMEXIT_CONTINUE); 8244c87aefeSPatrick Mooney } 8252606939dSPatrick Mooney #else 8262606939dSPatrick Mooney static int 8272606939dSPatrick Mooney vmexit_run_state(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 8282606939dSPatrick Mooney { 8292606939dSPatrick Mooney /* 8302606939dSPatrick Mooney * Run-state transitions (INIT, SIPI, etc) are handled in-kernel, so an 8312606939dSPatrick Mooney * exit to userspace with that code is not expected. 
8322606939dSPatrick Mooney */ 8332606939dSPatrick Mooney fprintf(stderr, "unexpected run-state VM exit"); 8342606939dSPatrick Mooney return (VMEXIT_ABORT); 8352606939dSPatrick Mooney } 8362606939dSPatrick Mooney #endif /* __FreeBSD__ */ 8374c87aefeSPatrick Mooney 838007ca332SPatrick Mooney #ifdef __FreeBSD__ 8394c87aefeSPatrick Mooney #define DEBUG_EPT_MISCONFIG 840007ca332SPatrick Mooney #else 841007ca332SPatrick Mooney /* EPT misconfig debugging not possible now that raw VMCS access is gone */ 842007ca332SPatrick Mooney #endif 843007ca332SPatrick Mooney 8444c87aefeSPatrick Mooney #ifdef DEBUG_EPT_MISCONFIG 8454c87aefeSPatrick Mooney #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 8464c87aefeSPatrick Mooney 8474c87aefeSPatrick Mooney static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 8484c87aefeSPatrick Mooney static int ept_misconfig_ptenum; 8494c87aefeSPatrick Mooney #endif 8504c87aefeSPatrick Mooney 8514c87aefeSPatrick Mooney static const char * 8524c87aefeSPatrick Mooney vmexit_vmx_desc(uint32_t exit_reason) 8534c87aefeSPatrick Mooney { 8544c87aefeSPatrick Mooney 8554c87aefeSPatrick Mooney if (exit_reason >= nitems(vmx_exit_reason_desc) || 8564c87aefeSPatrick Mooney vmx_exit_reason_desc[exit_reason] == NULL) 8574c87aefeSPatrick Mooney return ("Unknown"); 8584c87aefeSPatrick Mooney return (vmx_exit_reason_desc[exit_reason]); 859bf21cd93STycho Nightingale } 860bf21cd93STycho Nightingale 861bf21cd93STycho Nightingale static int 862bf21cd93STycho Nightingale vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 863bf21cd93STycho Nightingale { 864bf21cd93STycho Nightingale 865bf21cd93STycho Nightingale fprintf(stderr, "vm exit[%d]\n", *pvcpu); 866bf21cd93STycho Nightingale fprintf(stderr, "\treason\t\tVMX\n"); 867bf21cd93STycho Nightingale fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 868bf21cd93STycho Nightingale fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 869bf21cd93STycho Nightingale fprintf(stderr, "\tstatus\t\t%d\n", 
vmexit->u.vmx.status); 8704c87aefeSPatrick Mooney fprintf(stderr, "\texit_reason\t%u (%s)\n", vmexit->u.vmx.exit_reason, 8714c87aefeSPatrick Mooney vmexit_vmx_desc(vmexit->u.vmx.exit_reason)); 872bf21cd93STycho Nightingale fprintf(stderr, "\tqualification\t0x%016lx\n", 873bf21cd93STycho Nightingale vmexit->u.vmx.exit_qualification); 874bf21cd93STycho Nightingale fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 875bf21cd93STycho Nightingale fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 8764c87aefeSPatrick Mooney #ifdef DEBUG_EPT_MISCONFIG 8774c87aefeSPatrick Mooney if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 8784c87aefeSPatrick Mooney vm_get_register(ctx, *pvcpu, 8794c87aefeSPatrick Mooney VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 8804c87aefeSPatrick Mooney &ept_misconfig_gpa); 8814c87aefeSPatrick Mooney vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 8824c87aefeSPatrick Mooney &ept_misconfig_ptenum); 8834c87aefeSPatrick Mooney fprintf(stderr, "\tEPT misconfiguration:\n"); 8844c87aefeSPatrick Mooney fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 8854c87aefeSPatrick Mooney fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 8864c87aefeSPatrick Mooney ept_misconfig_ptenum, ept_misconfig_pte[0], 8874c87aefeSPatrick Mooney ept_misconfig_pte[1], ept_misconfig_pte[2], 8884c87aefeSPatrick Mooney ept_misconfig_pte[3]); 8894c87aefeSPatrick Mooney } 8904c87aefeSPatrick Mooney #endif /* DEBUG_EPT_MISCONFIG */ 8914c87aefeSPatrick Mooney return (VMEXIT_ABORT); 8924c87aefeSPatrick Mooney } 893bf21cd93STycho Nightingale 8944c87aefeSPatrick Mooney static int 8954c87aefeSPatrick Mooney vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 8964c87aefeSPatrick Mooney { 8974c87aefeSPatrick Mooney 8984c87aefeSPatrick Mooney fprintf(stderr, "vm exit[%d]\n", *pvcpu); 8994c87aefeSPatrick Mooney fprintf(stderr, "\treason\t\tSVM\n"); 9004c87aefeSPatrick Mooney fprintf(stderr, "\trip\t\t0x%016lx\n", 
vmexit->rip); 9014c87aefeSPatrick Mooney fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 9024c87aefeSPatrick Mooney fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 9034c87aefeSPatrick Mooney fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 9044c87aefeSPatrick Mooney fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 905bf21cd93STycho Nightingale return (VMEXIT_ABORT); 906bf21cd93STycho Nightingale } 907bf21cd93STycho Nightingale 908bf21cd93STycho Nightingale static int 909bf21cd93STycho Nightingale vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 910bf21cd93STycho Nightingale { 911bf21cd93STycho Nightingale 9124c87aefeSPatrick Mooney assert(vmexit->inst_length == 0); 9134c87aefeSPatrick Mooney 914bf21cd93STycho Nightingale stats.vmexit_bogus++; 915bf21cd93STycho Nightingale 916bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 917bf21cd93STycho Nightingale } 918bf21cd93STycho Nightingale 919bf21cd93STycho Nightingale static int 9204c87aefeSPatrick Mooney vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 9214c87aefeSPatrick Mooney { 9224c87aefeSPatrick Mooney 9234c87aefeSPatrick Mooney assert(vmexit->inst_length == 0); 9244c87aefeSPatrick Mooney 9254c87aefeSPatrick Mooney stats.vmexit_reqidle++; 9264c87aefeSPatrick Mooney 9274c87aefeSPatrick Mooney return (VMEXIT_CONTINUE); 9284c87aefeSPatrick Mooney } 9294c87aefeSPatrick Mooney 9304c87aefeSPatrick Mooney static int 931bf21cd93STycho Nightingale vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 932bf21cd93STycho Nightingale { 933bf21cd93STycho Nightingale 934bf21cd93STycho Nightingale stats.vmexit_hlt++; 935bf21cd93STycho Nightingale 936bf21cd93STycho Nightingale /* 937bf21cd93STycho Nightingale * Just continue execution with the next instruction. We use 938bf21cd93STycho Nightingale * the HLT VM exit as a way to be friendly with the host 939bf21cd93STycho Nightingale * scheduler. 
940bf21cd93STycho Nightingale */ 941bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 942bf21cd93STycho Nightingale } 943bf21cd93STycho Nightingale 944bf21cd93STycho Nightingale static int 945bf21cd93STycho Nightingale vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 946bf21cd93STycho Nightingale { 947bf21cd93STycho Nightingale 948bf21cd93STycho Nightingale stats.vmexit_pause++; 949bf21cd93STycho Nightingale 950bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 951bf21cd93STycho Nightingale } 952bf21cd93STycho Nightingale 953bf21cd93STycho Nightingale static int 954bf21cd93STycho Nightingale vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 955bf21cd93STycho Nightingale { 956bf21cd93STycho Nightingale 9574c87aefeSPatrick Mooney assert(vmexit->inst_length == 0); 9584c87aefeSPatrick Mooney 959bf21cd93STycho Nightingale stats.vmexit_mtrap++; 960bf21cd93STycho Nightingale 9614c87aefeSPatrick Mooney gdb_cpu_mtrap(*pvcpu); 9622b948146SAndy Fiddaman 963bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 964bf21cd93STycho Nightingale } 965bf21cd93STycho Nightingale 966bf21cd93STycho Nightingale static int 967bf21cd93STycho Nightingale vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 968bf21cd93STycho Nightingale { 969e0c0d44eSPatrick Mooney uint8_t i, valid; 970bf21cd93STycho Nightingale 971e0c0d44eSPatrick Mooney fprintf(stderr, "Failed to emulate instruction sequence "); 972bf21cd93STycho Nightingale 973e0c0d44eSPatrick Mooney valid = vmexit->u.inst_emul.num_valid; 974e0c0d44eSPatrick Mooney if (valid != 0) { 975e0c0d44eSPatrick Mooney assert(valid <= sizeof (vmexit->u.inst_emul.inst)); 976e0c0d44eSPatrick Mooney fprintf(stderr, "["); 977e0c0d44eSPatrick Mooney for (i = 0; i < valid; i++) { 978e0c0d44eSPatrick Mooney if (i == 0) { 979e0c0d44eSPatrick Mooney fprintf(stderr, "%02x", 980e0c0d44eSPatrick Mooney vmexit->u.inst_emul.inst[i]); 981e0c0d44eSPatrick Mooney } else { 982e0c0d44eSPatrick Mooney 
fprintf(stderr, ", %02x", 983e0c0d44eSPatrick Mooney vmexit->u.inst_emul.inst[i]); 984bf21cd93STycho Nightingale } 985e0c0d44eSPatrick Mooney } 986e0c0d44eSPatrick Mooney fprintf(stderr, "] "); 987e0c0d44eSPatrick Mooney } 988e0c0d44eSPatrick Mooney fprintf(stderr, "@ %rip = %x\n", vmexit->rip); 989bf21cd93STycho Nightingale 990bf21cd93STycho Nightingale return (VMEXIT_ABORT); 991bf21cd93STycho Nightingale } 992bf21cd93STycho Nightingale 993e0c0d44eSPatrick Mooney static int 994e0c0d44eSPatrick Mooney vmexit_mmio(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 995e0c0d44eSPatrick Mooney { 996e0c0d44eSPatrick Mooney int vcpu, err; 997e0c0d44eSPatrick Mooney struct vm_mmio mmio; 998e0c0d44eSPatrick Mooney bool is_read; 999e0c0d44eSPatrick Mooney 1000e0c0d44eSPatrick Mooney stats.vmexit_mmio++; 1001e0c0d44eSPatrick Mooney 1002e0c0d44eSPatrick Mooney vcpu = *pvcpu; 1003e0c0d44eSPatrick Mooney mmio = vmexit->u.mmio; 1004e0c0d44eSPatrick Mooney is_read = (mmio.read != 0); 1005e0c0d44eSPatrick Mooney 1006e0c0d44eSPatrick Mooney err = emulate_mem(ctx, vcpu, &mmio); 1007e0c0d44eSPatrick Mooney 1008e0c0d44eSPatrick Mooney if (err == ESRCH) { 1009e0c0d44eSPatrick Mooney fprintf(stderr, "Unhandled memory access to 0x%lx\n", mmio.gpa); 1010e0c0d44eSPatrick Mooney stats.mmio_unhandled++; 1011e0c0d44eSPatrick Mooney 1012e0c0d44eSPatrick Mooney /* 1013e0c0d44eSPatrick Mooney * Access to non-existent physical addresses is not likely to 1014e0c0d44eSPatrick Mooney * result in fatal errors on hardware machines, but rather reads 1015e0c0d44eSPatrick Mooney * of all-ones or discarded-but-acknowledged writes. 
1016e0c0d44eSPatrick Mooney */ 1017e0c0d44eSPatrick Mooney mmio.data = ~0UL; 1018e0c0d44eSPatrick Mooney err = 0; 1019e0c0d44eSPatrick Mooney } 1020e0c0d44eSPatrick Mooney 1021e0c0d44eSPatrick Mooney if (err == 0) { 1022e0c0d44eSPatrick Mooney if (is_read) { 1023e0c0d44eSPatrick Mooney vmentry_mmio_read(vcpu, mmio.gpa, mmio.bytes, 1024e0c0d44eSPatrick Mooney mmio.data); 1025e0c0d44eSPatrick Mooney } else { 1026e0c0d44eSPatrick Mooney vmentry_mmio_write(vcpu, mmio.gpa, mmio.bytes); 1027e0c0d44eSPatrick Mooney } 1028bf21cd93STycho Nightingale return (VMEXIT_CONTINUE); 1029bf21cd93STycho Nightingale } 1030bf21cd93STycho Nightingale 1031e0c0d44eSPatrick Mooney fprintf(stderr, "Unhandled mmio error to 0x%lx: %d\n", mmio.gpa, err); 1032e0c0d44eSPatrick Mooney return (VMEXIT_ABORT); 1033e0c0d44eSPatrick Mooney } 1034e0c0d44eSPatrick Mooney 10354c87aefeSPatrick Mooney static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 10364c87aefeSPatrick Mooney static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 10374c87aefeSPatrick Mooney 10384c87aefeSPatrick Mooney static int 10394c87aefeSPatrick Mooney vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 10404c87aefeSPatrick Mooney { 10414c87aefeSPatrick Mooney enum vm_suspend_how how; 10424c87aefeSPatrick Mooney 10434c87aefeSPatrick Mooney how = vmexit->u.suspended.how; 10444c87aefeSPatrick Mooney 10454c87aefeSPatrick Mooney fbsdrun_deletecpu(ctx, *pvcpu); 10464c87aefeSPatrick Mooney 10474c87aefeSPatrick Mooney if (*pvcpu != BSP) { 10484c87aefeSPatrick Mooney pthread_mutex_lock(&resetcpu_mtx); 10494c87aefeSPatrick Mooney pthread_cond_signal(&resetcpu_cond); 10504c87aefeSPatrick Mooney pthread_mutex_unlock(&resetcpu_mtx); 10514c87aefeSPatrick Mooney pthread_exit(NULL); 10524c87aefeSPatrick Mooney } 10534c87aefeSPatrick Mooney 10544c87aefeSPatrick Mooney pthread_mutex_lock(&resetcpu_mtx); 10554c87aefeSPatrick Mooney while (!CPU_EMPTY(&cpumask)) { 10564c87aefeSPatrick Mooney 
pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 10574c87aefeSPatrick Mooney } 10584c87aefeSPatrick Mooney pthread_mutex_unlock(&resetcpu_mtx); 10594c87aefeSPatrick Mooney 10604c87aefeSPatrick Mooney switch (how) { 10614c87aefeSPatrick Mooney case VM_SUSPEND_RESET: 10624c87aefeSPatrick Mooney exit(0); 10634c87aefeSPatrick Mooney case VM_SUSPEND_POWEROFF: 10642b948146SAndy Fiddaman if (get_config_bool_default("destroy_on_poweroff", false)) 10656960cd89SAndy Fiddaman vm_destroy(ctx); 10664c87aefeSPatrick Mooney exit(1); 10674c87aefeSPatrick Mooney case VM_SUSPEND_HALT: 10684c87aefeSPatrick Mooney exit(2); 10694c87aefeSPatrick Mooney case VM_SUSPEND_TRIPLEFAULT: 10704c87aefeSPatrick Mooney exit(3); 10714c87aefeSPatrick Mooney default: 10724c87aefeSPatrick Mooney fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 10734c87aefeSPatrick Mooney exit(100); 10744c87aefeSPatrick Mooney } 10754c87aefeSPatrick Mooney return (0); /* NOTREACHED */ 10764c87aefeSPatrick Mooney } 10774c87aefeSPatrick Mooney 10784c87aefeSPatrick Mooney static int 10794c87aefeSPatrick Mooney vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 10804c87aefeSPatrick Mooney { 10814c87aefeSPatrick Mooney 10824c87aefeSPatrick Mooney gdb_cpu_suspend(*pvcpu); 10834c87aefeSPatrick Mooney return (VMEXIT_CONTINUE); 10844c87aefeSPatrick Mooney } 10854c87aefeSPatrick Mooney 1086154972afSPatrick Mooney static int 1087154972afSPatrick Mooney vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 1088154972afSPatrick Mooney { 1089154972afSPatrick Mooney 1090154972afSPatrick Mooney gdb_cpu_breakpoint(*pvcpu, vmexit); 1091154972afSPatrick Mooney return (VMEXIT_CONTINUE); 1092154972afSPatrick Mooney } 1093154972afSPatrick Mooney 1094bf21cd93STycho Nightingale static vmexit_handler_t handler[VM_EXITCODE_MAX] = { 1095bf21cd93STycho Nightingale [VM_EXITCODE_INOUT] = vmexit_inout, 1096e0c0d44eSPatrick Mooney [VM_EXITCODE_MMIO] = vmexit_mmio, 1097bf21cd93STycho Nightingale 
[VM_EXITCODE_VMX] = vmexit_vmx, 10984c87aefeSPatrick Mooney [VM_EXITCODE_SVM] = vmexit_svm, 1099bf21cd93STycho Nightingale [VM_EXITCODE_BOGUS] = vmexit_bogus, 11004c87aefeSPatrick Mooney [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 1101bf21cd93STycho Nightingale [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 1102bf21cd93STycho Nightingale [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 1103bf21cd93STycho Nightingale [VM_EXITCODE_MTRAP] = vmexit_mtrap, 1104bf21cd93STycho Nightingale [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 11052606939dSPatrick Mooney #ifdef __FreeBSD__ 1106bf21cd93STycho Nightingale [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 11072606939dSPatrick Mooney #else 11082606939dSPatrick Mooney [VM_EXITCODE_RUN_STATE] = vmexit_run_state, 11092606939dSPatrick Mooney #endif 11104c87aefeSPatrick Mooney [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 11114c87aefeSPatrick Mooney [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 11124c87aefeSPatrick Mooney [VM_EXITCODE_DEBUG] = vmexit_debug, 1113154972afSPatrick Mooney [VM_EXITCODE_BPT] = vmexit_breakpoint, 1114bf21cd93STycho Nightingale }; 1115bf21cd93STycho Nightingale 1116bf21cd93STycho Nightingale static void 1117bf21cd93STycho Nightingale vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 1118bf21cd93STycho Nightingale { 11194c87aefeSPatrick Mooney int error, rc; 1120bf21cd93STycho Nightingale enum vm_exitcode exitcode; 11214c87aefeSPatrick Mooney cpuset_t active_cpus; 1122e0c0d44eSPatrick Mooney struct vm_exit *vexit; 1123e0c0d44eSPatrick Mooney struct vm_entry *ventry; 1124bf21cd93STycho Nightingale 1125bf21cd93STycho Nightingale #ifdef __FreeBSD__ 11264c87aefeSPatrick Mooney if (vcpumap[vcpu] != NULL) { 1127bf21cd93STycho Nightingale error = pthread_setaffinity_np(pthread_self(), 11284c87aefeSPatrick Mooney sizeof(cpuset_t), vcpumap[vcpu]); 1129bf21cd93STycho Nightingale assert(error == 0); 1130bf21cd93STycho Nightingale } 1131bf21cd93STycho Nightingale #endif 11324c87aefeSPatrick Mooney error = vm_active_cpus(ctx, &active_cpus); 
11334c87aefeSPatrick Mooney assert(CPU_ISSET(vcpu, &active_cpus)); 1134bf21cd93STycho Nightingale 1135bf21cd93STycho Nightingale error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 1136bf21cd93STycho Nightingale assert(error == 0); 1137bf21cd93STycho Nightingale 1138e0c0d44eSPatrick Mooney ventry = &vmentry[vcpu]; 1139e0c0d44eSPatrick Mooney vexit = &vmexit[vcpu]; 1140e0c0d44eSPatrick Mooney 1141bf21cd93STycho Nightingale while (1) { 1142e0c0d44eSPatrick Mooney error = vm_run(ctx, vcpu, ventry, vexit); 1143bf21cd93STycho Nightingale if (error != 0) 1144bf21cd93STycho Nightingale break; 1145bf21cd93STycho Nightingale 1146e0c0d44eSPatrick Mooney if (ventry->cmd != VEC_DEFAULT) { 1147e0c0d44eSPatrick Mooney /* 1148e0c0d44eSPatrick Mooney * Discard any lingering entry state after it has been 1149e0c0d44eSPatrick Mooney * submitted via vm_run(). 1150e0c0d44eSPatrick Mooney */ 1151e0c0d44eSPatrick Mooney bzero(ventry, sizeof (*ventry)); 1152e0c0d44eSPatrick Mooney } 1153e0c0d44eSPatrick Mooney 1154e0c0d44eSPatrick Mooney exitcode = vexit->exitcode; 1155bf21cd93STycho Nightingale if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 1156bf21cd93STycho Nightingale fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 1157bf21cd93STycho Nightingale exitcode); 11584c87aefeSPatrick Mooney exit(4); 1159bf21cd93STycho Nightingale } 1160bf21cd93STycho Nightingale 1161e0c0d44eSPatrick Mooney rc = (*handler[exitcode])(ctx, vexit, &vcpu); 1162bf21cd93STycho Nightingale 1163bf21cd93STycho Nightingale switch (rc) { 1164bf21cd93STycho Nightingale case VMEXIT_CONTINUE: 1165bf21cd93STycho Nightingale break; 1166bf21cd93STycho Nightingale case VMEXIT_ABORT: 1167bf21cd93STycho Nightingale abort(); 1168bf21cd93STycho Nightingale default: 11694c87aefeSPatrick Mooney exit(4); 1170bf21cd93STycho Nightingale } 1171bf21cd93STycho Nightingale } 1172bf21cd93STycho Nightingale fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 1173bf21cd93STycho Nightingale } 
1174bf21cd93STycho Nightingale 1175bf21cd93STycho Nightingale static int 1176bf21cd93STycho Nightingale num_vcpus_allowed(struct vmctx *ctx) 1177bf21cd93STycho Nightingale { 1178c3ae3afaSPatrick Mooney #ifdef __FreeBSD__ 1179bf21cd93STycho Nightingale int tmp, error; 1180bf21cd93STycho Nightingale 1181bf21cd93STycho Nightingale error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 1182bf21cd93STycho Nightingale 1183bf21cd93STycho Nightingale /* 1184bf21cd93STycho Nightingale * The guest is allowed to spinup more than one processor only if the 1185bf21cd93STycho Nightingale * UNRESTRICTED_GUEST capability is available. 1186bf21cd93STycho Nightingale */ 1187bf21cd93STycho Nightingale if (error == 0) 1188bf21cd93STycho Nightingale return (VM_MAXCPU); 1189bf21cd93STycho Nightingale else 1190bf21cd93STycho Nightingale return (1); 1191c3ae3afaSPatrick Mooney #else 1192c3ae3afaSPatrick Mooney /* Unrestricted Guest is always enabled on illumos */ 1193c3ae3afaSPatrick Mooney return (VM_MAXCPU); 1194c3ae3afaSPatrick Mooney #endif /* __FreeBSD__ */ 1195bf21cd93STycho Nightingale } 1196bf21cd93STycho Nightingale 1197bf21cd93STycho Nightingale void 1198bf21cd93STycho Nightingale fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 1199bf21cd93STycho Nightingale { 1200bf21cd93STycho Nightingale int err, tmp; 1201bf21cd93STycho Nightingale 12022b948146SAndy Fiddaman if (get_config_bool_default("x86.vmexit_on_hlt", false)) { 1203bf21cd93STycho Nightingale err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 1204bf21cd93STycho Nightingale if (err < 0) { 1205bf21cd93STycho Nightingale fprintf(stderr, "VM exit on HLT not supported\n"); 12064c87aefeSPatrick Mooney exit(4); 1207bf21cd93STycho Nightingale } 1208bf21cd93STycho Nightingale vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 1209bf21cd93STycho Nightingale if (cpu == BSP) 1210bf21cd93STycho Nightingale handler[VM_EXITCODE_HLT] = vmexit_hlt; 1211bf21cd93STycho Nightingale } 1212bf21cd93STycho Nightingale 
12132b948146SAndy Fiddaman if (get_config_bool_default("x86.vmexit_on_pause", false)) { 1214bf21cd93STycho Nightingale /* 1215bf21cd93STycho Nightingale * pause exit support required for this mode 1216bf21cd93STycho Nightingale */ 1217bf21cd93STycho Nightingale err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 1218bf21cd93STycho Nightingale if (err < 0) { 1219bf21cd93STycho Nightingale fprintf(stderr, 1220bf21cd93STycho Nightingale "SMP mux requested, no pause support\n"); 12214c87aefeSPatrick Mooney exit(4); 1222bf21cd93STycho Nightingale } 1223bf21cd93STycho Nightingale vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 1224bf21cd93STycho Nightingale if (cpu == BSP) 1225bf21cd93STycho Nightingale handler[VM_EXITCODE_PAUSE] = vmexit_pause; 1226bf21cd93STycho Nightingale } 1227bf21cd93STycho Nightingale 12282b948146SAndy Fiddaman if (get_config_bool_default("x86.x2apic", false)) 1229bf21cd93STycho Nightingale err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 1230bf21cd93STycho Nightingale else 1231bf21cd93STycho Nightingale err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 1232bf21cd93STycho Nightingale 1233bf21cd93STycho Nightingale if (err) { 1234bf21cd93STycho Nightingale fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 12354c87aefeSPatrick Mooney exit(4); 1236bf21cd93STycho Nightingale } 1237bf21cd93STycho Nightingale 1238bf21cd93STycho Nightingale #ifdef __FreeBSD__ 1239bf21cd93STycho Nightingale vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 1240bf21cd93STycho Nightingale #endif 1241bf21cd93STycho Nightingale } 1242bf21cd93STycho Nightingale 12434c87aefeSPatrick Mooney static struct vmctx * 12444c87aefeSPatrick Mooney do_open(const char *vmname) 12454c87aefeSPatrick Mooney { 12464c87aefeSPatrick Mooney struct vmctx *ctx; 12474c87aefeSPatrick Mooney int error; 12484c87aefeSPatrick Mooney bool reinit, romboot; 12494c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 12504c87aefeSPatrick Mooney cap_rights_t rights; 
12514c87aefeSPatrick Mooney const cap_ioctl_t *cmds; 12524c87aefeSPatrick Mooney size_t ncmds; 12534c87aefeSPatrick Mooney #endif 12544c87aefeSPatrick Mooney 12554c87aefeSPatrick Mooney reinit = romboot = false; 12564c87aefeSPatrick Mooney 12574c87aefeSPatrick Mooney if (lpc_bootrom()) 12584c87aefeSPatrick Mooney romboot = true; 1259b57f5d3eSPatrick Mooney #ifndef __FreeBSD__ 1260b57f5d3eSPatrick Mooney uint64_t create_flags = 0; 1261b57f5d3eSPatrick Mooney if (get_config_bool_default("memory.use_reservoir", false)) { 1262b57f5d3eSPatrick Mooney create_flags |= VCF_RESERVOIR_MEM; 1263b57f5d3eSPatrick Mooney } 1264b57f5d3eSPatrick Mooney error = vm_create(vmname, create_flags); 1265b57f5d3eSPatrick Mooney #else 12664c87aefeSPatrick Mooney error = vm_create(vmname); 1267b57f5d3eSPatrick Mooney #endif /* __FreeBSD__ */ 12684c87aefeSPatrick Mooney if (error) { 12694c87aefeSPatrick Mooney if (errno == EEXIST) { 12704c87aefeSPatrick Mooney if (romboot) { 12714c87aefeSPatrick Mooney reinit = true; 12724c87aefeSPatrick Mooney } else { 12734c87aefeSPatrick Mooney /* 12744c87aefeSPatrick Mooney * The virtual machine has been setup by the 12754c87aefeSPatrick Mooney * userspace bootloader. 12764c87aefeSPatrick Mooney */ 12774c87aefeSPatrick Mooney } 12784c87aefeSPatrick Mooney } else { 12794c87aefeSPatrick Mooney perror("vm_create"); 12804c87aefeSPatrick Mooney exit(4); 12814c87aefeSPatrick Mooney } 12824c87aefeSPatrick Mooney } else { 12834c87aefeSPatrick Mooney if (!romboot) { 12844c87aefeSPatrick Mooney /* 12854c87aefeSPatrick Mooney * If the virtual machine was just created then a 12864c87aefeSPatrick Mooney * bootrom must be configured to boot it. 
12874c87aefeSPatrick Mooney */ 12884c87aefeSPatrick Mooney fprintf(stderr, "virtual machine cannot be booted\n"); 12894c87aefeSPatrick Mooney exit(4); 12904c87aefeSPatrick Mooney } 12914c87aefeSPatrick Mooney } 12924c87aefeSPatrick Mooney 12934c87aefeSPatrick Mooney ctx = vm_open(vmname); 12944c87aefeSPatrick Mooney if (ctx == NULL) { 12954c87aefeSPatrick Mooney perror("vm_open"); 12964c87aefeSPatrick Mooney exit(4); 12974c87aefeSPatrick Mooney } 12984c87aefeSPatrick Mooney 12994c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 13004c87aefeSPatrick Mooney cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); 13014c87aefeSPatrick Mooney if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1) 13024c87aefeSPatrick Mooney errx(EX_OSERR, "Unable to apply rights for sandbox"); 13034c87aefeSPatrick Mooney vm_get_ioctls(&ncmds); 13044c87aefeSPatrick Mooney cmds = vm_get_ioctls(NULL); 13054c87aefeSPatrick Mooney if (cmds == NULL) 13064c87aefeSPatrick Mooney errx(EX_OSERR, "out of memory"); 13074c87aefeSPatrick Mooney if (caph_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1) 13084c87aefeSPatrick Mooney errx(EX_OSERR, "Unable to apply rights for sandbox"); 13094c87aefeSPatrick Mooney free((cap_ioctl_t *)cmds); 13104c87aefeSPatrick Mooney #endif 13114c87aefeSPatrick Mooney 13124c87aefeSPatrick Mooney if (reinit) { 131352fac30eSPatrick Mooney #ifndef __FreeBSD__ 131452fac30eSPatrick Mooney error = vm_reinit(ctx, 0); 131552fac30eSPatrick Mooney #else 13164c87aefeSPatrick Mooney error = vm_reinit(ctx); 131752fac30eSPatrick Mooney #endif 13184c87aefeSPatrick Mooney if (error) { 13194c87aefeSPatrick Mooney perror("vm_reinit"); 13204c87aefeSPatrick Mooney exit(4); 13214c87aefeSPatrick Mooney } 13224c87aefeSPatrick Mooney } 13234c87aefeSPatrick Mooney error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); 13244c87aefeSPatrick Mooney if (error) 13254c87aefeSPatrick Mooney errx(EX_OSERR, "vm_set_topology"); 13264c87aefeSPatrick Mooney return (ctx); 13274c87aefeSPatrick 
Mooney } 13284c87aefeSPatrick Mooney 13292b948146SAndy Fiddaman static bool 13302b948146SAndy Fiddaman parse_config_option(const char *option) 13312b948146SAndy Fiddaman { 13322b948146SAndy Fiddaman const char *value; 13332b948146SAndy Fiddaman char *path; 13342b948146SAndy Fiddaman 13352b948146SAndy Fiddaman value = strchr(option, '='); 13362b948146SAndy Fiddaman if (value == NULL || value[1] == '\0') 13372b948146SAndy Fiddaman return (false); 13382b948146SAndy Fiddaman path = strndup(option, value - option); 13392b948146SAndy Fiddaman if (path == NULL) 13402b948146SAndy Fiddaman err(4, "Failed to allocate memory"); 13412b948146SAndy Fiddaman set_config_value(path, value + 1); 13422b948146SAndy Fiddaman return (true); 13432b948146SAndy Fiddaman } 13442b948146SAndy Fiddaman 13452b948146SAndy Fiddaman static void 13462b948146SAndy Fiddaman parse_simple_config_file(const char *path) 13472b948146SAndy Fiddaman { 13482b948146SAndy Fiddaman FILE *fp; 13492b948146SAndy Fiddaman char *line, *cp; 13502b948146SAndy Fiddaman size_t linecap; 13512b948146SAndy Fiddaman unsigned int lineno; 13522b948146SAndy Fiddaman 13532b948146SAndy Fiddaman fp = fopen(path, "r"); 13542b948146SAndy Fiddaman if (fp == NULL) 13552b948146SAndy Fiddaman err(4, "Failed to open configuration file %s", path); 13562b948146SAndy Fiddaman line = NULL; 13572b948146SAndy Fiddaman linecap = 0; 13582b948146SAndy Fiddaman lineno = 1; 13592b948146SAndy Fiddaman for (lineno = 1; getline(&line, &linecap, fp) > 0; lineno++) { 13602b948146SAndy Fiddaman if (*line == '#' || *line == '\n') 13612b948146SAndy Fiddaman continue; 13622b948146SAndy Fiddaman cp = strchr(line, '\n'); 13632b948146SAndy Fiddaman if (cp != NULL) 13642b948146SAndy Fiddaman *cp = '\0'; 13652b948146SAndy Fiddaman if (!parse_config_option(line)) 13662b948146SAndy Fiddaman errx(4, "%s line %u: invalid config option '%s'", path, 13672b948146SAndy Fiddaman lineno, line); 13682b948146SAndy Fiddaman } 13692b948146SAndy Fiddaman free(line); 
13702b948146SAndy Fiddaman fclose(fp); 13712b948146SAndy Fiddaman } 13722b948146SAndy Fiddaman 13732b948146SAndy Fiddaman static void 1374b0de25cbSAndy Fiddaman parse_gdb_options(char *optarg) 1375b0de25cbSAndy Fiddaman { 1376b0de25cbSAndy Fiddaman const char *sport; 1377b0de25cbSAndy Fiddaman char *colon; 1378b0de25cbSAndy Fiddaman 1379b0de25cbSAndy Fiddaman if (optarg[0] == 'w') { 1380b0de25cbSAndy Fiddaman set_config_bool("gdb.wait", true); 1381b0de25cbSAndy Fiddaman optarg++; 1382b0de25cbSAndy Fiddaman } 1383b0de25cbSAndy Fiddaman 1384b0de25cbSAndy Fiddaman colon = strrchr(optarg, ':'); 1385b0de25cbSAndy Fiddaman if (colon == NULL) { 1386b0de25cbSAndy Fiddaman sport = optarg; 1387b0de25cbSAndy Fiddaman } else { 1388b0de25cbSAndy Fiddaman *colon = '\0'; 1389b0de25cbSAndy Fiddaman colon++; 1390b0de25cbSAndy Fiddaman sport = colon; 1391b0de25cbSAndy Fiddaman set_config_value("gdb.address", optarg); 1392b0de25cbSAndy Fiddaman } 1393b0de25cbSAndy Fiddaman 1394b0de25cbSAndy Fiddaman set_config_value("gdb.port", sport); 1395b0de25cbSAndy Fiddaman } 1396b0de25cbSAndy Fiddaman 1397b0de25cbSAndy Fiddaman static void 13982b948146SAndy Fiddaman set_defaults(void) 13992b948146SAndy Fiddaman { 14002b948146SAndy Fiddaman 14012b948146SAndy Fiddaman set_config_bool("acpi_tables", false); 14022b948146SAndy Fiddaman set_config_value("memory.size", "256M"); 14032b948146SAndy Fiddaman set_config_bool("x86.strictmsr", true); 14042b948146SAndy Fiddaman } 14052b948146SAndy Fiddaman 1406bf21cd93STycho Nightingale int 1407bf21cd93STycho Nightingale main(int argc, char *argv[]) 1408bf21cd93STycho Nightingale { 14092b948146SAndy Fiddaman int c, error, err; 14102b948146SAndy Fiddaman int max_vcpus, memflags; 1411bf21cd93STycho Nightingale struct vmctx *ctx; 1412bf21cd93STycho Nightingale uint64_t rip; 1413bf21cd93STycho Nightingale size_t memsize; 14142b948146SAndy Fiddaman const char *value, *vmname; 14154c87aefeSPatrick Mooney char *optstr; 1416bf21cd93STycho Nightingale 
14172b948146SAndy Fiddaman init_config(); 14182b948146SAndy Fiddaman set_defaults(); 1419bf21cd93STycho Nightingale progname = basename(argv[0]); 1420bf21cd93STycho Nightingale 1421bf21cd93STycho Nightingale #ifdef __FreeBSD__ 14222b948146SAndy Fiddaman optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:"; 1423bf21cd93STycho Nightingale #else 14242b948146SAndy Fiddaman /* +d, +B, -p */ 14252b948146SAndy Fiddaman optstr = "adehuwxACDHIPSWYk:o:G:c:s:m:l:B:U:"; 1426bf21cd93STycho Nightingale #endif 14274c87aefeSPatrick Mooney while ((c = getopt(argc, argv, optstr)) != -1) { 1428bf21cd93STycho Nightingale switch (c) { 1429bf21cd93STycho Nightingale case 'a': 14302b948146SAndy Fiddaman set_config_bool("x86.x2apic", false); 1431bf21cd93STycho Nightingale break; 1432bf21cd93STycho Nightingale case 'A': 1433*2c4fdd8fSAndy Fiddaman #ifdef __FreeBSD__ 1434*2c4fdd8fSAndy Fiddaman /* 1435*2c4fdd8fSAndy Fiddaman * This option is ignored on illumos since the 1436*2c4fdd8fSAndy Fiddaman * generated ACPI tables are not used; the bootroms 1437*2c4fdd8fSAndy Fiddaman * have their own. The option is retained for backwards 1438*2c4fdd8fSAndy Fiddaman * compatibility but does nothing. Note that the 1439*2c4fdd8fSAndy Fiddaman * acpi_tables configuration is still accepted via 1440*2c4fdd8fSAndy Fiddaman * -o if somebody really wants to generate these tables. 
1441*2c4fdd8fSAndy Fiddaman */ 14422b948146SAndy Fiddaman set_config_bool("acpi_tables", true); 1443*2c4fdd8fSAndy Fiddaman #endif 1444bf21cd93STycho Nightingale break; 14456960cd89SAndy Fiddaman case 'D': 14462b948146SAndy Fiddaman set_config_bool("destroy_on_poweroff", true); 14476960cd89SAndy Fiddaman break; 14482b948146SAndy Fiddaman #ifndef __FreeBSD__ 14494c87aefeSPatrick Mooney case 'B': 14504c87aefeSPatrick Mooney if (smbios_parse(optarg) != 0) { 14514c87aefeSPatrick Mooney errx(EX_USAGE, "invalid SMBIOS " 14524c87aefeSPatrick Mooney "configuration '%s'", optarg); 14534c87aefeSPatrick Mooney } 14544c87aefeSPatrick Mooney break; 14559c3024a3SHans Rosenfeld case 'd': 14562b948146SAndy Fiddaman set_config_bool("suspend_at_boot", true); 14579c3024a3SHans Rosenfeld break; 14582b948146SAndy Fiddaman #endif 14592b948146SAndy Fiddaman #ifdef __FreeBSD__ 1460bf21cd93STycho Nightingale case 'p': 14614c87aefeSPatrick Mooney if (pincpu_parse(optarg) != 0) { 14624c87aefeSPatrick Mooney errx(EX_USAGE, "invalid vcpu pinning " 14634c87aefeSPatrick Mooney "configuration '%s'", optarg); 14644c87aefeSPatrick Mooney } 1465bf21cd93STycho Nightingale break; 1466bf21cd93STycho Nightingale #endif 1467bf21cd93STycho Nightingale case 'c': 14684c87aefeSPatrick Mooney if (topology_parse(optarg) != 0) { 14694c87aefeSPatrick Mooney errx(EX_USAGE, "invalid cpu topology " 14704c87aefeSPatrick Mooney "'%s'", optarg); 14714c87aefeSPatrick Mooney } 1472bf21cd93STycho Nightingale break; 14734c87aefeSPatrick Mooney case 'C': 14742b948146SAndy Fiddaman set_config_bool("memory.guest_in_core", true); 14754c87aefeSPatrick Mooney break; 14764c87aefeSPatrick Mooney case 'G': 1477b0de25cbSAndy Fiddaman parse_gdb_options(optarg); 14782b948146SAndy Fiddaman break; 14792b948146SAndy Fiddaman case 'k': 14802b948146SAndy Fiddaman parse_simple_config_file(optarg); 1481bf21cd93STycho Nightingale break; 1482bf21cd93STycho Nightingale case 'l': 14834c87aefeSPatrick Mooney if (strncmp(optarg, "help", 
strlen(optarg)) == 0) { 14844c87aefeSPatrick Mooney lpc_print_supported_devices(); 14854c87aefeSPatrick Mooney exit(0); 14864c87aefeSPatrick Mooney } else if (lpc_device_parse(optarg) != 0) { 1487bf21cd93STycho Nightingale errx(EX_USAGE, "invalid lpc device " 1488bf21cd93STycho Nightingale "configuration '%s'", optarg); 1489bf21cd93STycho Nightingale } 1490bf21cd93STycho Nightingale break; 1491bf21cd93STycho Nightingale case 's': 14924c87aefeSPatrick Mooney if (strncmp(optarg, "help", strlen(optarg)) == 0) { 14934c87aefeSPatrick Mooney pci_print_supported_devices(); 14944c87aefeSPatrick Mooney exit(0); 14954c87aefeSPatrick Mooney } else if (pci_parse_slot(optarg) != 0) 14964c87aefeSPatrick Mooney exit(4); 1497bf21cd93STycho Nightingale else 1498bf21cd93STycho Nightingale break; 14994c87aefeSPatrick Mooney case 'S': 15002b948146SAndy Fiddaman set_config_bool("memory.wired", true); 15014c87aefeSPatrick Mooney break; 1502bf21cd93STycho Nightingale case 'm': 15032b948146SAndy Fiddaman set_config_value("memory.size", optarg); 15042b948146SAndy Fiddaman break; 15052b948146SAndy Fiddaman case 'o': 15062b948146SAndy Fiddaman if (!parse_config_option(optarg)) 15072b948146SAndy Fiddaman errx(EX_USAGE, "invalid configuration option '%s'", optarg); 1508bf21cd93STycho Nightingale break; 1509bf21cd93STycho Nightingale case 'H': 15102b948146SAndy Fiddaman set_config_bool("x86.vmexit_on_hlt", true); 1511bf21cd93STycho Nightingale break; 1512bf21cd93STycho Nightingale case 'I': 1513bf21cd93STycho Nightingale /* 1514bf21cd93STycho Nightingale * The "-I" option was used to add an ioapic to the 1515bf21cd93STycho Nightingale * virtual machine. 1516bf21cd93STycho Nightingale * 1517bf21cd93STycho Nightingale * An ioapic is now provided unconditionally for each 1518bf21cd93STycho Nightingale * virtual machine and this option is now deprecated. 
1519bf21cd93STycho Nightingale */ 1520bf21cd93STycho Nightingale break; 1521bf21cd93STycho Nightingale case 'P': 15222b948146SAndy Fiddaman set_config_bool("x86.vmexit_on_pause", true); 1523bf21cd93STycho Nightingale break; 1524bf21cd93STycho Nightingale case 'e': 15252b948146SAndy Fiddaman set_config_bool("x86.strictio", true); 1526bf21cd93STycho Nightingale break; 15274c87aefeSPatrick Mooney case 'u': 15282b948146SAndy Fiddaman set_config_bool("rtc.use_localtime", false); 15294c87aefeSPatrick Mooney break; 1530bf21cd93STycho Nightingale case 'U': 15312b948146SAndy Fiddaman set_config_value("uuid", optarg); 1532bf21cd93STycho Nightingale break; 15334c87aefeSPatrick Mooney case 'w': 15342b948146SAndy Fiddaman set_config_bool("x86.strictmsr", false); 15354c87aefeSPatrick Mooney break; 1536bf21cd93STycho Nightingale case 'W': 15372b948146SAndy Fiddaman set_config_bool("virtio_msix", false); 1538bf21cd93STycho Nightingale break; 1539bf21cd93STycho Nightingale case 'x': 15402b948146SAndy Fiddaman set_config_bool("x86.x2apic", true); 1541bf21cd93STycho Nightingale break; 15424c87aefeSPatrick Mooney case 'Y': 15432b948146SAndy Fiddaman set_config_bool("x86.mptable", false); 15444c87aefeSPatrick Mooney break; 1545bf21cd93STycho Nightingale case 'h': 1546bf21cd93STycho Nightingale usage(0); 1547bf21cd93STycho Nightingale default: 1548bf21cd93STycho Nightingale usage(1); 1549bf21cd93STycho Nightingale } 1550bf21cd93STycho Nightingale } 1551bf21cd93STycho Nightingale argc -= optind; 1552bf21cd93STycho Nightingale argv += optind; 1553bf21cd93STycho Nightingale 15542b948146SAndy Fiddaman if (argc > 1) 1555bf21cd93STycho Nightingale usage(1); 1556bf21cd93STycho Nightingale 15572b948146SAndy Fiddaman if (argc == 1) 15582b948146SAndy Fiddaman set_config_value("name", argv[0]); 15592b948146SAndy Fiddaman 15602b948146SAndy Fiddaman vmname = get_config_value("name"); 15612b948146SAndy Fiddaman if (vmname == NULL) 15622b948146SAndy Fiddaman usage(1); 15632b948146SAndy Fiddaman 
15642b948146SAndy Fiddaman if (get_config_bool_default("config.dump", false)) { 15652b948146SAndy Fiddaman dump_config(); 15662b948146SAndy Fiddaman exit(1); 15672b948146SAndy Fiddaman } 15682b948146SAndy Fiddaman 15692817ebc2SAndy Fiddaman #ifndef __FreeBSD__ 15702817ebc2SAndy Fiddaman illumos_priv_init(); 15712817ebc2SAndy Fiddaman #endif 15722817ebc2SAndy Fiddaman 15732b948146SAndy Fiddaman calc_topolopgy(); 15742b948146SAndy Fiddaman #ifdef __FreeBSD__ 15752b948146SAndy Fiddaman build_vcpumaps(); 15762b948146SAndy Fiddaman #endif 15772b948146SAndy Fiddaman 15782b948146SAndy Fiddaman value = get_config_value("memory.size"); 15792b948146SAndy Fiddaman error = vm_parse_memsize(value, &memsize); 15802b948146SAndy Fiddaman if (error) 15812b948146SAndy Fiddaman errx(EX_USAGE, "invalid memsize '%s'", value); 15822b948146SAndy Fiddaman 15834c87aefeSPatrick Mooney ctx = do_open(vmname); 1584bf21cd93STycho Nightingale 1585bf21cd93STycho Nightingale max_vcpus = num_vcpus_allowed(ctx); 1586bf21cd93STycho Nightingale if (guest_ncpus > max_vcpus) { 1587bf21cd93STycho Nightingale fprintf(stderr, "%d vCPUs requested but only %d available\n", 1588bf21cd93STycho Nightingale guest_ncpus, max_vcpus); 15894c87aefeSPatrick Mooney exit(4); 1590bf21cd93STycho Nightingale } 1591bf21cd93STycho Nightingale 1592bf21cd93STycho Nightingale fbsdrun_set_capabilities(ctx, BSP); 1593bf21cd93STycho Nightingale 15942b948146SAndy Fiddaman memflags = 0; 15952b948146SAndy Fiddaman if (get_config_bool_default("memory.wired", false)) 15962b948146SAndy Fiddaman memflags |= VM_MEM_F_WIRED; 15972b948146SAndy Fiddaman if (get_config_bool_default("memory.guest_in_core", false)) 15982b948146SAndy Fiddaman memflags |= VM_MEM_F_INCORE; 15994c87aefeSPatrick Mooney vm_set_memflags(ctx, memflags); 16004c87aefeSPatrick Mooney #ifdef __FreeBSD__ 1601bf21cd93STycho Nightingale err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 16024c87aefeSPatrick Mooney #else 16034c87aefeSPatrick Mooney do { 16044c87aefeSPatrick 
Mooney errno = 0; 16054c87aefeSPatrick Mooney err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 16064c87aefeSPatrick Mooney error = errno; 16074c87aefeSPatrick Mooney if (err != 0 && error == ENOMEM) { 16084c87aefeSPatrick Mooney (void) fprintf(stderr, "Unable to allocate memory " 16094c87aefeSPatrick Mooney "(%llu), retrying in 1 second\n", memsize); 16104c87aefeSPatrick Mooney sleep(1); 16114c87aefeSPatrick Mooney } 16124c87aefeSPatrick Mooney } while (error == ENOMEM); 16134c87aefeSPatrick Mooney #endif 1614bf21cd93STycho Nightingale if (err) { 16154c87aefeSPatrick Mooney fprintf(stderr, "Unable to set up memory (%d)\n", errno); 16164c87aefeSPatrick Mooney exit(4); 1617bf21cd93STycho Nightingale } 1618bf21cd93STycho Nightingale 1619bf21cd93STycho Nightingale error = init_msr(); 1620bf21cd93STycho Nightingale if (error) { 1621bf21cd93STycho Nightingale fprintf(stderr, "init_msr error %d", error); 16224c87aefeSPatrick Mooney exit(4); 1623bf21cd93STycho Nightingale } 1624bf21cd93STycho Nightingale 1625bf21cd93STycho Nightingale init_mem(); 1626bf21cd93STycho Nightingale init_inout(); 1627154972afSPatrick Mooney #ifdef __FreeBSD__ 1628154972afSPatrick Mooney kernemu_dev_init(); 1629154972afSPatrick Mooney #endif 1630154972afSPatrick Mooney init_bootrom(ctx); 1631bf21cd93STycho Nightingale atkbdc_init(ctx); 1632bf21cd93STycho Nightingale pci_irq_init(ctx); 1633bf21cd93STycho Nightingale ioapic_init(ctx); 1634bf21cd93STycho Nightingale 16352b948146SAndy Fiddaman rtc_init(ctx); 16364c87aefeSPatrick Mooney sci_init(ctx); 16370e1453c3SPatrick Mooney #ifndef __FreeBSD__ 16380e1453c3SPatrick Mooney pmtmr_init(ctx); 16390e1453c3SPatrick Mooney #endif 1640bf21cd93STycho Nightingale 1641bf21cd93STycho Nightingale /* 16424c87aefeSPatrick Mooney * Exit if a device emulation finds an error in its initilization 1643bf21cd93STycho Nightingale */ 16444c87aefeSPatrick Mooney if (init_pci(ctx) != 0) { 16454c87aefeSPatrick Mooney perror("device emulation initialization error"); 
16464c87aefeSPatrick Mooney exit(4); 16474c87aefeSPatrick Mooney } 1648bf21cd93STycho Nightingale 1649154972afSPatrick Mooney /* 1650154972afSPatrick Mooney * Initialize after PCI, to allow a bootrom file to reserve the high 1651154972afSPatrick Mooney * region. 1652154972afSPatrick Mooney */ 16532b948146SAndy Fiddaman if (get_config_bool("acpi_tables")) 1654154972afSPatrick Mooney vmgenc_init(ctx); 1655154972afSPatrick Mooney 1656c3d209caSPatrick Mooney #ifdef __FreeBSD__ 1657b0de25cbSAndy Fiddaman init_gdb(ctx); 1658c3d209caSPatrick Mooney #else 16592b948146SAndy Fiddaman if (value != NULL) { 16602b948146SAndy Fiddaman int port = atoi(value); 16612b948146SAndy Fiddaman 1662b0de25cbSAndy Fiddaman if (port < 0) 1663b0de25cbSAndy Fiddaman init_mdb(ctx); 1664b0de25cbSAndy Fiddaman else 1665b0de25cbSAndy Fiddaman init_gdb(ctx); 1666c3d209caSPatrick Mooney } 1667c3d209caSPatrick Mooney #endif 1668bf21cd93STycho Nightingale 16694c87aefeSPatrick Mooney vga_init(1); 16704c87aefeSPatrick Mooney 16714c87aefeSPatrick Mooney if (lpc_bootrom()) { 1672c3ae3afaSPatrick Mooney #ifdef __FreeBSD__ 16734c87aefeSPatrick Mooney if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { 16744c87aefeSPatrick Mooney fprintf(stderr, "ROM boot failed: unrestricted guest " 16754c87aefeSPatrick Mooney "capability not available\n"); 16764c87aefeSPatrick Mooney exit(4); 16774c87aefeSPatrick Mooney } 1678c3ae3afaSPatrick Mooney #else 1679c3ae3afaSPatrick Mooney /* Unrestricted Guest is always enabled on illumos */ 1680c3ae3afaSPatrick Mooney #endif 16814c87aefeSPatrick Mooney error = vcpu_reset(ctx, BSP); 16824c87aefeSPatrick Mooney assert(error == 0); 16834c87aefeSPatrick Mooney } 1684bf21cd93STycho Nightingale 1685bf21cd93STycho Nightingale error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 1686bf21cd93STycho Nightingale assert(error == 0); 1687bf21cd93STycho Nightingale 1688bf21cd93STycho Nightingale /* 1689bf21cd93STycho Nightingale * build the guest tables, MP etc. 
1690bf21cd93STycho Nightingale */ 16912b948146SAndy Fiddaman if (get_config_bool_default("x86.mptable", true)) { 16924c87aefeSPatrick Mooney error = mptable_build(ctx, guest_ncpus); 16934c87aefeSPatrick Mooney if (error) { 16944c87aefeSPatrick Mooney perror("error to build the guest tables"); 16954c87aefeSPatrick Mooney exit(4); 16964c87aefeSPatrick Mooney } 16974c87aefeSPatrick Mooney } 1698bf21cd93STycho Nightingale 16992b948146SAndy Fiddaman #ifndef __FreeBSD__ 17002b948146SAndy Fiddaman smbios_apply(); 17012b948146SAndy Fiddaman #endif 1702bf21cd93STycho Nightingale error = smbios_build(ctx); 1703bf21cd93STycho Nightingale assert(error == 0); 1704bf21cd93STycho Nightingale 17052b948146SAndy Fiddaman if (get_config_bool("acpi_tables")) { 1706bf21cd93STycho Nightingale error = acpi_build(ctx, guest_ncpus); 1707bf21cd93STycho Nightingale assert(error == 0); 1708bf21cd93STycho Nightingale } 1709bf21cd93STycho Nightingale 17104c87aefeSPatrick Mooney if (lpc_bootrom()) 17114c87aefeSPatrick Mooney fwctl_init(); 17124c87aefeSPatrick Mooney 1713bf21cd93STycho Nightingale /* 1714bf21cd93STycho Nightingale * Change the proc title to include the VM name. 
1715bf21cd93STycho Nightingale */ 1716bf21cd93STycho Nightingale setproctitle("%s", vmname); 17174c87aefeSPatrick Mooney 17184c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 17194c87aefeSPatrick Mooney caph_cache_catpages(); 17204c87aefeSPatrick Mooney 17214c87aefeSPatrick Mooney if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) 17224c87aefeSPatrick Mooney errx(EX_OSERR, "Unable to apply rights for sandbox"); 17234c87aefeSPatrick Mooney 17244c87aefeSPatrick Mooney if (caph_enter() == -1) 17254c87aefeSPatrick Mooney errx(EX_OSERR, "cap_enter() failed"); 17264c87aefeSPatrick Mooney #endif 17274c87aefeSPatrick Mooney 17282817ebc2SAndy Fiddaman #ifndef __FreeBSD__ 17292817ebc2SAndy Fiddaman illumos_priv_lock(); 17302817ebc2SAndy Fiddaman #endif 17312817ebc2SAndy Fiddaman 17322606939dSPatrick Mooney #ifdef __FreeBSD__ 1733bf21cd93STycho Nightingale /* 1734bf21cd93STycho Nightingale * Add CPU 0 1735bf21cd93STycho Nightingale */ 1736bf21cd93STycho Nightingale fbsdrun_addcpu(ctx, BSP, BSP, rip); 17379c3024a3SHans Rosenfeld #else 17382606939dSPatrick Mooney /* Set BSP to run (unlike the APs which wait for INIT) */ 17392606939dSPatrick Mooney error = vm_set_run_state(ctx, BSP, VRS_RUN, 0); 17402606939dSPatrick Mooney assert(error == 0); 17412b948146SAndy Fiddaman fbsdrun_addcpu(ctx, BSP, rip, 17422b948146SAndy Fiddaman get_config_bool_default("suspend_at_boot", false)); 17432606939dSPatrick Mooney 17442606939dSPatrick Mooney /* Add subsequent CPUs, which will wait until INIT/SIPI-ed */ 17452606939dSPatrick Mooney for (uint_t i = 1; i < guest_ncpus; i++) { 17462606939dSPatrick Mooney spinup_halted_ap(ctx, i); 17472606939dSPatrick Mooney } 17489c3024a3SHans Rosenfeld #endif 1749bf21cd93STycho Nightingale /* 1750bf21cd93STycho Nightingale * Head off to the main event dispatch loop 1751bf21cd93STycho Nightingale */ 1752bf21cd93STycho Nightingale mevent_dispatch(); 1753bf21cd93STycho Nightingale 17544c87aefeSPatrick Mooney exit(4); 1755bf21cd93STycho Nightingale } 1756