xref: /freebsd/sys/amd64/vmm/vmm.c (revision 37a723a5b32a80570ee31bd1c272c630bbd0be09)
1366f6083SPeter Grehan /*-
2366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
3366f6083SPeter Grehan  * All rights reserved.
4366f6083SPeter Grehan  *
5366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
6366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
7366f6083SPeter Grehan  * are met:
8366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
9366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
10366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
11366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
12366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
13366f6083SPeter Grehan  *
14366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24366f6083SPeter Grehan  * SUCH DAMAGE.
25366f6083SPeter Grehan  *
26366f6083SPeter Grehan  * $FreeBSD$
27366f6083SPeter Grehan  */
28366f6083SPeter Grehan 
29366f6083SPeter Grehan #include <sys/cdefs.h>
30366f6083SPeter Grehan __FBSDID("$FreeBSD$");
31366f6083SPeter Grehan 
32366f6083SPeter Grehan #include <sys/param.h>
3338f1b189SPeter Grehan #include <sys/systm.h>
34366f6083SPeter Grehan #include <sys/kernel.h>
35366f6083SPeter Grehan #include <sys/module.h>
36366f6083SPeter Grehan #include <sys/sysctl.h>
37366f6083SPeter Grehan #include <sys/malloc.h>
38366f6083SPeter Grehan #include <sys/pcpu.h>
39366f6083SPeter Grehan #include <sys/lock.h>
40366f6083SPeter Grehan #include <sys/mutex.h>
41366f6083SPeter Grehan #include <sys/proc.h>
42318224bbSNeel Natu #include <sys/rwlock.h>
43366f6083SPeter Grehan #include <sys/sched.h>
44366f6083SPeter Grehan #include <sys/smp.h>
45366f6083SPeter Grehan #include <sys/systm.h>
46366f6083SPeter Grehan 
47366f6083SPeter Grehan #include <vm/vm.h>
48318224bbSNeel Natu #include <vm/vm_object.h>
49318224bbSNeel Natu #include <vm/vm_page.h>
50318224bbSNeel Natu #include <vm/pmap.h>
51318224bbSNeel Natu #include <vm/vm_map.h>
52318224bbSNeel Natu #include <vm/vm_extern.h>
53318224bbSNeel Natu #include <vm/vm_param.h>
54366f6083SPeter Grehan 
5563e62d39SJohn Baldwin #include <machine/cpu.h>
56366f6083SPeter Grehan #include <machine/vm.h>
57366f6083SPeter Grehan #include <machine/pcb.h>
5875dd3366SNeel Natu #include <machine/smp.h>
591c052192SNeel Natu #include <x86/psl.h>
6034a6b2d6SJohn Baldwin #include <x86/apicreg.h>
61318224bbSNeel Natu #include <machine/vmparam.h>
62366f6083SPeter Grehan 
63366f6083SPeter Grehan #include <machine/vmm.h>
64565bbb86SNeel Natu #include <machine/vmm_dev.h>
65565bbb86SNeel Natu 
66d17b5104SNeel Natu #include "vmm_ioport.h"
67318224bbSNeel Natu #include "vmm_ktr.h"
68b01c2033SNeel Natu #include "vmm_host.h"
69366f6083SPeter Grehan #include "vmm_mem.h"
70366f6083SPeter Grehan #include "vmm_util.h"
71762fd208STycho Nightingale #include "vatpic.h"
72e883c9bbSTycho Nightingale #include "vatpit.h"
7308e3ff32SNeel Natu #include "vhpet.h"
74565bbb86SNeel Natu #include "vioapic.h"
75366f6083SPeter Grehan #include "vlapic.h"
76366f6083SPeter Grehan #include "vmm_msr.h"
77366f6083SPeter Grehan #include "vmm_ipi.h"
78366f6083SPeter Grehan #include "vmm_stat.h"
79f76fc5d4SNeel Natu #include "vmm_lapic.h"
80366f6083SPeter Grehan 
81366f6083SPeter Grehan #include "io/ppt.h"
82366f6083SPeter Grehan #include "io/iommu.h"
83366f6083SPeter Grehan 
84366f6083SPeter Grehan struct vlapic;
85366f6083SPeter Grehan 
86366f6083SPeter Grehan struct vcpu {
87366f6083SPeter Grehan 	int		flags;
8875dd3366SNeel Natu 	enum vcpu_state	state;
8975dd3366SNeel Natu 	struct mtx	mtx;
90366f6083SPeter Grehan 	int		hostcpu;	/* host cpuid this vcpu last ran on */
91366f6083SPeter Grehan 	uint64_t	guest_msrs[VMM_MSR_NUM];
92366f6083SPeter Grehan 	struct vlapic	*vlapic;
93366f6083SPeter Grehan 	int		 vcpuid;
9438f1b189SPeter Grehan 	struct savefpu	*guestfpu;	/* guest fpu state */
95abb023fbSJohn Baldwin 	uint64_t	guest_xcr0;
96366f6083SPeter Grehan 	void		*stats;
9798ed632cSNeel Natu 	struct vm_exit	exitinfo;
98e9027382SNeel Natu 	enum x2apic_state x2apic_state;
99f352ff0cSNeel Natu 	int		nmi_pending;
1000775fbb4STycho Nightingale 	int		extint_pending;
101dc506506SNeel Natu 	struct vm_exception exception;
102dc506506SNeel Natu 	int		exception_pending;
103366f6083SPeter Grehan };
104366f6083SPeter Grehan 
/* Helpers for the per-vcpu spin mutex embedded in 'struct vcpu'. */
#define	vcpu_lock_init(v)	mtx_init(&(v)->mtx, "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&(v)->mtx)
#define	vcpu_unlock(v)		mtx_unlock_spin(&(v)->mtx)
#define	vcpu_assert_locked(v)	mtx_assert(&(v)->mtx, MA_OWNED)
10975dd3366SNeel Natu 
110318224bbSNeel Natu struct mem_seg {
111318224bbSNeel Natu 	vm_paddr_t	gpa;
112318224bbSNeel Natu 	size_t		len;
113318224bbSNeel Natu 	boolean_t	wired;
114318224bbSNeel Natu 	vm_object_t	object;
115318224bbSNeel Natu };
116366f6083SPeter Grehan #define	VM_MAX_MEMORY_SEGMENTS	2
117366f6083SPeter Grehan 
118366f6083SPeter Grehan struct vm {
119366f6083SPeter Grehan 	void		*cookie;	/* processor-specific data */
120366f6083SPeter Grehan 	void		*iommu;		/* iommu-specific data */
12108e3ff32SNeel Natu 	struct vhpet	*vhpet;		/* virtual HPET */
122565bbb86SNeel Natu 	struct vioapic	*vioapic;	/* virtual ioapic */
123762fd208STycho Nightingale 	struct vatpic	*vatpic;	/* virtual atpic */
124e883c9bbSTycho Nightingale 	struct vatpit	*vatpit;	/* virtual atpit */
125318224bbSNeel Natu 	struct vmspace	*vmspace;	/* guest's address space */
126366f6083SPeter Grehan 	struct vcpu	vcpu[VM_MAXCPU];
127366f6083SPeter Grehan 	int		num_mem_segs;
128318224bbSNeel Natu 	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
129366f6083SPeter Grehan 	char		name[VM_MAX_NAMELEN];
130366f6083SPeter Grehan 
131366f6083SPeter Grehan 	/*
132a5615c90SPeter Grehan 	 * Set of active vcpus.
133366f6083SPeter Grehan 	 * An active vcpu is one that has been started implicitly (BSP) or
134366f6083SPeter Grehan 	 * explicitly (AP) by sending it a startup ipi.
135366f6083SPeter Grehan 	 */
13622d822c6SNeel Natu 	volatile cpuset_t active_cpus;
1375b8a8cd1SNeel Natu 
1385b8a8cd1SNeel Natu 	struct mtx	rendezvous_mtx;
1395b8a8cd1SNeel Natu 	cpuset_t	rendezvous_req_cpus;
1405b8a8cd1SNeel Natu 	cpuset_t	rendezvous_done_cpus;
1415b8a8cd1SNeel Natu 	void		*rendezvous_arg;
1425b8a8cd1SNeel Natu 	vm_rendezvous_func_t rendezvous_func;
143b15a09c0SNeel Natu 
144b15a09c0SNeel Natu 	int		suspend;
145b15a09c0SNeel Natu 	volatile cpuset_t suspended_cpus;
146e50ce2aaSNeel Natu 
147e50ce2aaSNeel Natu 	volatile cpuset_t halted_cpus;
148366f6083SPeter Grehan };
149366f6083SPeter Grehan 
150d5408b1dSNeel Natu static int vmm_initialized;
151d5408b1dSNeel Natu 
/*
 * Processor-specific backend selected by vmm_init().  Every wrapper
 * below forwards to the matching 'ops' method and evaluates to a
 * fallback value (0, NULL or ENXIO) while no backend is installed.
 */
static struct vmm_ops *ops;
#define	VMM_INIT(num)	(ops == NULL ? 0 : (*ops->init)(num))
#define	VMM_CLEANUP()	(ops == NULL ? 0 : (*ops->cleanup)())
#define	VMM_RESUME()	(ops == NULL ? 0 : (*ops->resume)())

#define	VMINIT(vm, pmap) (ops == NULL ? NULL : (*ops->vminit)(vm, pmap))
#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
	(ops == NULL ? ENXIO : (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr))
#define	VMCLEANUP(vmi)	(ops == NULL ? NULL : (*ops->vmcleanup)(vmi))
#define	VMSPACE_ALLOC(min, max) \
	(ops == NULL ? NULL : (*ops->vmspace_alloc)(min, max))
#define	VMSPACE_FREE(vmspace) \
	(ops == NULL ? ENXIO : (*ops->vmspace_free)(vmspace))
#define	VMGETREG(vmi, vcpu, num, retval)		\
	(ops == NULL ? ENXIO : (*ops->vmgetreg)(vmi, vcpu, num, retval))
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops == NULL ? ENXIO : (*ops->vmsetreg)(vmi, vcpu, num, val))
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops == NULL ? ENXIO : (*ops->vmgetdesc)(vmi, vcpu, num, desc))
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops == NULL ? ENXIO : (*ops->vmsetdesc)(vmi, vcpu, num, desc))
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops == NULL ? ENXIO : (*ops->vmgetcap)(vmi, vcpu, num, retval))
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops == NULL ? ENXIO : (*ops->vmsetcap)(vmi, vcpu, num, val))
#define	VLAPIC_INIT(vmi, vcpu)			\
	(ops == NULL ? NULL : (*ops->vlapic_init)(vmi, vcpu))
#define	VLAPIC_CLEANUP(vmi, vlapic)		\
	(ops == NULL ? NULL : (*ops->vlapic_cleanup)(vmi, vlapic))

/* Set, respectively clear, the TS bit in %cr0. */
#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()
184366f6083SPeter Grehan 
185366f6083SPeter Grehan static MALLOC_DEFINE(M_VM, "vm", "vm");
186366f6083SPeter Grehan CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */
187366f6083SPeter Grehan 
188366f6083SPeter Grehan /* statistics */
18961592433SNeel Natu static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
190366f6083SPeter Grehan 
191add611fdSNeel Natu SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
192add611fdSNeel Natu 
193055fc2cbSNeel Natu /*
194055fc2cbSNeel Natu  * Halt the guest if all vcpus are executing a HLT instruction with
195055fc2cbSNeel Natu  * interrupts disabled.
196055fc2cbSNeel Natu  */
197055fc2cbSNeel Natu static int halt_detection_enabled = 1;
198055fc2cbSNeel Natu TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled);
199055fc2cbSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
200055fc2cbSNeel Natu     &halt_detection_enabled, 0,
201055fc2cbSNeel Natu     "Halt VM if all vcpus execute HLT with interrupts disabled");
202055fc2cbSNeel Natu 
203add611fdSNeel Natu static int vmm_ipinum;
204add611fdSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
205add611fdSNeel Natu     "IPI vector used for vcpu notifications");
206add611fdSNeel Natu 
207366f6083SPeter Grehan static void
208de5ea6b6SNeel Natu vcpu_cleanup(struct vm *vm, int i)
209366f6083SPeter Grehan {
210de5ea6b6SNeel Natu 	struct vcpu *vcpu = &vm->vcpu[i];
211de5ea6b6SNeel Natu 
212de5ea6b6SNeel Natu 	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
213366f6083SPeter Grehan 	vmm_stat_free(vcpu->stats);
21438f1b189SPeter Grehan 	fpu_save_area_free(vcpu->guestfpu);
215366f6083SPeter Grehan }
216366f6083SPeter Grehan 
217366f6083SPeter Grehan static void
218366f6083SPeter Grehan vcpu_init(struct vm *vm, uint32_t vcpu_id)
219366f6083SPeter Grehan {
220366f6083SPeter Grehan 	struct vcpu *vcpu;
221366f6083SPeter Grehan 
222366f6083SPeter Grehan 	vcpu = &vm->vcpu[vcpu_id];
223366f6083SPeter Grehan 
22475dd3366SNeel Natu 	vcpu_lock_init(vcpu);
22575dd3366SNeel Natu 	vcpu->hostcpu = NOCPU;
226366f6083SPeter Grehan 	vcpu->vcpuid = vcpu_id;
227de5ea6b6SNeel Natu 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
22852e5c8a2SNeel Natu 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
229abb023fbSJohn Baldwin 	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
23038f1b189SPeter Grehan 	vcpu->guestfpu = fpu_save_area_alloc();
23138f1b189SPeter Grehan 	fpu_save_area_reset(vcpu->guestfpu);
232366f6083SPeter Grehan 	vcpu->stats = vmm_stat_alloc();
233366f6083SPeter Grehan }
234366f6083SPeter Grehan 
23598ed632cSNeel Natu struct vm_exit *
23698ed632cSNeel Natu vm_exitinfo(struct vm *vm, int cpuid)
23798ed632cSNeel Natu {
23898ed632cSNeel Natu 	struct vcpu *vcpu;
23998ed632cSNeel Natu 
24098ed632cSNeel Natu 	if (cpuid < 0 || cpuid >= VM_MAXCPU)
24198ed632cSNeel Natu 		panic("vm_exitinfo: invalid cpuid %d", cpuid);
24298ed632cSNeel Natu 
24398ed632cSNeel Natu 	vcpu = &vm->vcpu[cpuid];
24498ed632cSNeel Natu 
24598ed632cSNeel Natu 	return (&vcpu->exitinfo);
24698ed632cSNeel Natu }
24798ed632cSNeel Natu 
/*
 * Resume hook installed in 'vmm_resume_p' by vmm_init(); forwards to
 * the resume method of the processor-specific backend, if any.
 */
static void
vmm_resume(void)
{

	VMM_RESUME();
}
25363e62d39SJohn Baldwin 
254366f6083SPeter Grehan static int
255366f6083SPeter Grehan vmm_init(void)
256366f6083SPeter Grehan {
257366f6083SPeter Grehan 	int error;
258366f6083SPeter Grehan 
259b01c2033SNeel Natu 	vmm_host_state_init();
260add611fdSNeel Natu 
261add611fdSNeel Natu 	vmm_ipinum = vmm_ipi_alloc();
262add611fdSNeel Natu 	if (vmm_ipinum == 0)
263add611fdSNeel Natu 		vmm_ipinum = IPI_AST;
264366f6083SPeter Grehan 
265366f6083SPeter Grehan 	error = vmm_mem_init();
266366f6083SPeter Grehan 	if (error)
267366f6083SPeter Grehan 		return (error);
268366f6083SPeter Grehan 
269366f6083SPeter Grehan 	if (vmm_is_intel())
270366f6083SPeter Grehan 		ops = &vmm_ops_intel;
271366f6083SPeter Grehan 	else if (vmm_is_amd())
272366f6083SPeter Grehan 		ops = &vmm_ops_amd;
273366f6083SPeter Grehan 	else
274366f6083SPeter Grehan 		return (ENXIO);
275366f6083SPeter Grehan 
276366f6083SPeter Grehan 	vmm_msr_init();
27763e62d39SJohn Baldwin 	vmm_resume_p = vmm_resume;
278366f6083SPeter Grehan 
279add611fdSNeel Natu 	return (VMM_INIT(vmm_ipinum));
280366f6083SPeter Grehan }
281366f6083SPeter Grehan 
282366f6083SPeter Grehan static int
283366f6083SPeter Grehan vmm_handler(module_t mod, int what, void *arg)
284366f6083SPeter Grehan {
285366f6083SPeter Grehan 	int error;
286366f6083SPeter Grehan 
287366f6083SPeter Grehan 	switch (what) {
288366f6083SPeter Grehan 	case MOD_LOAD:
289366f6083SPeter Grehan 		vmmdev_init();
29051f45d01SNeel Natu 		if (ppt_avail_devices() > 0)
291366f6083SPeter Grehan 			iommu_init();
292366f6083SPeter Grehan 		error = vmm_init();
293d5408b1dSNeel Natu 		if (error == 0)
294d5408b1dSNeel Natu 			vmm_initialized = 1;
295366f6083SPeter Grehan 		break;
296366f6083SPeter Grehan 	case MOD_UNLOAD:
297cdc5b9e7SNeel Natu 		error = vmmdev_cleanup();
298cdc5b9e7SNeel Natu 		if (error == 0) {
29963e62d39SJohn Baldwin 			vmm_resume_p = NULL;
300366f6083SPeter Grehan 			iommu_cleanup();
301add611fdSNeel Natu 			if (vmm_ipinum != IPI_AST)
302add611fdSNeel Natu 				vmm_ipi_free(vmm_ipinum);
303366f6083SPeter Grehan 			error = VMM_CLEANUP();
30481ef6611SPeter Grehan 			/*
30581ef6611SPeter Grehan 			 * Something bad happened - prevent new
30681ef6611SPeter Grehan 			 * VMs from being created
30781ef6611SPeter Grehan 			 */
30881ef6611SPeter Grehan 			if (error)
309d5408b1dSNeel Natu 				vmm_initialized = 0;
31081ef6611SPeter Grehan 		}
311366f6083SPeter Grehan 		break;
312366f6083SPeter Grehan 	default:
313366f6083SPeter Grehan 		error = 0;
314366f6083SPeter Grehan 		break;
315366f6083SPeter Grehan 	}
316366f6083SPeter Grehan 	return (error);
317366f6083SPeter Grehan }
318366f6083SPeter Grehan 
319366f6083SPeter Grehan static moduledata_t vmm_kmod = {
320366f6083SPeter Grehan 	"vmm",
321366f6083SPeter Grehan 	vmm_handler,
322366f6083SPeter Grehan 	NULL
323366f6083SPeter Grehan };
324366f6083SPeter Grehan 
325366f6083SPeter Grehan /*
326e3f0800bSNeel Natu  * vmm initialization has the following dependencies:
327e3f0800bSNeel Natu  *
328e3f0800bSNeel Natu  * - iommu initialization must happen after the pci passthru driver has had
329e3f0800bSNeel Natu  *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
330e3f0800bSNeel Natu  *
331e3f0800bSNeel Natu  * - VT-x initialization requires smp_rendezvous() and therefore must happen
332e3f0800bSNeel Natu  *   after SMP is fully functional (after SI_SUB_SMP).
333366f6083SPeter Grehan  */
334e3f0800bSNeel Natu DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
335366f6083SPeter Grehan MODULE_VERSION(vmm, 1);
336366f6083SPeter Grehan 
337d5408b1dSNeel Natu int
338d5408b1dSNeel Natu vm_create(const char *name, struct vm **retvm)
339366f6083SPeter Grehan {
340366f6083SPeter Grehan 	int i;
341366f6083SPeter Grehan 	struct vm *vm;
342318224bbSNeel Natu 	struct vmspace *vmspace;
343366f6083SPeter Grehan 
344366f6083SPeter Grehan 	const int BSP = 0;
345366f6083SPeter Grehan 
346d5408b1dSNeel Natu 	/*
347d5408b1dSNeel Natu 	 * If vmm.ko could not be successfully initialized then don't attempt
348d5408b1dSNeel Natu 	 * to create the virtual machine.
349d5408b1dSNeel Natu 	 */
350d5408b1dSNeel Natu 	if (!vmm_initialized)
351d5408b1dSNeel Natu 		return (ENXIO);
352d5408b1dSNeel Natu 
353366f6083SPeter Grehan 	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
354d5408b1dSNeel Natu 		return (EINVAL);
355366f6083SPeter Grehan 
356318224bbSNeel Natu 	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
357318224bbSNeel Natu 	if (vmspace == NULL)
358318224bbSNeel Natu 		return (ENOMEM);
359318224bbSNeel Natu 
360366f6083SPeter Grehan 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
361366f6083SPeter Grehan 	strcpy(vm->name, name);
36288c4b8d1SNeel Natu 	vm->vmspace = vmspace;
3635b8a8cd1SNeel Natu 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
364318224bbSNeel Natu 	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
365565bbb86SNeel Natu 	vm->vioapic = vioapic_init(vm);
36608e3ff32SNeel Natu 	vm->vhpet = vhpet_init(vm);
367762fd208STycho Nightingale 	vm->vatpic = vatpic_init(vm);
368e883c9bbSTycho Nightingale 	vm->vatpit = vatpit_init(vm);
369366f6083SPeter Grehan 
370366f6083SPeter Grehan 	for (i = 0; i < VM_MAXCPU; i++) {
371366f6083SPeter Grehan 		vcpu_init(vm, i);
372366f6083SPeter Grehan 		guest_msrs_init(vm, i);
373366f6083SPeter Grehan 	}
374366f6083SPeter Grehan 
375366f6083SPeter Grehan 	vm_activate_cpu(vm, BSP);
376366f6083SPeter Grehan 
377d5408b1dSNeel Natu 	*retvm = vm;
378d5408b1dSNeel Natu 	return (0);
379366f6083SPeter Grehan }
380366f6083SPeter Grehan 
381f7d51510SNeel Natu static void
382318224bbSNeel Natu vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
383f7d51510SNeel Natu {
3847ce04d0aSNeel Natu 
385318224bbSNeel Natu 	if (seg->object != NULL)
386318224bbSNeel Natu 		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
387f7d51510SNeel Natu 
388318224bbSNeel Natu 	bzero(seg, sizeof(*seg));
389f7d51510SNeel Natu }
390f7d51510SNeel Natu 
391366f6083SPeter Grehan void
392366f6083SPeter Grehan vm_destroy(struct vm *vm)
393366f6083SPeter Grehan {
394366f6083SPeter Grehan 	int i;
395366f6083SPeter Grehan 
396366f6083SPeter Grehan 	ppt_unassign_all(vm);
397366f6083SPeter Grehan 
398318224bbSNeel Natu 	if (vm->iommu != NULL)
399318224bbSNeel Natu 		iommu_destroy_domain(vm->iommu);
400318224bbSNeel Natu 
401e883c9bbSTycho Nightingale 	vatpit_cleanup(vm->vatpit);
40208e3ff32SNeel Natu 	vhpet_cleanup(vm->vhpet);
403762fd208STycho Nightingale 	vatpic_cleanup(vm->vatpic);
40408e3ff32SNeel Natu 	vioapic_cleanup(vm->vioapic);
40508e3ff32SNeel Natu 
406366f6083SPeter Grehan 	for (i = 0; i < vm->num_mem_segs; i++)
407f7d51510SNeel Natu 		vm_free_mem_seg(vm, &vm->mem_segs[i]);
408f7d51510SNeel Natu 
409f7d51510SNeel Natu 	vm->num_mem_segs = 0;
410366f6083SPeter Grehan 
411366f6083SPeter Grehan 	for (i = 0; i < VM_MAXCPU; i++)
412de5ea6b6SNeel Natu 		vcpu_cleanup(vm, i);
413366f6083SPeter Grehan 
414318224bbSNeel Natu 	VMSPACE_FREE(vm->vmspace);
415366f6083SPeter Grehan 
416366f6083SPeter Grehan 	VMCLEANUP(vm->cookie);
417366f6083SPeter Grehan 
418366f6083SPeter Grehan 	free(vm, M_VM);
419366f6083SPeter Grehan }
420366f6083SPeter Grehan 
421366f6083SPeter Grehan const char *
422366f6083SPeter Grehan vm_name(struct vm *vm)
423366f6083SPeter Grehan {
424366f6083SPeter Grehan 	return (vm->name);
425366f6083SPeter Grehan }
426366f6083SPeter Grehan 
427366f6083SPeter Grehan int
428366f6083SPeter Grehan vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
429366f6083SPeter Grehan {
430318224bbSNeel Natu 	vm_object_t obj;
431366f6083SPeter Grehan 
432318224bbSNeel Natu 	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
433318224bbSNeel Natu 		return (ENOMEM);
434318224bbSNeel Natu 	else
435318224bbSNeel Natu 		return (0);
436366f6083SPeter Grehan }
437366f6083SPeter Grehan 
438366f6083SPeter Grehan int
439366f6083SPeter Grehan vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
440366f6083SPeter Grehan {
441366f6083SPeter Grehan 
442318224bbSNeel Natu 	vmm_mmio_free(vm->vmspace, gpa, len);
443318224bbSNeel Natu 	return (0);
444366f6083SPeter Grehan }
445366f6083SPeter Grehan 
446318224bbSNeel Natu boolean_t
447318224bbSNeel Natu vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
448366f6083SPeter Grehan {
449341f19c9SNeel Natu 	int i;
450341f19c9SNeel Natu 	vm_paddr_t gpabase, gpalimit;
451341f19c9SNeel Natu 
452341f19c9SNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
453341f19c9SNeel Natu 		gpabase = vm->mem_segs[i].gpa;
454341f19c9SNeel Natu 		gpalimit = gpabase + vm->mem_segs[i].len;
455341f19c9SNeel Natu 		if (gpa >= gpabase && gpa < gpalimit)
456318224bbSNeel Natu 			return (TRUE);		/* 'gpa' is regular memory */
457341f19c9SNeel Natu 	}
458341f19c9SNeel Natu 
459318224bbSNeel Natu 	if (ppt_is_mmio(vm, gpa))
460318224bbSNeel Natu 		return (TRUE);			/* 'gpa' is pci passthru mmio */
461318224bbSNeel Natu 
462318224bbSNeel Natu 	return (FALSE);
463341f19c9SNeel Natu }
464341f19c9SNeel Natu 
465341f19c9SNeel Natu int
466341f19c9SNeel Natu vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
467341f19c9SNeel Natu {
468318224bbSNeel Natu 	int available, allocated;
469318224bbSNeel Natu 	struct mem_seg *seg;
470318224bbSNeel Natu 	vm_object_t object;
471318224bbSNeel Natu 	vm_paddr_t g;
472366f6083SPeter Grehan 
473341f19c9SNeel Natu 	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
474341f19c9SNeel Natu 		return (EINVAL);
475341f19c9SNeel Natu 
476341f19c9SNeel Natu 	available = allocated = 0;
477341f19c9SNeel Natu 	g = gpa;
478341f19c9SNeel Natu 	while (g < gpa + len) {
479318224bbSNeel Natu 		if (vm_mem_allocated(vm, g))
480341f19c9SNeel Natu 			allocated++;
481318224bbSNeel Natu 		else
482318224bbSNeel Natu 			available++;
483341f19c9SNeel Natu 
484341f19c9SNeel Natu 		g += PAGE_SIZE;
485341f19c9SNeel Natu 	}
486341f19c9SNeel Natu 
487366f6083SPeter Grehan 	/*
488341f19c9SNeel Natu 	 * If there are some allocated and some available pages in the address
489341f19c9SNeel Natu 	 * range then it is an error.
490366f6083SPeter Grehan 	 */
491341f19c9SNeel Natu 	if (allocated && available)
492341f19c9SNeel Natu 		return (EINVAL);
493341f19c9SNeel Natu 
494341f19c9SNeel Natu 	/*
495341f19c9SNeel Natu 	 * If the entire address range being requested has already been
496341f19c9SNeel Natu 	 * allocated then there isn't anything more to do.
497341f19c9SNeel Natu 	 */
498341f19c9SNeel Natu 	if (allocated && available == 0)
499341f19c9SNeel Natu 		return (0);
500366f6083SPeter Grehan 
501366f6083SPeter Grehan 	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
502366f6083SPeter Grehan 		return (E2BIG);
503366f6083SPeter Grehan 
504f7d51510SNeel Natu 	seg = &vm->mem_segs[vm->num_mem_segs];
505366f6083SPeter Grehan 
506318224bbSNeel Natu 	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
507318224bbSNeel Natu 		return (ENOMEM);
508318224bbSNeel Natu 
509f7d51510SNeel Natu 	seg->gpa = gpa;
510318224bbSNeel Natu 	seg->len = len;
511318224bbSNeel Natu 	seg->object = object;
512318224bbSNeel Natu 	seg->wired = FALSE;
5137ce04d0aSNeel Natu 
514366f6083SPeter Grehan 	vm->num_mem_segs++;
515341f19c9SNeel Natu 
516366f6083SPeter Grehan 	return (0);
517366f6083SPeter Grehan }
518366f6083SPeter Grehan 
519318224bbSNeel Natu static void
520318224bbSNeel Natu vm_gpa_unwire(struct vm *vm)
521366f6083SPeter Grehan {
522318224bbSNeel Natu 	int i, rv;
523318224bbSNeel Natu 	struct mem_seg *seg;
5244db4fb2cSNeel Natu 
525318224bbSNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
526318224bbSNeel Natu 		seg = &vm->mem_segs[i];
527318224bbSNeel Natu 		if (!seg->wired)
528318224bbSNeel Natu 			continue;
529366f6083SPeter Grehan 
530318224bbSNeel Natu 		rv = vm_map_unwire(&vm->vmspace->vm_map,
531318224bbSNeel Natu 				   seg->gpa, seg->gpa + seg->len,
532318224bbSNeel Natu 				   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
533318224bbSNeel Natu 		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
534318224bbSNeel Natu 		    "%#lx/%ld could not be unwired: %d",
535318224bbSNeel Natu 		    vm_name(vm), seg->gpa, seg->len, rv));
536318224bbSNeel Natu 
537318224bbSNeel Natu 		seg->wired = FALSE;
538318224bbSNeel Natu 	}
539318224bbSNeel Natu }
540318224bbSNeel Natu 
541318224bbSNeel Natu static int
542318224bbSNeel Natu vm_gpa_wire(struct vm *vm)
543318224bbSNeel Natu {
544318224bbSNeel Natu 	int i, rv;
545318224bbSNeel Natu 	struct mem_seg *seg;
546318224bbSNeel Natu 
547318224bbSNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
548318224bbSNeel Natu 		seg = &vm->mem_segs[i];
549318224bbSNeel Natu 		if (seg->wired)
550318224bbSNeel Natu 			continue;
551318224bbSNeel Natu 
552318224bbSNeel Natu 		/* XXX rlimits? */
553318224bbSNeel Natu 		rv = vm_map_wire(&vm->vmspace->vm_map,
554318224bbSNeel Natu 				 seg->gpa, seg->gpa + seg->len,
555318224bbSNeel Natu 				 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
556318224bbSNeel Natu 		if (rv != KERN_SUCCESS)
557318224bbSNeel Natu 			break;
558318224bbSNeel Natu 
559318224bbSNeel Natu 		seg->wired = TRUE;
560318224bbSNeel Natu 	}
561318224bbSNeel Natu 
562318224bbSNeel Natu 	if (i < vm->num_mem_segs) {
563318224bbSNeel Natu 		/*
564318224bbSNeel Natu 		 * Undo the wiring before returning an error.
565318224bbSNeel Natu 		 */
566318224bbSNeel Natu 		vm_gpa_unwire(vm);
567318224bbSNeel Natu 		return (EAGAIN);
568318224bbSNeel Natu 	}
569318224bbSNeel Natu 
570318224bbSNeel Natu 	return (0);
571318224bbSNeel Natu }
572318224bbSNeel Natu 
573318224bbSNeel Natu static void
574318224bbSNeel Natu vm_iommu_modify(struct vm *vm, boolean_t map)
575318224bbSNeel Natu {
576318224bbSNeel Natu 	int i, sz;
577318224bbSNeel Natu 	vm_paddr_t gpa, hpa;
578318224bbSNeel Natu 	struct mem_seg *seg;
579318224bbSNeel Natu 	void *vp, *cookie, *host_domain;
580318224bbSNeel Natu 
581318224bbSNeel Natu 	sz = PAGE_SIZE;
582318224bbSNeel Natu 	host_domain = iommu_host_domain();
583318224bbSNeel Natu 
584318224bbSNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
585318224bbSNeel Natu 		seg = &vm->mem_segs[i];
586318224bbSNeel Natu 		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
587318224bbSNeel Natu 		    vm_name(vm), seg->gpa, seg->len));
588318224bbSNeel Natu 
589318224bbSNeel Natu 		gpa = seg->gpa;
590318224bbSNeel Natu 		while (gpa < seg->gpa + seg->len) {
591318224bbSNeel Natu 			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
592318224bbSNeel Natu 					 &cookie);
593318224bbSNeel Natu 			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
594318224bbSNeel Natu 			    vm_name(vm), gpa));
595318224bbSNeel Natu 
596318224bbSNeel Natu 			vm_gpa_release(cookie);
597318224bbSNeel Natu 
598318224bbSNeel Natu 			hpa = DMAP_TO_PHYS((uintptr_t)vp);
599318224bbSNeel Natu 			if (map) {
600318224bbSNeel Natu 				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
601318224bbSNeel Natu 				iommu_remove_mapping(host_domain, hpa, sz);
602318224bbSNeel Natu 			} else {
603318224bbSNeel Natu 				iommu_remove_mapping(vm->iommu, gpa, sz);
604318224bbSNeel Natu 				iommu_create_mapping(host_domain, hpa, hpa, sz);
605318224bbSNeel Natu 			}
606318224bbSNeel Natu 
607318224bbSNeel Natu 			gpa += PAGE_SIZE;
608318224bbSNeel Natu 		}
609318224bbSNeel Natu 	}
610318224bbSNeel Natu 
611318224bbSNeel Natu 	/*
612318224bbSNeel Natu 	 * Invalidate the cached translations associated with the domain
613318224bbSNeel Natu 	 * from which pages were removed.
614318224bbSNeel Natu 	 */
615318224bbSNeel Natu 	if (map)
616318224bbSNeel Natu 		iommu_invalidate_tlb(host_domain);
617318224bbSNeel Natu 	else
618318224bbSNeel Natu 		iommu_invalidate_tlb(vm->iommu);
619318224bbSNeel Natu }
620318224bbSNeel Natu 
621318224bbSNeel Natu #define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
622318224bbSNeel Natu #define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)
623318224bbSNeel Natu 
/*
 * Detach the pci passthru device bus/slot/func from 'vm'.
 *
 * Once the last passthru device is gone the guest memory is removed
 * from the VM's iommu domain and unwired.  'vm->iommu' itself is not
 * touched here.
 *
 * Returns 0 on success or the error from ppt_unassign_device().
 */
int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;

	error = ppt_unassign_device(vm, bus, slot, func);
	if (error == 0 && ppt_assigned_devices(vm) == 0) {
		vm_iommu_unmap(vm);
		vm_gpa_unwire(vm);
	}
	return (error);
}
639318224bbSNeel Natu 
640318224bbSNeel Natu int
641318224bbSNeel Natu vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
642318224bbSNeel Natu {
643318224bbSNeel Natu 	int error;
644318224bbSNeel Natu 	vm_paddr_t maxaddr;
645318224bbSNeel Natu 
646318224bbSNeel Natu 	/*
647318224bbSNeel Natu 	 * Virtual machines with pci passthru devices get special treatment:
648318224bbSNeel Natu 	 * - the guest physical memory is wired
649318224bbSNeel Natu 	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
650318224bbSNeel Natu 	 *
651318224bbSNeel Natu 	 * We need to do this before the first pci passthru device is attached.
652318224bbSNeel Natu 	 */
65351f45d01SNeel Natu 	if (ppt_assigned_devices(vm) == 0) {
654318224bbSNeel Natu 		KASSERT(vm->iommu == NULL,
655318224bbSNeel Natu 		    ("vm_assign_pptdev: iommu must be NULL"));
656318224bbSNeel Natu 		maxaddr = vmm_mem_maxaddr();
657318224bbSNeel Natu 		vm->iommu = iommu_create_domain(maxaddr);
658318224bbSNeel Natu 
659318224bbSNeel Natu 		error = vm_gpa_wire(vm);
660318224bbSNeel Natu 		if (error)
661318224bbSNeel Natu 			return (error);
662318224bbSNeel Natu 
663318224bbSNeel Natu 		vm_iommu_map(vm);
664318224bbSNeel Natu 	}
665318224bbSNeel Natu 
666318224bbSNeel Natu 	error = ppt_assign_device(vm, bus, slot, func);
667318224bbSNeel Natu 	return (error);
668318224bbSNeel Natu }
669318224bbSNeel Natu 
670318224bbSNeel Natu void *
671318224bbSNeel Natu vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
672318224bbSNeel Natu 	    void **cookie)
673318224bbSNeel Natu {
674318224bbSNeel Natu 	int count, pageoff;
675318224bbSNeel Natu 	vm_page_t m;
676318224bbSNeel Natu 
677318224bbSNeel Natu 	pageoff = gpa & PAGE_MASK;
678318224bbSNeel Natu 	if (len > PAGE_SIZE - pageoff)
679318224bbSNeel Natu 		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
680318224bbSNeel Natu 
681318224bbSNeel Natu 	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
682318224bbSNeel Natu 	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
683318224bbSNeel Natu 
684318224bbSNeel Natu 	if (count == 1) {
685318224bbSNeel Natu 		*cookie = m;
686318224bbSNeel Natu 		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
687318224bbSNeel Natu 	} else {
688318224bbSNeel Natu 		*cookie = NULL;
689318224bbSNeel Natu 		return (NULL);
690318224bbSNeel Natu 	}
691318224bbSNeel Natu }
692318224bbSNeel Natu 
693318224bbSNeel Natu void
694318224bbSNeel Natu vm_gpa_release(void *cookie)
695318224bbSNeel Natu {
696318224bbSNeel Natu 	vm_page_t m = cookie;
697318224bbSNeel Natu 
698318224bbSNeel Natu 	vm_page_lock(m);
699318224bbSNeel Natu 	vm_page_unhold(m);
700318224bbSNeel Natu 	vm_page_unlock(m);
701366f6083SPeter Grehan }
702366f6083SPeter Grehan 
703366f6083SPeter Grehan int
704366f6083SPeter Grehan vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
705366f6083SPeter Grehan 		  struct vm_memory_segment *seg)
706366f6083SPeter Grehan {
707366f6083SPeter Grehan 	int i;
708366f6083SPeter Grehan 
709366f6083SPeter Grehan 	for (i = 0; i < vm->num_mem_segs; i++) {
710366f6083SPeter Grehan 		if (gpabase == vm->mem_segs[i].gpa) {
711318224bbSNeel Natu 			seg->gpa = vm->mem_segs[i].gpa;
712318224bbSNeel Natu 			seg->len = vm->mem_segs[i].len;
713318224bbSNeel Natu 			seg->wired = vm->mem_segs[i].wired;
714366f6083SPeter Grehan 			return (0);
715366f6083SPeter Grehan 		}
716366f6083SPeter Grehan 	}
717366f6083SPeter Grehan 	return (-1);
718366f6083SPeter Grehan }
719366f6083SPeter Grehan 
720366f6083SPeter Grehan int
721318224bbSNeel Natu vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
722318224bbSNeel Natu 	      vm_offset_t *offset, struct vm_object **object)
723318224bbSNeel Natu {
724318224bbSNeel Natu 	int i;
725318224bbSNeel Natu 	size_t seg_len;
726318224bbSNeel Natu 	vm_paddr_t seg_gpa;
727318224bbSNeel Natu 	vm_object_t seg_obj;
728318224bbSNeel Natu 
729318224bbSNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
730318224bbSNeel Natu 		if ((seg_obj = vm->mem_segs[i].object) == NULL)
731318224bbSNeel Natu 			continue;
732318224bbSNeel Natu 
733318224bbSNeel Natu 		seg_gpa = vm->mem_segs[i].gpa;
734318224bbSNeel Natu 		seg_len = vm->mem_segs[i].len;
735318224bbSNeel Natu 
736318224bbSNeel Natu 		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
737318224bbSNeel Natu 			*offset = gpa - seg_gpa;
738318224bbSNeel Natu 			*object = seg_obj;
739318224bbSNeel Natu 			vm_object_reference(seg_obj);
740318224bbSNeel Natu 			return (0);
741318224bbSNeel Natu 		}
742318224bbSNeel Natu 	}
743318224bbSNeel Natu 
744318224bbSNeel Natu 	return (EINVAL);
745318224bbSNeel Natu }
746318224bbSNeel Natu 
747318224bbSNeel Natu int
748366f6083SPeter Grehan vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
749366f6083SPeter Grehan {
750366f6083SPeter Grehan 
751366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
752366f6083SPeter Grehan 		return (EINVAL);
753366f6083SPeter Grehan 
754366f6083SPeter Grehan 	if (reg >= VM_REG_LAST)
755366f6083SPeter Grehan 		return (EINVAL);
756366f6083SPeter Grehan 
757366f6083SPeter Grehan 	return (VMGETREG(vm->cookie, vcpu, reg, retval));
758366f6083SPeter Grehan }
759366f6083SPeter Grehan 
760366f6083SPeter Grehan int
761366f6083SPeter Grehan vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
762366f6083SPeter Grehan {
763366f6083SPeter Grehan 
764366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
765366f6083SPeter Grehan 		return (EINVAL);
766366f6083SPeter Grehan 
767366f6083SPeter Grehan 	if (reg >= VM_REG_LAST)
768366f6083SPeter Grehan 		return (EINVAL);
769366f6083SPeter Grehan 
770366f6083SPeter Grehan 	return (VMSETREG(vm->cookie, vcpu, reg, val));
771366f6083SPeter Grehan }
772366f6083SPeter Grehan 
773366f6083SPeter Grehan static boolean_t
774366f6083SPeter Grehan is_descriptor_table(int reg)
775366f6083SPeter Grehan {
776366f6083SPeter Grehan 
777366f6083SPeter Grehan 	switch (reg) {
778366f6083SPeter Grehan 	case VM_REG_GUEST_IDTR:
779366f6083SPeter Grehan 	case VM_REG_GUEST_GDTR:
780366f6083SPeter Grehan 		return (TRUE);
781366f6083SPeter Grehan 	default:
782366f6083SPeter Grehan 		return (FALSE);
783366f6083SPeter Grehan 	}
784366f6083SPeter Grehan }
785366f6083SPeter Grehan 
786366f6083SPeter Grehan static boolean_t
787366f6083SPeter Grehan is_segment_register(int reg)
788366f6083SPeter Grehan {
789366f6083SPeter Grehan 
790366f6083SPeter Grehan 	switch (reg) {
791366f6083SPeter Grehan 	case VM_REG_GUEST_ES:
792366f6083SPeter Grehan 	case VM_REG_GUEST_CS:
793366f6083SPeter Grehan 	case VM_REG_GUEST_SS:
794366f6083SPeter Grehan 	case VM_REG_GUEST_DS:
795366f6083SPeter Grehan 	case VM_REG_GUEST_FS:
796366f6083SPeter Grehan 	case VM_REG_GUEST_GS:
797366f6083SPeter Grehan 	case VM_REG_GUEST_TR:
798366f6083SPeter Grehan 	case VM_REG_GUEST_LDTR:
799366f6083SPeter Grehan 		return (TRUE);
800366f6083SPeter Grehan 	default:
801366f6083SPeter Grehan 		return (FALSE);
802366f6083SPeter Grehan 	}
803366f6083SPeter Grehan }
804366f6083SPeter Grehan 
805366f6083SPeter Grehan int
806366f6083SPeter Grehan vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
807366f6083SPeter Grehan 		struct seg_desc *desc)
808366f6083SPeter Grehan {
809366f6083SPeter Grehan 
810366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
811366f6083SPeter Grehan 		return (EINVAL);
812366f6083SPeter Grehan 
813366f6083SPeter Grehan 	if (!is_segment_register(reg) && !is_descriptor_table(reg))
814366f6083SPeter Grehan 		return (EINVAL);
815366f6083SPeter Grehan 
816366f6083SPeter Grehan 	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
817366f6083SPeter Grehan }
818366f6083SPeter Grehan 
819366f6083SPeter Grehan int
820366f6083SPeter Grehan vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
821366f6083SPeter Grehan 		struct seg_desc *desc)
822366f6083SPeter Grehan {
823366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
824366f6083SPeter Grehan 		return (EINVAL);
825366f6083SPeter Grehan 
826366f6083SPeter Grehan 	if (!is_segment_register(reg) && !is_descriptor_table(reg))
827366f6083SPeter Grehan 		return (EINVAL);
828366f6083SPeter Grehan 
829366f6083SPeter Grehan 	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
830366f6083SPeter Grehan }
831366f6083SPeter Grehan 
832366f6083SPeter Grehan static void
833366f6083SPeter Grehan restore_guest_fpustate(struct vcpu *vcpu)
834366f6083SPeter Grehan {
835366f6083SPeter Grehan 
83638f1b189SPeter Grehan 	/* flush host state to the pcb */
83738f1b189SPeter Grehan 	fpuexit(curthread);
838bd8572e0SNeel Natu 
839bd8572e0SNeel Natu 	/* restore guest FPU state */
840366f6083SPeter Grehan 	fpu_stop_emulating();
84138f1b189SPeter Grehan 	fpurestore(vcpu->guestfpu);
842bd8572e0SNeel Natu 
843abb023fbSJohn Baldwin 	/* restore guest XCR0 if XSAVE is enabled in the host */
844abb023fbSJohn Baldwin 	if (rcr4() & CR4_XSAVE)
845abb023fbSJohn Baldwin 		load_xcr(0, vcpu->guest_xcr0);
846abb023fbSJohn Baldwin 
847bd8572e0SNeel Natu 	/*
848bd8572e0SNeel Natu 	 * The FPU is now "dirty" with the guest's state so turn on emulation
849bd8572e0SNeel Natu 	 * to trap any access to the FPU by the host.
850bd8572e0SNeel Natu 	 */
851bd8572e0SNeel Natu 	fpu_start_emulating();
852366f6083SPeter Grehan }
853366f6083SPeter Grehan 
854366f6083SPeter Grehan static void
855366f6083SPeter Grehan save_guest_fpustate(struct vcpu *vcpu)
856366f6083SPeter Grehan {
857366f6083SPeter Grehan 
858bd8572e0SNeel Natu 	if ((rcr0() & CR0_TS) == 0)
859bd8572e0SNeel Natu 		panic("fpu emulation not enabled in host!");
860bd8572e0SNeel Natu 
861abb023fbSJohn Baldwin 	/* save guest XCR0 and restore host XCR0 */
862abb023fbSJohn Baldwin 	if (rcr4() & CR4_XSAVE) {
863abb023fbSJohn Baldwin 		vcpu->guest_xcr0 = rxcr(0);
864abb023fbSJohn Baldwin 		load_xcr(0, vmm_get_host_xcr0());
865abb023fbSJohn Baldwin 	}
866abb023fbSJohn Baldwin 
867bd8572e0SNeel Natu 	/* save guest FPU state */
868bd8572e0SNeel Natu 	fpu_stop_emulating();
86938f1b189SPeter Grehan 	fpusave(vcpu->guestfpu);
870366f6083SPeter Grehan 	fpu_start_emulating();
871366f6083SPeter Grehan }
872366f6083SPeter Grehan 
/*
 * Statistic bumped by vm_handle_hlt() with the number of ticks that elapsed
 * across each sleep of a halted vcpu.
 */
static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
874f76fc5d4SNeel Natu 
875318224bbSNeel Natu static int
876f80330a8SNeel Natu vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
877f80330a8SNeel Natu     bool from_idle)
878366f6083SPeter Grehan {
879318224bbSNeel Natu 	int error;
880366f6083SPeter Grehan 
881318224bbSNeel Natu 	vcpu_assert_locked(vcpu);
882366f6083SPeter Grehan 
883f76fc5d4SNeel Natu 	/*
884f80330a8SNeel Natu 	 * State transitions from the vmmdev_ioctl() must always begin from
885f80330a8SNeel Natu 	 * the VCPU_IDLE state. This guarantees that there is only a single
886f80330a8SNeel Natu 	 * ioctl() operating on a vcpu at any point.
887f80330a8SNeel Natu 	 */
888f80330a8SNeel Natu 	if (from_idle) {
889f80330a8SNeel Natu 		while (vcpu->state != VCPU_IDLE)
890f80330a8SNeel Natu 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
891f80330a8SNeel Natu 	} else {
892f80330a8SNeel Natu 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
893f80330a8SNeel Natu 		    "vcpu idle state"));
894f80330a8SNeel Natu 	}
895f80330a8SNeel Natu 
896ef39d7e9SNeel Natu 	if (vcpu->state == VCPU_RUNNING) {
897ef39d7e9SNeel Natu 		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
898ef39d7e9SNeel Natu 		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
899ef39d7e9SNeel Natu 	} else {
900ef39d7e9SNeel Natu 		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
901ef39d7e9SNeel Natu 		    "vcpu that is not running", vcpu->hostcpu));
902ef39d7e9SNeel Natu 	}
903ef39d7e9SNeel Natu 
904f80330a8SNeel Natu 	/*
905318224bbSNeel Natu 	 * The following state transitions are allowed:
906318224bbSNeel Natu 	 * IDLE -> FROZEN -> IDLE
907318224bbSNeel Natu 	 * FROZEN -> RUNNING -> FROZEN
908318224bbSNeel Natu 	 * FROZEN -> SLEEPING -> FROZEN
909f76fc5d4SNeel Natu 	 */
910318224bbSNeel Natu 	switch (vcpu->state) {
911318224bbSNeel Natu 	case VCPU_IDLE:
912318224bbSNeel Natu 	case VCPU_RUNNING:
913318224bbSNeel Natu 	case VCPU_SLEEPING:
914318224bbSNeel Natu 		error = (newstate != VCPU_FROZEN);
915318224bbSNeel Natu 		break;
916318224bbSNeel Natu 	case VCPU_FROZEN:
917318224bbSNeel Natu 		error = (newstate == VCPU_FROZEN);
918318224bbSNeel Natu 		break;
919318224bbSNeel Natu 	default:
920318224bbSNeel Natu 		error = 1;
921318224bbSNeel Natu 		break;
922318224bbSNeel Natu 	}
923318224bbSNeel Natu 
924f80330a8SNeel Natu 	if (error)
925f80330a8SNeel Natu 		return (EBUSY);
926318224bbSNeel Natu 
927f80330a8SNeel Natu 	vcpu->state = newstate;
928ef39d7e9SNeel Natu 	if (newstate == VCPU_RUNNING)
929ef39d7e9SNeel Natu 		vcpu->hostcpu = curcpu;
930ef39d7e9SNeel Natu 	else
931ef39d7e9SNeel Natu 		vcpu->hostcpu = NOCPU;
932ef39d7e9SNeel Natu 
933f80330a8SNeel Natu 	if (newstate == VCPU_IDLE)
934f80330a8SNeel Natu 		wakeup(&vcpu->state);
935f80330a8SNeel Natu 
936f80330a8SNeel Natu 	return (0);
937318224bbSNeel Natu }
938318224bbSNeel Natu 
939318224bbSNeel Natu static void
940318224bbSNeel Natu vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
941318224bbSNeel Natu {
942318224bbSNeel Natu 	int error;
943318224bbSNeel Natu 
944f80330a8SNeel Natu 	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
945318224bbSNeel Natu 		panic("Error %d setting state to %d\n", error, newstate);
946318224bbSNeel Natu }
947318224bbSNeel Natu 
948318224bbSNeel Natu static void
949318224bbSNeel Natu vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
950318224bbSNeel Natu {
951318224bbSNeel Natu 	int error;
952318224bbSNeel Natu 
953f80330a8SNeel Natu 	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
954318224bbSNeel Natu 		panic("Error %d setting state to %d", error, newstate);
955318224bbSNeel Natu }
956318224bbSNeel Natu 
9575b8a8cd1SNeel Natu static void
9585b8a8cd1SNeel Natu vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
9595b8a8cd1SNeel Natu {
9605b8a8cd1SNeel Natu 
9615b8a8cd1SNeel Natu 	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
9625b8a8cd1SNeel Natu 
9635b8a8cd1SNeel Natu 	/*
9645b8a8cd1SNeel Natu 	 * Update 'rendezvous_func' and execute a write memory barrier to
9655b8a8cd1SNeel Natu 	 * ensure that it is visible across all host cpus. This is not needed
9665b8a8cd1SNeel Natu 	 * for correctness but it does ensure that all the vcpus will notice
9675b8a8cd1SNeel Natu 	 * that the rendezvous is requested immediately.
9685b8a8cd1SNeel Natu 	 */
9695b8a8cd1SNeel Natu 	vm->rendezvous_func = func;
9705b8a8cd1SNeel Natu 	wmb();
9715b8a8cd1SNeel Natu }
9725b8a8cd1SNeel Natu 
/*
 * Emit a rendezvous trace record: per-vcpu (VCPU_CTR0) when 'vcpuid' is
 * non-negative, VM-wide (VM_CTR0) otherwise.  'vcpuid' is parenthesized in
 * the comparison so that an expression argument is tested as a whole.
 */
#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if ((vcpuid) >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)
9805b8a8cd1SNeel Natu 
9815b8a8cd1SNeel Natu static void
9825b8a8cd1SNeel Natu vm_handle_rendezvous(struct vm *vm, int vcpuid)
9835b8a8cd1SNeel Natu {
9845b8a8cd1SNeel Natu 
9855b8a8cd1SNeel Natu 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
9865b8a8cd1SNeel Natu 	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
9875b8a8cd1SNeel Natu 
9885b8a8cd1SNeel Natu 	mtx_lock(&vm->rendezvous_mtx);
9895b8a8cd1SNeel Natu 	while (vm->rendezvous_func != NULL) {
99022d822c6SNeel Natu 		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
99122d822c6SNeel Natu 		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
99222d822c6SNeel Natu 
9935b8a8cd1SNeel Natu 		if (vcpuid != -1 &&
99422d822c6SNeel Natu 		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
99522d822c6SNeel Natu 		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
9965b8a8cd1SNeel Natu 			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
9975b8a8cd1SNeel Natu 			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
9985b8a8cd1SNeel Natu 			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
9995b8a8cd1SNeel Natu 		}
10005b8a8cd1SNeel Natu 		if (CPU_CMP(&vm->rendezvous_req_cpus,
10015b8a8cd1SNeel Natu 		    &vm->rendezvous_done_cpus) == 0) {
10025b8a8cd1SNeel Natu 			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
10035b8a8cd1SNeel Natu 			vm_set_rendezvous_func(vm, NULL);
10045b8a8cd1SNeel Natu 			wakeup(&vm->rendezvous_func);
10055b8a8cd1SNeel Natu 			break;
10065b8a8cd1SNeel Natu 		}
10075b8a8cd1SNeel Natu 		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
10085b8a8cd1SNeel Natu 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
10095b8a8cd1SNeel Natu 		    "vmrndv", 0);
10105b8a8cd1SNeel Natu 	}
10115b8a8cd1SNeel Natu 	mtx_unlock(&vm->rendezvous_mtx);
10125b8a8cd1SNeel Natu }
10135b8a8cd1SNeel Natu 
1014318224bbSNeel Natu /*
1015318224bbSNeel Natu  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
1016318224bbSNeel Natu  */
1017318224bbSNeel Natu static int
1018becd9849SNeel Natu vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
1019318224bbSNeel Natu {
1020318224bbSNeel Natu 	struct vcpu *vcpu;
1021c6a0cc2eSNeel Natu 	const char *wmesg;
1022e50ce2aaSNeel Natu 	int t, vcpu_halted, vm_halted;
1023e50ce2aaSNeel Natu 
1024e50ce2aaSNeel Natu 	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
1025318224bbSNeel Natu 
1026318224bbSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1027e50ce2aaSNeel Natu 	vcpu_halted = 0;
1028e50ce2aaSNeel Natu 	vm_halted = 0;
1029318224bbSNeel Natu 
1030f76fc5d4SNeel Natu 	vcpu_lock(vcpu);
1031c6a0cc2eSNeel Natu 	while (1) {
1032f76fc5d4SNeel Natu 		/*
1033f76fc5d4SNeel Natu 		 * Do a final check for pending NMI or interrupts before
1034c6a0cc2eSNeel Natu 		 * really putting this thread to sleep. Also check for
1035c6a0cc2eSNeel Natu 		 * software events that would cause this vcpu to wakeup.
1036f76fc5d4SNeel Natu 		 *
1037c6a0cc2eSNeel Natu 		 * These interrupts/events could have happened after the
1038c6a0cc2eSNeel Natu 		 * vcpu returned from VMRUN() and before it acquired the
1039c6a0cc2eSNeel Natu 		 * vcpu lock above.
1040f76fc5d4SNeel Natu 		 */
1041c6a0cc2eSNeel Natu 		if (vm->rendezvous_func != NULL || vm->suspend)
1042c6a0cc2eSNeel Natu 			break;
1043c6a0cc2eSNeel Natu 		if (vm_nmi_pending(vm, vcpuid))
1044c6a0cc2eSNeel Natu 			break;
1045c6a0cc2eSNeel Natu 		if (!intr_disabled) {
1046c6a0cc2eSNeel Natu 			if (vm_extint_pending(vm, vcpuid) ||
1047c6a0cc2eSNeel Natu 			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
1048c6a0cc2eSNeel Natu 				break;
1049c6a0cc2eSNeel Natu 			}
1050c6a0cc2eSNeel Natu 		}
1051c6a0cc2eSNeel Natu 
1052e50ce2aaSNeel Natu 		/*
1053e50ce2aaSNeel Natu 		 * Some Linux guests implement "halt" by having all vcpus
1054e50ce2aaSNeel Natu 		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
1055e50ce2aaSNeel Natu 		 * track of the vcpus that have entered this state. When all
1056e50ce2aaSNeel Natu 		 * vcpus enter the halted state the virtual machine is halted.
1057e50ce2aaSNeel Natu 		 */
1058e50ce2aaSNeel Natu 		if (intr_disabled) {
1059c6a0cc2eSNeel Natu 			wmesg = "vmhalt";
1060e50ce2aaSNeel Natu 			VCPU_CTR0(vm, vcpuid, "Halted");
1061055fc2cbSNeel Natu 			if (!vcpu_halted && halt_detection_enabled) {
1062e50ce2aaSNeel Natu 				vcpu_halted = 1;
1063e50ce2aaSNeel Natu 				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
1064e50ce2aaSNeel Natu 			}
1065e50ce2aaSNeel Natu 			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
1066e50ce2aaSNeel Natu 				vm_halted = 1;
1067e50ce2aaSNeel Natu 				break;
1068e50ce2aaSNeel Natu 			}
1069e50ce2aaSNeel Natu 		} else {
1070e50ce2aaSNeel Natu 			wmesg = "vmidle";
1071e50ce2aaSNeel Natu 		}
1072c6a0cc2eSNeel Natu 
1073f76fc5d4SNeel Natu 		t = ticks;
1074318224bbSNeel Natu 		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
1075c6a0cc2eSNeel Natu 		msleep_spin(vcpu, &vcpu->mtx, wmesg, 0);
107622d822c6SNeel Natu 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
107722d822c6SNeel Natu 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
107822d822c6SNeel Natu 	}
1079e50ce2aaSNeel Natu 
1080e50ce2aaSNeel Natu 	if (vcpu_halted)
1081e50ce2aaSNeel Natu 		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);
1082e50ce2aaSNeel Natu 
108322d822c6SNeel Natu 	vcpu_unlock(vcpu);
108422d822c6SNeel Natu 
1085e50ce2aaSNeel Natu 	if (vm_halted)
1086e50ce2aaSNeel Natu 		vm_suspend(vm, VM_SUSPEND_HALT);
1087e50ce2aaSNeel Natu 
1088318224bbSNeel Natu 	return (0);
1089318224bbSNeel Natu }
1090318224bbSNeel Natu 
1091318224bbSNeel Natu static int
1092becd9849SNeel Natu vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
1093318224bbSNeel Natu {
1094318224bbSNeel Natu 	int rv, ftype;
1095318224bbSNeel Natu 	struct vm_map *map;
1096318224bbSNeel Natu 	struct vcpu *vcpu;
1097318224bbSNeel Natu 	struct vm_exit *vme;
1098318224bbSNeel Natu 
1099318224bbSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1100318224bbSNeel Natu 	vme = &vcpu->exitinfo;
1101318224bbSNeel Natu 
1102318224bbSNeel Natu 	ftype = vme->u.paging.fault_type;
1103318224bbSNeel Natu 	KASSERT(ftype == VM_PROT_READ ||
1104318224bbSNeel Natu 	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
1105318224bbSNeel Natu 	    ("vm_handle_paging: invalid fault_type %d", ftype));
1106318224bbSNeel Natu 
1107318224bbSNeel Natu 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
1108318224bbSNeel Natu 		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
1109318224bbSNeel Natu 		    vme->u.paging.gpa, ftype);
1110318224bbSNeel Natu 		if (rv == 0)
1111318224bbSNeel Natu 			goto done;
1112318224bbSNeel Natu 	}
1113318224bbSNeel Natu 
1114318224bbSNeel Natu 	map = &vm->vmspace->vm_map;
1115318224bbSNeel Natu 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
1116318224bbSNeel Natu 
1117513c8d33SNeel Natu 	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
1118513c8d33SNeel Natu 	    "ftype = %d", rv, vme->u.paging.gpa, ftype);
1119318224bbSNeel Natu 
1120318224bbSNeel Natu 	if (rv != KERN_SUCCESS)
1121318224bbSNeel Natu 		return (EFAULT);
1122318224bbSNeel Natu done:
1123318224bbSNeel Natu 	/* restart execution at the faulting instruction */
1124318224bbSNeel Natu 	vme->inst_length = 0;
1125318224bbSNeel Natu 
1126318224bbSNeel Natu 	return (0);
1127318224bbSNeel Natu }
1128318224bbSNeel Natu 
1129318224bbSNeel Natu static int
1130becd9849SNeel Natu vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
1131318224bbSNeel Natu {
1132318224bbSNeel Natu 	struct vie *vie;
1133318224bbSNeel Natu 	struct vcpu *vcpu;
1134318224bbSNeel Natu 	struct vm_exit *vme;
1135e4c8a13dSNeel Natu 	int cpl, error, inst_length;
1136318224bbSNeel Natu 	uint64_t rip, gla, gpa, cr3;
113700f3efe1SJohn Baldwin 	enum vie_cpu_mode cpu_mode;
113800f3efe1SJohn Baldwin 	enum vie_paging_mode paging_mode;
1139565bbb86SNeel Natu 	mem_region_read_t mread;
1140565bbb86SNeel Natu 	mem_region_write_t mwrite;
1141318224bbSNeel Natu 
1142318224bbSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1143318224bbSNeel Natu 	vme = &vcpu->exitinfo;
1144318224bbSNeel Natu 
1145318224bbSNeel Natu 	rip = vme->rip;
1146318224bbSNeel Natu 	inst_length = vme->inst_length;
1147318224bbSNeel Natu 
1148318224bbSNeel Natu 	gla = vme->u.inst_emul.gla;
1149318224bbSNeel Natu 	gpa = vme->u.inst_emul.gpa;
1150318224bbSNeel Natu 	cr3 = vme->u.inst_emul.cr3;
1151e4c8a13dSNeel Natu 	cpl = vme->u.inst_emul.cpl;
115200f3efe1SJohn Baldwin 	cpu_mode = vme->u.inst_emul.cpu_mode;
115300f3efe1SJohn Baldwin 	paging_mode = vme->u.inst_emul.paging_mode;
1154318224bbSNeel Natu 	vie = &vme->u.inst_emul.vie;
1155318224bbSNeel Natu 
1156318224bbSNeel Natu 	vie_init(vie);
1157318224bbSNeel Natu 
1158318224bbSNeel Natu 	/* Fetch, decode and emulate the faulting instruction */
1159fd949af6SNeel Natu 	error = vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
1160fd949af6SNeel Natu 	    paging_mode, cpl, vie);
1161fd949af6SNeel Natu 	if (error == 1)
1162fd949af6SNeel Natu 		return (0);		/* Resume guest to handle page fault */
1163fd949af6SNeel Natu 	else if (error == -1)
1164318224bbSNeel Natu 		return (EFAULT);
1165fd949af6SNeel Natu 	else if (error != 0)
1166fd949af6SNeel Natu 		panic("%s: vmm_fetch_instruction error %d", __func__, error);
1167318224bbSNeel Natu 
116800f3efe1SJohn Baldwin 	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
1169318224bbSNeel Natu 		return (EFAULT);
1170318224bbSNeel Natu 
117108e3ff32SNeel Natu 	/* return to userland unless this is an in-kernel emulated device */
1172565bbb86SNeel Natu 	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
1173565bbb86SNeel Natu 		mread = lapic_mmio_read;
1174565bbb86SNeel Natu 		mwrite = lapic_mmio_write;
1175565bbb86SNeel Natu 	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
1176565bbb86SNeel Natu 		mread = vioapic_mmio_read;
1177565bbb86SNeel Natu 		mwrite = vioapic_mmio_write;
117808e3ff32SNeel Natu 	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
117908e3ff32SNeel Natu 		mread = vhpet_mmio_read;
118008e3ff32SNeel Natu 		mwrite = vhpet_mmio_write;
1181565bbb86SNeel Natu 	} else {
1182becd9849SNeel Natu 		*retu = true;
1183318224bbSNeel Natu 		return (0);
1184318224bbSNeel Natu 	}
1185318224bbSNeel Natu 
1186becd9849SNeel Natu 	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
1187becd9849SNeel Natu 	    retu);
1188318224bbSNeel Natu 
1189318224bbSNeel Natu 	return (error);
1190318224bbSNeel Natu }
1191318224bbSNeel Natu 
1192b15a09c0SNeel Natu static int
1193b15a09c0SNeel Natu vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
1194b15a09c0SNeel Natu {
1195b15a09c0SNeel Natu 	int i, done;
1196b15a09c0SNeel Natu 	struct vcpu *vcpu;
1197b15a09c0SNeel Natu 
1198b15a09c0SNeel Natu 	done = 0;
1199b15a09c0SNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1200b15a09c0SNeel Natu 
1201b15a09c0SNeel Natu 	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);
1202b15a09c0SNeel Natu 
1203b15a09c0SNeel Natu 	/*
1204b15a09c0SNeel Natu 	 * Wait until all 'active_cpus' have suspended themselves.
1205b15a09c0SNeel Natu 	 *
1206b15a09c0SNeel Natu 	 * Since a VM may be suspended at any time including when one or
1207b15a09c0SNeel Natu 	 * more vcpus are doing a rendezvous we need to call the rendezvous
1208b15a09c0SNeel Natu 	 * handler while we are waiting to prevent a deadlock.
1209b15a09c0SNeel Natu 	 */
1210b15a09c0SNeel Natu 	vcpu_lock(vcpu);
1211b15a09c0SNeel Natu 	while (1) {
1212b15a09c0SNeel Natu 		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
1213b15a09c0SNeel Natu 			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
1214b15a09c0SNeel Natu 			break;
1215b15a09c0SNeel Natu 		}
1216b15a09c0SNeel Natu 
1217b15a09c0SNeel Natu 		if (vm->rendezvous_func == NULL) {
1218b15a09c0SNeel Natu 			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
1219b15a09c0SNeel Natu 			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
1220b15a09c0SNeel Natu 			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
1221b15a09c0SNeel Natu 			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
1222b15a09c0SNeel Natu 		} else {
1223b15a09c0SNeel Natu 			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
1224b15a09c0SNeel Natu 			vcpu_unlock(vcpu);
1225b15a09c0SNeel Natu 			vm_handle_rendezvous(vm, vcpuid);
1226b15a09c0SNeel Natu 			vcpu_lock(vcpu);
1227b15a09c0SNeel Natu 		}
1228b15a09c0SNeel Natu 	}
1229b15a09c0SNeel Natu 	vcpu_unlock(vcpu);
1230b15a09c0SNeel Natu 
1231b15a09c0SNeel Natu 	/*
1232b15a09c0SNeel Natu 	 * Wakeup the other sleeping vcpus and return to userspace.
1233b15a09c0SNeel Natu 	 */
1234b15a09c0SNeel Natu 	for (i = 0; i < VM_MAXCPU; i++) {
1235b15a09c0SNeel Natu 		if (CPU_ISSET(i, &vm->suspended_cpus)) {
1236b15a09c0SNeel Natu 			vcpu_notify_event(vm, i, false);
1237b15a09c0SNeel Natu 		}
1238b15a09c0SNeel Natu 	}
1239b15a09c0SNeel Natu 
1240b15a09c0SNeel Natu 	*retu = true;
1241b15a09c0SNeel Natu 	return (0);
1242b15a09c0SNeel Natu }
1243b15a09c0SNeel Natu 
1244b15a09c0SNeel Natu int
1245f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how)
1246b15a09c0SNeel Natu {
1247f0fdcfe2SNeel Natu 	int i;
1248b15a09c0SNeel Natu 
1249f0fdcfe2SNeel Natu 	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
1250f0fdcfe2SNeel Natu 		return (EINVAL);
1251f0fdcfe2SNeel Natu 
1252f0fdcfe2SNeel Natu 	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
1253f0fdcfe2SNeel Natu 		VM_CTR2(vm, "virtual machine already suspended %d/%d",
1254f0fdcfe2SNeel Natu 		    vm->suspend, how);
1255b15a09c0SNeel Natu 		return (EALREADY);
1256b15a09c0SNeel Natu 	}
1257f0fdcfe2SNeel Natu 
1258f0fdcfe2SNeel Natu 	VM_CTR1(vm, "virtual machine successfully suspended %d", how);
1259f0fdcfe2SNeel Natu 
1260f0fdcfe2SNeel Natu 	/*
1261f0fdcfe2SNeel Natu 	 * Notify all active vcpus that they are now suspended.
1262f0fdcfe2SNeel Natu 	 */
1263f0fdcfe2SNeel Natu 	for (i = 0; i < VM_MAXCPU; i++) {
1264f0fdcfe2SNeel Natu 		if (CPU_ISSET(i, &vm->active_cpus))
1265f0fdcfe2SNeel Natu 			vcpu_notify_event(vm, i, false);
1266f0fdcfe2SNeel Natu 	}
1267f0fdcfe2SNeel Natu 
1268f0fdcfe2SNeel Natu 	return (0);
1269f0fdcfe2SNeel Natu }
1270f0fdcfe2SNeel Natu 
1271f0fdcfe2SNeel Natu void
1272f0fdcfe2SNeel Natu vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
1273f0fdcfe2SNeel Natu {
1274f0fdcfe2SNeel Natu 	struct vm_exit *vmexit;
1275f0fdcfe2SNeel Natu 
1276f0fdcfe2SNeel Natu 	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
1277f0fdcfe2SNeel Natu 	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
1278f0fdcfe2SNeel Natu 
1279f0fdcfe2SNeel Natu 	vmexit = vm_exitinfo(vm, vcpuid);
1280f0fdcfe2SNeel Natu 	vmexit->rip = rip;
1281f0fdcfe2SNeel Natu 	vmexit->inst_length = 0;
1282f0fdcfe2SNeel Natu 	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
1283f0fdcfe2SNeel Natu 	vmexit->u.suspended.how = vm->suspend;
1284b15a09c0SNeel Natu }
1285b15a09c0SNeel Natu 
1286318224bbSNeel Natu int
1287318224bbSNeel Natu vm_run(struct vm *vm, struct vm_run *vmrun)
1288318224bbSNeel Natu {
1289318224bbSNeel Natu 	int error, vcpuid;
1290318224bbSNeel Natu 	struct vcpu *vcpu;
1291318224bbSNeel Natu 	struct pcb *pcb;
1292318224bbSNeel Natu 	uint64_t tscval, rip;
1293318224bbSNeel Natu 	struct vm_exit *vme;
1294becd9849SNeel Natu 	bool retu, intr_disabled;
1295318224bbSNeel Natu 	pmap_t pmap;
1296b15a09c0SNeel Natu 	void *rptr, *sptr;
1297318224bbSNeel Natu 
1298318224bbSNeel Natu 	vcpuid = vmrun->cpuid;
1299318224bbSNeel Natu 
1300318224bbSNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1301318224bbSNeel Natu 		return (EINVAL);
1302318224bbSNeel Natu 
1303b15a09c0SNeel Natu 	rptr = &vm->rendezvous_func;
1304b15a09c0SNeel Natu 	sptr = &vm->suspend;
1305318224bbSNeel Natu 	pmap = vmspace_pmap(vm->vmspace);
1306318224bbSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1307318224bbSNeel Natu 	vme = &vcpu->exitinfo;
1308318224bbSNeel Natu 	rip = vmrun->rip;
1309318224bbSNeel Natu restart:
1310318224bbSNeel Natu 	critical_enter();
1311318224bbSNeel Natu 
1312318224bbSNeel Natu 	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
1313318224bbSNeel Natu 	    ("vm_run: absurd pm_active"));
1314318224bbSNeel Natu 
1315318224bbSNeel Natu 	tscval = rdtsc();
1316318224bbSNeel Natu 
1317318224bbSNeel Natu 	pcb = PCPU_GET(curpcb);
1318318224bbSNeel Natu 	set_pcb_flags(pcb, PCB_FULL_IRET);
1319318224bbSNeel Natu 
1320318224bbSNeel Natu 	restore_guest_msrs(vm, vcpuid);
1321318224bbSNeel Natu 	restore_guest_fpustate(vcpu);
1322318224bbSNeel Natu 
1323318224bbSNeel Natu 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
1324b15a09c0SNeel Natu 	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
1325318224bbSNeel Natu 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
1326318224bbSNeel Natu 
1327318224bbSNeel Natu 	save_guest_fpustate(vcpu);
1328318224bbSNeel Natu 	restore_host_msrs(vm, vcpuid);
1329318224bbSNeel Natu 
1330318224bbSNeel Natu 	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
1331318224bbSNeel Natu 
1332318224bbSNeel Natu 	critical_exit();
1333318224bbSNeel Natu 
1334318224bbSNeel Natu 	if (error == 0) {
1335becd9849SNeel Natu 		retu = false;
1336318224bbSNeel Natu 		switch (vme->exitcode) {
1337b15a09c0SNeel Natu 		case VM_EXITCODE_SUSPENDED:
1338b15a09c0SNeel Natu 			error = vm_handle_suspend(vm, vcpuid, &retu);
1339b15a09c0SNeel Natu 			break;
134030b94db8SNeel Natu 		case VM_EXITCODE_IOAPIC_EOI:
134130b94db8SNeel Natu 			vioapic_process_eoi(vm, vcpuid,
134230b94db8SNeel Natu 			    vme->u.ioapic_eoi.vector);
134330b94db8SNeel Natu 			break;
13445b8a8cd1SNeel Natu 		case VM_EXITCODE_RENDEZVOUS:
13455b8a8cd1SNeel Natu 			vm_handle_rendezvous(vm, vcpuid);
13465b8a8cd1SNeel Natu 			error = 0;
13475b8a8cd1SNeel Natu 			break;
1348318224bbSNeel Natu 		case VM_EXITCODE_HLT:
1349becd9849SNeel Natu 			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
13501c052192SNeel Natu 			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
1351318224bbSNeel Natu 			break;
1352318224bbSNeel Natu 		case VM_EXITCODE_PAGING:
1353318224bbSNeel Natu 			error = vm_handle_paging(vm, vcpuid, &retu);
1354318224bbSNeel Natu 			break;
1355318224bbSNeel Natu 		case VM_EXITCODE_INST_EMUL:
1356318224bbSNeel Natu 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
1357318224bbSNeel Natu 			break;
1358d17b5104SNeel Natu 		case VM_EXITCODE_INOUT:
1359d17b5104SNeel Natu 		case VM_EXITCODE_INOUT_STR:
1360d17b5104SNeel Natu 			error = vm_handle_inout(vm, vcpuid, vme, &retu);
1361d17b5104SNeel Natu 			break;
1362318224bbSNeel Natu 		default:
1363becd9849SNeel Natu 			retu = true;	/* handled in userland */
1364318224bbSNeel Natu 			break;
1365318224bbSNeel Natu 		}
1366318224bbSNeel Natu 	}
1367318224bbSNeel Natu 
1368becd9849SNeel Natu 	if (error == 0 && retu == false) {
1369f76fc5d4SNeel Natu 		rip = vme->rip + vme->inst_length;
1370f76fc5d4SNeel Natu 		goto restart;
1371f76fc5d4SNeel Natu 	}
1372f76fc5d4SNeel Natu 
1373318224bbSNeel Natu 	/* copy the exit information */
1374318224bbSNeel Natu 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
1375366f6083SPeter Grehan 	return (error);
1376366f6083SPeter Grehan }
1377366f6083SPeter Grehan 
1378366f6083SPeter Grehan int
1379dc506506SNeel Natu vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
1380366f6083SPeter Grehan {
1381dc506506SNeel Natu 	struct vcpu *vcpu;
1382dc506506SNeel Natu 
1383366f6083SPeter Grehan 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1384366f6083SPeter Grehan 		return (EINVAL);
1385366f6083SPeter Grehan 
1386dc506506SNeel Natu 	if (exception->vector < 0 || exception->vector >= 32)
1387366f6083SPeter Grehan 		return (EINVAL);
1388366f6083SPeter Grehan 
1389dc506506SNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1390366f6083SPeter Grehan 
1391dc506506SNeel Natu 	if (vcpu->exception_pending) {
1392dc506506SNeel Natu 		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
1393dc506506SNeel Natu 		    "pending exception %d", exception->vector,
1394dc506506SNeel Natu 		    vcpu->exception.vector);
1395dc506506SNeel Natu 		return (EBUSY);
1396dc506506SNeel Natu 	}
1397dc506506SNeel Natu 
1398dc506506SNeel Natu 	vcpu->exception_pending = 1;
1399dc506506SNeel Natu 	vcpu->exception = *exception;
1400dc506506SNeel Natu 	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
1401dc506506SNeel Natu 	return (0);
1402dc506506SNeel Natu }
1403dc506506SNeel Natu 
1404dc506506SNeel Natu int
1405dc506506SNeel Natu vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
1406dc506506SNeel Natu {
1407dc506506SNeel Natu 	struct vcpu *vcpu;
1408dc506506SNeel Natu 	int pending;
1409dc506506SNeel Natu 
1410dc506506SNeel Natu 	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
1411dc506506SNeel Natu 
1412dc506506SNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1413dc506506SNeel Natu 	pending = vcpu->exception_pending;
1414dc506506SNeel Natu 	if (pending) {
1415dc506506SNeel Natu 		vcpu->exception_pending = 0;
1416dc506506SNeel Natu 		*exception = vcpu->exception;
1417dc506506SNeel Natu 		VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
1418dc506506SNeel Natu 		    exception->vector);
1419dc506506SNeel Natu 	}
1420dc506506SNeel Natu 	return (pending);
1421dc506506SNeel Natu }
1422dc506506SNeel Natu 
1423dc506506SNeel Natu static void
1424dc506506SNeel Natu vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
1425dc506506SNeel Natu {
1426dc506506SNeel Natu 	struct vm_exit *vmexit;
1427dc506506SNeel Natu 	int error;
1428dc506506SNeel Natu 
1429dc506506SNeel Natu 	error = vm_inject_exception(vm, vcpuid, exception);
1430dc506506SNeel Natu 	KASSERT(error == 0, ("vm_inject_exception error %d", error));
1431dc506506SNeel Natu 
1432dc506506SNeel Natu 	/*
1433dc506506SNeel Natu 	 * A fault-like exception allows the instruction to be restarted
1434dc506506SNeel Natu 	 * after the exception handler returns.
1435dc506506SNeel Natu 	 *
1436dc506506SNeel Natu 	 * By setting the inst_length to 0 we ensure that the instruction
1437dc506506SNeel Natu 	 * pointer remains at the faulting instruction.
1438dc506506SNeel Natu 	 */
1439dc506506SNeel Natu 	vmexit = vm_exitinfo(vm, vcpuid);
1440dc506506SNeel Natu 	vmexit->inst_length = 0;
1441dc506506SNeel Natu }
1442dc506506SNeel Natu 
1443dc506506SNeel Natu void
1444*37a723a5SNeel Natu vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
1445fd949af6SNeel Natu {
1446fd949af6SNeel Natu 	struct vm_exception pf = {
1447fd949af6SNeel Natu 		.vector = IDT_PF,
1448fd949af6SNeel Natu 		.error_code_valid = 1,
1449fd949af6SNeel Natu 		.error_code = error_code
1450fd949af6SNeel Natu 	};
1451*37a723a5SNeel Natu 	int error;
1452*37a723a5SNeel Natu 
1453*37a723a5SNeel Natu 	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
1454*37a723a5SNeel Natu 	    error_code, cr2);
1455*37a723a5SNeel Natu 
1456*37a723a5SNeel Natu 	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
1457*37a723a5SNeel Natu 	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));
1458fd949af6SNeel Natu 
1459fd949af6SNeel Natu 	vm_inject_fault(vm, vcpuid, &pf);
1460fd949af6SNeel Natu }
1461fd949af6SNeel Natu 
1462fd949af6SNeel Natu void
1463dc506506SNeel Natu vm_inject_gp(struct vm *vm, int vcpuid)
1464dc506506SNeel Natu {
1465dc506506SNeel Natu 	struct vm_exception gpf = {
1466dc506506SNeel Natu 		.vector = IDT_GP,
1467dc506506SNeel Natu 		.error_code_valid = 1,
1468dc506506SNeel Natu 		.error_code = 0
1469dc506506SNeel Natu 	};
1470dc506506SNeel Natu 
1471dc506506SNeel Natu 	vm_inject_fault(vm, vcpuid, &gpf);
1472dc506506SNeel Natu }
1473dc506506SNeel Natu 
1474dc506506SNeel Natu void
1475dc506506SNeel Natu vm_inject_ud(struct vm *vm, int vcpuid)
1476dc506506SNeel Natu {
1477dc506506SNeel Natu 	struct vm_exception udf = {
1478dc506506SNeel Natu 		.vector = IDT_UD,
1479dc506506SNeel Natu 		.error_code_valid = 0
1480dc506506SNeel Natu 	};
1481dc506506SNeel Natu 
1482dc506506SNeel Natu 	vm_inject_fault(vm, vcpuid, &udf);
1483366f6083SPeter Grehan }
1484366f6083SPeter Grehan 
148561592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
1486366f6083SPeter Grehan 
1487f352ff0cSNeel Natu int
1488f352ff0cSNeel Natu vm_inject_nmi(struct vm *vm, int vcpuid)
1489f352ff0cSNeel Natu {
1490f352ff0cSNeel Natu 	struct vcpu *vcpu;
1491f352ff0cSNeel Natu 
1492f352ff0cSNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1493366f6083SPeter Grehan 		return (EINVAL);
1494366f6083SPeter Grehan 
1495f352ff0cSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1496f352ff0cSNeel Natu 
1497f352ff0cSNeel Natu 	vcpu->nmi_pending = 1;
1498de5ea6b6SNeel Natu 	vcpu_notify_event(vm, vcpuid, false);
1499f352ff0cSNeel Natu 	return (0);
1500f352ff0cSNeel Natu }
1501f352ff0cSNeel Natu 
1502f352ff0cSNeel Natu int
1503f352ff0cSNeel Natu vm_nmi_pending(struct vm *vm, int vcpuid)
1504f352ff0cSNeel Natu {
1505f352ff0cSNeel Natu 	struct vcpu *vcpu;
1506f352ff0cSNeel Natu 
1507f352ff0cSNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1508f352ff0cSNeel Natu 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1509f352ff0cSNeel Natu 
1510f352ff0cSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1511f352ff0cSNeel Natu 
1512f352ff0cSNeel Natu 	return (vcpu->nmi_pending);
1513f352ff0cSNeel Natu }
1514f352ff0cSNeel Natu 
1515f352ff0cSNeel Natu void
1516f352ff0cSNeel Natu vm_nmi_clear(struct vm *vm, int vcpuid)
1517f352ff0cSNeel Natu {
1518f352ff0cSNeel Natu 	struct vcpu *vcpu;
1519f352ff0cSNeel Natu 
1520f352ff0cSNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1521f352ff0cSNeel Natu 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1522f352ff0cSNeel Natu 
1523f352ff0cSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1524f352ff0cSNeel Natu 
1525f352ff0cSNeel Natu 	if (vcpu->nmi_pending == 0)
1526f352ff0cSNeel Natu 		panic("vm_nmi_clear: inconsistent nmi_pending state");
1527f352ff0cSNeel Natu 
1528f352ff0cSNeel Natu 	vcpu->nmi_pending = 0;
1529f352ff0cSNeel Natu 	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
1530366f6083SPeter Grehan }
1531366f6083SPeter Grehan 
15320775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
15330775fbb4STycho Nightingale 
15340775fbb4STycho Nightingale int
15350775fbb4STycho Nightingale vm_inject_extint(struct vm *vm, int vcpuid)
15360775fbb4STycho Nightingale {
15370775fbb4STycho Nightingale 	struct vcpu *vcpu;
15380775fbb4STycho Nightingale 
15390775fbb4STycho Nightingale 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
15400775fbb4STycho Nightingale 		return (EINVAL);
15410775fbb4STycho Nightingale 
15420775fbb4STycho Nightingale 	vcpu = &vm->vcpu[vcpuid];
15430775fbb4STycho Nightingale 
15440775fbb4STycho Nightingale 	vcpu->extint_pending = 1;
15450775fbb4STycho Nightingale 	vcpu_notify_event(vm, vcpuid, false);
15460775fbb4STycho Nightingale 	return (0);
15470775fbb4STycho Nightingale }
15480775fbb4STycho Nightingale 
15490775fbb4STycho Nightingale int
15500775fbb4STycho Nightingale vm_extint_pending(struct vm *vm, int vcpuid)
15510775fbb4STycho Nightingale {
15520775fbb4STycho Nightingale 	struct vcpu *vcpu;
15530775fbb4STycho Nightingale 
15540775fbb4STycho Nightingale 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
15550775fbb4STycho Nightingale 		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
15560775fbb4STycho Nightingale 
15570775fbb4STycho Nightingale 	vcpu = &vm->vcpu[vcpuid];
15580775fbb4STycho Nightingale 
15590775fbb4STycho Nightingale 	return (vcpu->extint_pending);
15600775fbb4STycho Nightingale }
15610775fbb4STycho Nightingale 
15620775fbb4STycho Nightingale void
15630775fbb4STycho Nightingale vm_extint_clear(struct vm *vm, int vcpuid)
15640775fbb4STycho Nightingale {
15650775fbb4STycho Nightingale 	struct vcpu *vcpu;
15660775fbb4STycho Nightingale 
15670775fbb4STycho Nightingale 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
15680775fbb4STycho Nightingale 		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
15690775fbb4STycho Nightingale 
15700775fbb4STycho Nightingale 	vcpu = &vm->vcpu[vcpuid];
15710775fbb4STycho Nightingale 
15720775fbb4STycho Nightingale 	if (vcpu->extint_pending == 0)
15730775fbb4STycho Nightingale 		panic("vm_extint_clear: inconsistent extint_pending state");
15740775fbb4STycho Nightingale 
15750775fbb4STycho Nightingale 	vcpu->extint_pending = 0;
15760775fbb4STycho Nightingale 	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
15770775fbb4STycho Nightingale }
15780775fbb4STycho Nightingale 
1579366f6083SPeter Grehan int
1580366f6083SPeter Grehan vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
1581366f6083SPeter Grehan {
1582366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1583366f6083SPeter Grehan 		return (EINVAL);
1584366f6083SPeter Grehan 
1585366f6083SPeter Grehan 	if (type < 0 || type >= VM_CAP_MAX)
1586366f6083SPeter Grehan 		return (EINVAL);
1587366f6083SPeter Grehan 
1588366f6083SPeter Grehan 	return (VMGETCAP(vm->cookie, vcpu, type, retval));
1589366f6083SPeter Grehan }
1590366f6083SPeter Grehan 
1591366f6083SPeter Grehan int
1592366f6083SPeter Grehan vm_set_capability(struct vm *vm, int vcpu, int type, int val)
1593366f6083SPeter Grehan {
1594366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1595366f6083SPeter Grehan 		return (EINVAL);
1596366f6083SPeter Grehan 
1597366f6083SPeter Grehan 	if (type < 0 || type >= VM_CAP_MAX)
1598366f6083SPeter Grehan 		return (EINVAL);
1599366f6083SPeter Grehan 
1600366f6083SPeter Grehan 	return (VMSETCAP(vm->cookie, vcpu, type, val));
1601366f6083SPeter Grehan }
1602366f6083SPeter Grehan 
1603366f6083SPeter Grehan uint64_t *
1604366f6083SPeter Grehan vm_guest_msrs(struct vm *vm, int cpu)
1605366f6083SPeter Grehan {
1606366f6083SPeter Grehan 	return (vm->vcpu[cpu].guest_msrs);
1607366f6083SPeter Grehan }
1608366f6083SPeter Grehan 
1609366f6083SPeter Grehan struct vlapic *
1610366f6083SPeter Grehan vm_lapic(struct vm *vm, int cpu)
1611366f6083SPeter Grehan {
1612366f6083SPeter Grehan 	return (vm->vcpu[cpu].vlapic);
1613366f6083SPeter Grehan }
1614366f6083SPeter Grehan 
1615565bbb86SNeel Natu struct vioapic *
1616565bbb86SNeel Natu vm_ioapic(struct vm *vm)
1617565bbb86SNeel Natu {
1618565bbb86SNeel Natu 
1619565bbb86SNeel Natu 	return (vm->vioapic);
1620565bbb86SNeel Natu }
1621565bbb86SNeel Natu 
162208e3ff32SNeel Natu struct vhpet *
162308e3ff32SNeel Natu vm_hpet(struct vm *vm)
162408e3ff32SNeel Natu {
162508e3ff32SNeel Natu 
162608e3ff32SNeel Natu 	return (vm->vhpet);
162708e3ff32SNeel Natu }
162808e3ff32SNeel Natu 
1629366f6083SPeter Grehan boolean_t
1630366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func)
1631366f6083SPeter Grehan {
163207044a96SNeel Natu 	int found, i, n;
163307044a96SNeel Natu 	int b, s, f;
1634366f6083SPeter Grehan 	char *val, *cp, *cp2;
1635366f6083SPeter Grehan 
1636366f6083SPeter Grehan 	/*
163707044a96SNeel Natu 	 * XXX
163807044a96SNeel Natu 	 * The length of an environment variable is limited to 128 bytes which
163907044a96SNeel Natu 	 * puts an upper limit on the number of passthru devices that may be
164007044a96SNeel Natu 	 * specified using a single environment variable.
164107044a96SNeel Natu 	 *
164207044a96SNeel Natu 	 * Work around this by scanning multiple environment variable
164307044a96SNeel Natu 	 * names instead of a single one - yuck!
1644366f6083SPeter Grehan 	 */
164507044a96SNeel Natu 	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
164607044a96SNeel Natu 
164707044a96SNeel Natu 	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
1648366f6083SPeter Grehan 	found = 0;
164907044a96SNeel Natu 	for (i = 0; names[i] != NULL && !found; i++) {
165007044a96SNeel Natu 		cp = val = getenv(names[i]);
1651366f6083SPeter Grehan 		while (cp != NULL && *cp != '\0') {
1652366f6083SPeter Grehan 			if ((cp2 = strchr(cp, ' ')) != NULL)
1653366f6083SPeter Grehan 				*cp2 = '\0';
1654366f6083SPeter Grehan 
1655366f6083SPeter Grehan 			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
1656366f6083SPeter Grehan 			if (n == 3 && bus == b && slot == s && func == f) {
1657366f6083SPeter Grehan 				found = 1;
1658366f6083SPeter Grehan 				break;
1659366f6083SPeter Grehan 			}
1660366f6083SPeter Grehan 
1661366f6083SPeter Grehan 			if (cp2 != NULL)
1662366f6083SPeter Grehan 				*cp2++ = ' ';
1663366f6083SPeter Grehan 
1664366f6083SPeter Grehan 			cp = cp2;
1665366f6083SPeter Grehan 		}
1666366f6083SPeter Grehan 		freeenv(val);
166707044a96SNeel Natu 	}
1668366f6083SPeter Grehan 	return (found);
1669366f6083SPeter Grehan }
1670366f6083SPeter Grehan 
1671366f6083SPeter Grehan void *
1672366f6083SPeter Grehan vm_iommu_domain(struct vm *vm)
1673366f6083SPeter Grehan {
1674366f6083SPeter Grehan 
1675366f6083SPeter Grehan 	return (vm->iommu);
1676366f6083SPeter Grehan }
1677366f6083SPeter Grehan 
167875dd3366SNeel Natu int
1679f80330a8SNeel Natu vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
1680f80330a8SNeel Natu     bool from_idle)
1681366f6083SPeter Grehan {
168275dd3366SNeel Natu 	int error;
1683366f6083SPeter Grehan 	struct vcpu *vcpu;
1684366f6083SPeter Grehan 
1685366f6083SPeter Grehan 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1686366f6083SPeter Grehan 		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
1687366f6083SPeter Grehan 
1688366f6083SPeter Grehan 	vcpu = &vm->vcpu[vcpuid];
1689366f6083SPeter Grehan 
169075dd3366SNeel Natu 	vcpu_lock(vcpu);
1691f80330a8SNeel Natu 	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
169275dd3366SNeel Natu 	vcpu_unlock(vcpu);
169375dd3366SNeel Natu 
169475dd3366SNeel Natu 	return (error);
169575dd3366SNeel Natu }
169675dd3366SNeel Natu 
169775dd3366SNeel Natu enum vcpu_state
1698d3c11f40SPeter Grehan vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
1699366f6083SPeter Grehan {
1700366f6083SPeter Grehan 	struct vcpu *vcpu;
170175dd3366SNeel Natu 	enum vcpu_state state;
1702366f6083SPeter Grehan 
1703366f6083SPeter Grehan 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1704366f6083SPeter Grehan 		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
1705366f6083SPeter Grehan 
1706366f6083SPeter Grehan 	vcpu = &vm->vcpu[vcpuid];
1707366f6083SPeter Grehan 
170875dd3366SNeel Natu 	vcpu_lock(vcpu);
170975dd3366SNeel Natu 	state = vcpu->state;
1710d3c11f40SPeter Grehan 	if (hostcpu != NULL)
1711d3c11f40SPeter Grehan 		*hostcpu = vcpu->hostcpu;
171275dd3366SNeel Natu 	vcpu_unlock(vcpu);
1713366f6083SPeter Grehan 
171475dd3366SNeel Natu 	return (state);
1715366f6083SPeter Grehan }
1716366f6083SPeter Grehan 
1717366f6083SPeter Grehan void
1718366f6083SPeter Grehan vm_activate_cpu(struct vm *vm, int vcpuid)
1719366f6083SPeter Grehan {
1720366f6083SPeter Grehan 
172122d822c6SNeel Natu 	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
172222d822c6SNeel Natu 	    ("vm_activate_cpu: invalid vcpuid %d", vcpuid));
172322d822c6SNeel Natu 	KASSERT(!CPU_ISSET(vcpuid, &vm->active_cpus),
172422d822c6SNeel Natu 	    ("vm_activate_cpu: vcpuid %d is already active", vcpuid));
172522d822c6SNeel Natu 
172622d822c6SNeel Natu 	VCPU_CTR0(vm, vcpuid, "activated");
172722d822c6SNeel Natu 	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
1728366f6083SPeter Grehan }
1729366f6083SPeter Grehan 
1730a5615c90SPeter Grehan cpuset_t
1731366f6083SPeter Grehan vm_active_cpus(struct vm *vm)
1732366f6083SPeter Grehan {
1733366f6083SPeter Grehan 
1734366f6083SPeter Grehan 	return (vm->active_cpus);
1735366f6083SPeter Grehan }
1736366f6083SPeter Grehan 
1737366f6083SPeter Grehan void *
1738366f6083SPeter Grehan vcpu_stats(struct vm *vm, int vcpuid)
1739366f6083SPeter Grehan {
1740366f6083SPeter Grehan 
1741366f6083SPeter Grehan 	return (vm->vcpu[vcpuid].stats);
1742366f6083SPeter Grehan }
1743e9027382SNeel Natu 
1744e9027382SNeel Natu int
1745e9027382SNeel Natu vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
1746e9027382SNeel Natu {
1747e9027382SNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1748e9027382SNeel Natu 		return (EINVAL);
1749e9027382SNeel Natu 
1750e9027382SNeel Natu 	*state = vm->vcpu[vcpuid].x2apic_state;
1751e9027382SNeel Natu 
1752e9027382SNeel Natu 	return (0);
1753e9027382SNeel Natu }
1754e9027382SNeel Natu 
1755e9027382SNeel Natu int
1756e9027382SNeel Natu vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1757e9027382SNeel Natu {
1758e9027382SNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1759e9027382SNeel Natu 		return (EINVAL);
1760e9027382SNeel Natu 
17613f23d3caSNeel Natu 	if (state >= X2APIC_STATE_LAST)
1762e9027382SNeel Natu 		return (EINVAL);
1763e9027382SNeel Natu 
1764e9027382SNeel Natu 	vm->vcpu[vcpuid].x2apic_state = state;
1765e9027382SNeel Natu 
176673820fb0SNeel Natu 	vlapic_set_x2apic_state(vm, vcpuid, state);
176773820fb0SNeel Natu 
1768e9027382SNeel Natu 	return (0);
1769e9027382SNeel Natu }
177075dd3366SNeel Natu 
177122821874SNeel Natu /*
177222821874SNeel Natu  * This function is called to ensure that a vcpu "sees" a pending event
177322821874SNeel Natu  * as soon as possible:
177422821874SNeel Natu  * - If the vcpu thread is sleeping then it is woken up.
177522821874SNeel Natu  * - If the vcpu is running on a different host_cpu then an IPI will be directed
177622821874SNeel Natu  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
177722821874SNeel Natu  */
177875dd3366SNeel Natu void
1779de5ea6b6SNeel Natu vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
178075dd3366SNeel Natu {
178175dd3366SNeel Natu 	int hostcpu;
178275dd3366SNeel Natu 	struct vcpu *vcpu;
178375dd3366SNeel Natu 
178475dd3366SNeel Natu 	vcpu = &vm->vcpu[vcpuid];
178575dd3366SNeel Natu 
1786f76fc5d4SNeel Natu 	vcpu_lock(vcpu);
178775dd3366SNeel Natu 	hostcpu = vcpu->hostcpu;
1788ef39d7e9SNeel Natu 	if (vcpu->state == VCPU_RUNNING) {
1789ef39d7e9SNeel Natu 		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
1790de5ea6b6SNeel Natu 		if (hostcpu != curcpu) {
1791ef39d7e9SNeel Natu 			if (lapic_intr) {
1792add611fdSNeel Natu 				vlapic_post_intr(vcpu->vlapic, hostcpu,
1793add611fdSNeel Natu 				    vmm_ipinum);
1794ef39d7e9SNeel Natu 			} else {
179575dd3366SNeel Natu 				ipi_cpu(hostcpu, vmm_ipinum);
179675dd3366SNeel Natu 			}
1797ef39d7e9SNeel Natu 		} else {
1798ef39d7e9SNeel Natu 			/*
1799ef39d7e9SNeel Natu 			 * If the 'vcpu' is running on 'curcpu' then it must
1800ef39d7e9SNeel Natu 			 * be sending a notification to itself (e.g. SELF_IPI).
1801ef39d7e9SNeel Natu 			 * The pending event will be picked up when the vcpu
1802ef39d7e9SNeel Natu 			 * transitions back to guest context.
1803ef39d7e9SNeel Natu 			 */
1804ef39d7e9SNeel Natu 		}
1805ef39d7e9SNeel Natu 	} else {
1806ef39d7e9SNeel Natu 		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
1807ef39d7e9SNeel Natu 		    "with hostcpu %d", vcpu->state, hostcpu));
1808ef39d7e9SNeel Natu 		if (vcpu->state == VCPU_SLEEPING)
1809ef39d7e9SNeel Natu 			wakeup_one(vcpu);
1810de5ea6b6SNeel Natu 	}
1811f76fc5d4SNeel Natu 	vcpu_unlock(vcpu);
1812f76fc5d4SNeel Natu }
1813318224bbSNeel Natu 
1814318224bbSNeel Natu struct vmspace *
1815318224bbSNeel Natu vm_get_vmspace(struct vm *vm)
1816318224bbSNeel Natu {
1817318224bbSNeel Natu 
1818318224bbSNeel Natu 	return (vm->vmspace);
1819318224bbSNeel Natu }
1820565bbb86SNeel Natu 
/*
 * Translate an APIC id into a vcpu id.
 *
 * XXX the apic id is assumed to be numerically identical to the vcpu id,
 * so 'apicid' is returned unchanged and 'vm' is not consulted.
 */
int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	int vcpuid;

	vcpuid = apicid;
	return (vcpuid);
}
18295b8a8cd1SNeel Natu 
18305b8a8cd1SNeel Natu void
18315b8a8cd1SNeel Natu vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
18325b8a8cd1SNeel Natu     vm_rendezvous_func_t func, void *arg)
18335b8a8cd1SNeel Natu {
1834970955e4SNeel Natu 	int i;
1835970955e4SNeel Natu 
18365b8a8cd1SNeel Natu 	/*
18375b8a8cd1SNeel Natu 	 * Enforce that this function is called without any locks
18385b8a8cd1SNeel Natu 	 */
18395b8a8cd1SNeel Natu 	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
18405b8a8cd1SNeel Natu 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
18415b8a8cd1SNeel Natu 	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
18425b8a8cd1SNeel Natu 
18435b8a8cd1SNeel Natu restart:
18445b8a8cd1SNeel Natu 	mtx_lock(&vm->rendezvous_mtx);
18455b8a8cd1SNeel Natu 	if (vm->rendezvous_func != NULL) {
18465b8a8cd1SNeel Natu 		/*
18475b8a8cd1SNeel Natu 		 * If a rendezvous is already in progress then we need to
18485b8a8cd1SNeel Natu 		 * call the rendezvous handler in case this 'vcpuid' is one
18495b8a8cd1SNeel Natu 		 * of the targets of the rendezvous.
18505b8a8cd1SNeel Natu 		 */
18515b8a8cd1SNeel Natu 		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
18525b8a8cd1SNeel Natu 		mtx_unlock(&vm->rendezvous_mtx);
18535b8a8cd1SNeel Natu 		vm_handle_rendezvous(vm, vcpuid);
18545b8a8cd1SNeel Natu 		goto restart;
18555b8a8cd1SNeel Natu 	}
18565b8a8cd1SNeel Natu 	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
18575b8a8cd1SNeel Natu 	    "rendezvous is still in progress"));
18585b8a8cd1SNeel Natu 
18595b8a8cd1SNeel Natu 	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
18605b8a8cd1SNeel Natu 	vm->rendezvous_req_cpus = dest;
18615b8a8cd1SNeel Natu 	CPU_ZERO(&vm->rendezvous_done_cpus);
18625b8a8cd1SNeel Natu 	vm->rendezvous_arg = arg;
18635b8a8cd1SNeel Natu 	vm_set_rendezvous_func(vm, func);
18645b8a8cd1SNeel Natu 	mtx_unlock(&vm->rendezvous_mtx);
18655b8a8cd1SNeel Natu 
1866970955e4SNeel Natu 	/*
1867970955e4SNeel Natu 	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
1868970955e4SNeel Natu 	 * vcpus so they handle the rendezvous as soon as possible.
1869970955e4SNeel Natu 	 */
1870970955e4SNeel Natu 	for (i = 0; i < VM_MAXCPU; i++) {
1871970955e4SNeel Natu 		if (CPU_ISSET(i, &dest))
1872970955e4SNeel Natu 			vcpu_notify_event(vm, i, false);
1873970955e4SNeel Natu 	}
1874970955e4SNeel Natu 
18755b8a8cd1SNeel Natu 	vm_handle_rendezvous(vm, vcpuid);
18765b8a8cd1SNeel Natu }
1877762fd208STycho Nightingale 
1878762fd208STycho Nightingale struct vatpic *
1879762fd208STycho Nightingale vm_atpic(struct vm *vm)
1880762fd208STycho Nightingale {
1881762fd208STycho Nightingale 	return (vm->vatpic);
1882762fd208STycho Nightingale }
1883e883c9bbSTycho Nightingale 
1884e883c9bbSTycho Nightingale struct vatpit *
1885e883c9bbSTycho Nightingale vm_atpit(struct vm *vm)
1886e883c9bbSTycho Nightingale {
1887e883c9bbSTycho Nightingale 	return (vm->vatpit);
1888e883c9bbSTycho Nightingale }
1889d17b5104SNeel Natu 
1890d17b5104SNeel Natu enum vm_reg_name
1891d17b5104SNeel Natu vm_segment_name(int seg)
1892d17b5104SNeel Natu {
1893d17b5104SNeel Natu 	static enum vm_reg_name seg_names[] = {
1894d17b5104SNeel Natu 		VM_REG_GUEST_ES,
1895d17b5104SNeel Natu 		VM_REG_GUEST_CS,
1896d17b5104SNeel Natu 		VM_REG_GUEST_SS,
1897d17b5104SNeel Natu 		VM_REG_GUEST_DS,
1898d17b5104SNeel Natu 		VM_REG_GUEST_FS,
1899d17b5104SNeel Natu 		VM_REG_GUEST_GS
1900d17b5104SNeel Natu 	};
1901d17b5104SNeel Natu 
1902d17b5104SNeel Natu 	KASSERT(seg >= 0 && seg < nitems(seg_names),
1903d17b5104SNeel Natu 	    ("%s: invalid segment encoding %d", __func__, seg));
1904d17b5104SNeel Natu 	return (seg_names[seg]);
1905d17b5104SNeel Natu }
1906