xref: /freebsd/sys/amd64/vmm/vmm.c (revision c6a0cc2e2152b87d2213a871ad863c7b787f935a)
1366f6083SPeter Grehan /*-
2366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
3366f6083SPeter Grehan  * All rights reserved.
4366f6083SPeter Grehan  *
5366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
6366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
7366f6083SPeter Grehan  * are met:
8366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
9366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
10366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
11366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
12366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
13366f6083SPeter Grehan  *
14366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24366f6083SPeter Grehan  * SUCH DAMAGE.
25366f6083SPeter Grehan  *
26366f6083SPeter Grehan  * $FreeBSD$
27366f6083SPeter Grehan  */
28366f6083SPeter Grehan 
29366f6083SPeter Grehan #include <sys/cdefs.h>
30366f6083SPeter Grehan __FBSDID("$FreeBSD$");
31366f6083SPeter Grehan 
32366f6083SPeter Grehan #include <sys/param.h>
3338f1b189SPeter Grehan #include <sys/systm.h>
34366f6083SPeter Grehan #include <sys/kernel.h>
35366f6083SPeter Grehan #include <sys/module.h>
36366f6083SPeter Grehan #include <sys/sysctl.h>
37366f6083SPeter Grehan #include <sys/malloc.h>
38366f6083SPeter Grehan #include <sys/pcpu.h>
39366f6083SPeter Grehan #include <sys/lock.h>
40366f6083SPeter Grehan #include <sys/mutex.h>
41366f6083SPeter Grehan #include <sys/proc.h>
42318224bbSNeel Natu #include <sys/rwlock.h>
43366f6083SPeter Grehan #include <sys/sched.h>
44366f6083SPeter Grehan #include <sys/smp.h>
45366f6083SPeter Grehan #include <sys/systm.h>
46366f6083SPeter Grehan 
47366f6083SPeter Grehan #include <vm/vm.h>
48318224bbSNeel Natu #include <vm/vm_object.h>
49318224bbSNeel Natu #include <vm/vm_page.h>
50318224bbSNeel Natu #include <vm/pmap.h>
51318224bbSNeel Natu #include <vm/vm_map.h>
52318224bbSNeel Natu #include <vm/vm_extern.h>
53318224bbSNeel Natu #include <vm/vm_param.h>
54366f6083SPeter Grehan 
5563e62d39SJohn Baldwin #include <machine/cpu.h>
56366f6083SPeter Grehan #include <machine/vm.h>
57366f6083SPeter Grehan #include <machine/pcb.h>
5875dd3366SNeel Natu #include <machine/smp.h>
591c052192SNeel Natu #include <x86/psl.h>
6034a6b2d6SJohn Baldwin #include <x86/apicreg.h>
61318224bbSNeel Natu #include <machine/vmparam.h>
62366f6083SPeter Grehan 
63366f6083SPeter Grehan #include <machine/vmm.h>
64565bbb86SNeel Natu #include <machine/vmm_dev.h>
65565bbb86SNeel Natu 
66318224bbSNeel Natu #include "vmm_ktr.h"
67b01c2033SNeel Natu #include "vmm_host.h"
68366f6083SPeter Grehan #include "vmm_mem.h"
69366f6083SPeter Grehan #include "vmm_util.h"
70762fd208STycho Nightingale #include "vatpic.h"
71e883c9bbSTycho Nightingale #include "vatpit.h"
7208e3ff32SNeel Natu #include "vhpet.h"
73565bbb86SNeel Natu #include "vioapic.h"
74366f6083SPeter Grehan #include "vlapic.h"
75366f6083SPeter Grehan #include "vmm_msr.h"
76366f6083SPeter Grehan #include "vmm_ipi.h"
77366f6083SPeter Grehan #include "vmm_stat.h"
78f76fc5d4SNeel Natu #include "vmm_lapic.h"
79366f6083SPeter Grehan 
80366f6083SPeter Grehan #include "io/ppt.h"
81366f6083SPeter Grehan #include "io/iommu.h"
82366f6083SPeter Grehan 
83366f6083SPeter Grehan struct vlapic;
84366f6083SPeter Grehan 
/*
 * Per-vcpu state.  One instance lives in each slot of vm->vcpu[] and is
 * set up by vcpu_init() and torn down by vcpu_cleanup().
 */
struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;		/* spin mutex, see vcpu_lock() */
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];
	struct vlapic	*vlapic;	/* obtained from VLAPIC_INIT() */
	int		 vcpuid;	/* index of this vcpu in vm->vcpu[] */
	struct savefpu	*guestfpu;	/* guest fpu state */
	uint64_t	guest_xcr0;	/* starts as XFEATURE_ENABLED_X87 */
	void		*stats;		/* obtained from vmm_stat_alloc() */
	struct vm_exit	exitinfo;	/* returned by vm_exitinfo() */
	enum x2apic_state x2apic_state;
	int		nmi_pending;
	int		extint_pending;
	struct vm_exception exception;
	int		exception_pending;
};
103366f6083SPeter Grehan 
/*
 * Helpers for the per-vcpu spin mutex embedded in 'struct vcpu'.
 * 'v' is a pointer to the vcpu whose mutex is being operated on.
 */
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
10875dd3366SNeel Natu 
/*
 * One contiguous range of guest physical memory, recorded by vm_malloc().
 */
struct mem_seg {
	vm_paddr_t	gpa;		/* guest physical base address */
	size_t		len;		/* length in bytes */
	boolean_t	wired;		/* set by vm_gpa_wire()/vm_gpa_unwire() */
	vm_object_t	object;		/* returned by vmm_mem_alloc() */
};
/* Capacity of vm->mem_segs[]; vm_malloc() returns E2BIG beyond this. */
#define	VM_MAX_MEMORY_SEGMENTS	2
116366f6083SPeter Grehan 
/*
 * Per-virtual-machine state.  Allocated zero-filled by vm_create() and
 * released by vm_destroy().
 */
struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vhpet	*vhpet;		/* virtual HPET */
	struct vioapic	*vioapic;	/* virtual ioapic */
	struct vatpic	*vatpic;	/* virtual atpic */
	struct vatpit	*vatpit;	/* virtual atpit */
	struct vmspace	*vmspace;	/* guest's address space */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;	/* number of valid mem_segs[] entries */
	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	volatile cpuset_t active_cpus;

	struct mtx	rendezvous_mtx;	/* initialized in vm_create() */
	cpuset_t	rendezvous_req_cpus;
	cpuset_t	rendezvous_done_cpus;
	void		*rendezvous_arg;
	vm_rendezvous_func_t rendezvous_func;

	int		suspend;
	volatile cpuset_t suspended_cpus;
};
146366f6083SPeter Grehan 
/* Set to 1 once vmm_init() succeeds; vm_create() refuses to run otherwise. */
static int vmm_initialized;

/*
 * Vendor-specific operations table, selected in vmm_init().
 *
 * Each wrapper below dispatches through 'ops' and evaluates to the listed
 * fallback value (0, NULL or ENXIO) when 'ops' is still NULL.
 */
static struct vmm_ops *ops;
#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)

#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMSPACE_ALLOC(min, max) \
	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define	VMSPACE_FREE(vmspace) \
	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval)		\
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	VLAPIC_INIT(vmi, vcpu)			\
	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define	VLAPIC_CLEANUP(vmi, vlapic)		\
	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
178366f6083SPeter Grehan 
/* Set / clear CR0.TS on the current cpu. */
#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

/* malloc(9) type used for 'struct vm' allocations in this file. */
static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

/* Root of the hw.vmm sysctl tree. */
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

/* Chosen in vmm_init(); exported read-only as hw.vmm.ipinum. */
static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");
193add611fdSNeel Natu 
194366f6083SPeter Grehan static void
195de5ea6b6SNeel Natu vcpu_cleanup(struct vm *vm, int i)
196366f6083SPeter Grehan {
197de5ea6b6SNeel Natu 	struct vcpu *vcpu = &vm->vcpu[i];
198de5ea6b6SNeel Natu 
199de5ea6b6SNeel Natu 	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
200366f6083SPeter Grehan 	vmm_stat_free(vcpu->stats);
20138f1b189SPeter Grehan 	fpu_save_area_free(vcpu->guestfpu);
202366f6083SPeter Grehan }
203366f6083SPeter Grehan 
204366f6083SPeter Grehan static void
205366f6083SPeter Grehan vcpu_init(struct vm *vm, uint32_t vcpu_id)
206366f6083SPeter Grehan {
207366f6083SPeter Grehan 	struct vcpu *vcpu;
208366f6083SPeter Grehan 
209366f6083SPeter Grehan 	vcpu = &vm->vcpu[vcpu_id];
210366f6083SPeter Grehan 
21175dd3366SNeel Natu 	vcpu_lock_init(vcpu);
21275dd3366SNeel Natu 	vcpu->hostcpu = NOCPU;
213366f6083SPeter Grehan 	vcpu->vcpuid = vcpu_id;
214de5ea6b6SNeel Natu 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
21552e5c8a2SNeel Natu 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
216abb023fbSJohn Baldwin 	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
21738f1b189SPeter Grehan 	vcpu->guestfpu = fpu_save_area_alloc();
21838f1b189SPeter Grehan 	fpu_save_area_reset(vcpu->guestfpu);
219366f6083SPeter Grehan 	vcpu->stats = vmm_stat_alloc();
220366f6083SPeter Grehan }
221366f6083SPeter Grehan 
22298ed632cSNeel Natu struct vm_exit *
22398ed632cSNeel Natu vm_exitinfo(struct vm *vm, int cpuid)
22498ed632cSNeel Natu {
22598ed632cSNeel Natu 	struct vcpu *vcpu;
22698ed632cSNeel Natu 
22798ed632cSNeel Natu 	if (cpuid < 0 || cpuid >= VM_MAXCPU)
22898ed632cSNeel Natu 		panic("vm_exitinfo: invalid cpuid %d", cpuid);
22998ed632cSNeel Natu 
23098ed632cSNeel Natu 	vcpu = &vm->vcpu[cpuid];
23198ed632cSNeel Natu 
23298ed632cSNeel Natu 	return (&vcpu->exitinfo);
23398ed632cSNeel Natu }
23498ed632cSNeel Natu 
/*
 * Resume hook: forwards to the vendor-specific resume handler through
 * VMM_RESUME().  vmm_init() installs this function in 'vmm_resume_p'.
 */
static void
vmm_resume(void)
{
	VMM_RESUME();
}
24063e62d39SJohn Baldwin 
241366f6083SPeter Grehan static int
242366f6083SPeter Grehan vmm_init(void)
243366f6083SPeter Grehan {
244366f6083SPeter Grehan 	int error;
245366f6083SPeter Grehan 
246b01c2033SNeel Natu 	vmm_host_state_init();
247add611fdSNeel Natu 
248add611fdSNeel Natu 	vmm_ipinum = vmm_ipi_alloc();
249add611fdSNeel Natu 	if (vmm_ipinum == 0)
250add611fdSNeel Natu 		vmm_ipinum = IPI_AST;
251366f6083SPeter Grehan 
252366f6083SPeter Grehan 	error = vmm_mem_init();
253366f6083SPeter Grehan 	if (error)
254366f6083SPeter Grehan 		return (error);
255366f6083SPeter Grehan 
256366f6083SPeter Grehan 	if (vmm_is_intel())
257366f6083SPeter Grehan 		ops = &vmm_ops_intel;
258366f6083SPeter Grehan 	else if (vmm_is_amd())
259366f6083SPeter Grehan 		ops = &vmm_ops_amd;
260366f6083SPeter Grehan 	else
261366f6083SPeter Grehan 		return (ENXIO);
262366f6083SPeter Grehan 
263366f6083SPeter Grehan 	vmm_msr_init();
26463e62d39SJohn Baldwin 	vmm_resume_p = vmm_resume;
265366f6083SPeter Grehan 
266add611fdSNeel Natu 	return (VMM_INIT(vmm_ipinum));
267366f6083SPeter Grehan }
268366f6083SPeter Grehan 
269366f6083SPeter Grehan static int
270366f6083SPeter Grehan vmm_handler(module_t mod, int what, void *arg)
271366f6083SPeter Grehan {
272366f6083SPeter Grehan 	int error;
273366f6083SPeter Grehan 
274366f6083SPeter Grehan 	switch (what) {
275366f6083SPeter Grehan 	case MOD_LOAD:
276366f6083SPeter Grehan 		vmmdev_init();
27751f45d01SNeel Natu 		if (ppt_avail_devices() > 0)
278366f6083SPeter Grehan 			iommu_init();
279366f6083SPeter Grehan 		error = vmm_init();
280d5408b1dSNeel Natu 		if (error == 0)
281d5408b1dSNeel Natu 			vmm_initialized = 1;
282366f6083SPeter Grehan 		break;
283366f6083SPeter Grehan 	case MOD_UNLOAD:
284cdc5b9e7SNeel Natu 		error = vmmdev_cleanup();
285cdc5b9e7SNeel Natu 		if (error == 0) {
28663e62d39SJohn Baldwin 			vmm_resume_p = NULL;
287366f6083SPeter Grehan 			iommu_cleanup();
288add611fdSNeel Natu 			if (vmm_ipinum != IPI_AST)
289add611fdSNeel Natu 				vmm_ipi_free(vmm_ipinum);
290366f6083SPeter Grehan 			error = VMM_CLEANUP();
29181ef6611SPeter Grehan 			/*
29281ef6611SPeter Grehan 			 * Something bad happened - prevent new
29381ef6611SPeter Grehan 			 * VMs from being created
29481ef6611SPeter Grehan 			 */
29581ef6611SPeter Grehan 			if (error)
296d5408b1dSNeel Natu 				vmm_initialized = 0;
29781ef6611SPeter Grehan 		}
298366f6083SPeter Grehan 		break;
299366f6083SPeter Grehan 	default:
300366f6083SPeter Grehan 		error = 0;
301366f6083SPeter Grehan 		break;
302366f6083SPeter Grehan 	}
303366f6083SPeter Grehan 	return (error);
304366f6083SPeter Grehan }
305366f6083SPeter Grehan 
/* Module description: name, event handler and (unused) private argument. */
static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);
323366f6083SPeter Grehan 
324d5408b1dSNeel Natu int
325d5408b1dSNeel Natu vm_create(const char *name, struct vm **retvm)
326366f6083SPeter Grehan {
327366f6083SPeter Grehan 	int i;
328366f6083SPeter Grehan 	struct vm *vm;
329318224bbSNeel Natu 	struct vmspace *vmspace;
330366f6083SPeter Grehan 
331366f6083SPeter Grehan 	const int BSP = 0;
332366f6083SPeter Grehan 
333d5408b1dSNeel Natu 	/*
334d5408b1dSNeel Natu 	 * If vmm.ko could not be successfully initialized then don't attempt
335d5408b1dSNeel Natu 	 * to create the virtual machine.
336d5408b1dSNeel Natu 	 */
337d5408b1dSNeel Natu 	if (!vmm_initialized)
338d5408b1dSNeel Natu 		return (ENXIO);
339d5408b1dSNeel Natu 
340366f6083SPeter Grehan 	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
341d5408b1dSNeel Natu 		return (EINVAL);
342366f6083SPeter Grehan 
343318224bbSNeel Natu 	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
344318224bbSNeel Natu 	if (vmspace == NULL)
345318224bbSNeel Natu 		return (ENOMEM);
346318224bbSNeel Natu 
347366f6083SPeter Grehan 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
348366f6083SPeter Grehan 	strcpy(vm->name, name);
34988c4b8d1SNeel Natu 	vm->vmspace = vmspace;
3505b8a8cd1SNeel Natu 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
351318224bbSNeel Natu 	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
352565bbb86SNeel Natu 	vm->vioapic = vioapic_init(vm);
35308e3ff32SNeel Natu 	vm->vhpet = vhpet_init(vm);
354762fd208STycho Nightingale 	vm->vatpic = vatpic_init(vm);
355e883c9bbSTycho Nightingale 	vm->vatpit = vatpit_init(vm);
356366f6083SPeter Grehan 
357366f6083SPeter Grehan 	for (i = 0; i < VM_MAXCPU; i++) {
358366f6083SPeter Grehan 		vcpu_init(vm, i);
359366f6083SPeter Grehan 		guest_msrs_init(vm, i);
360366f6083SPeter Grehan 	}
361366f6083SPeter Grehan 
362366f6083SPeter Grehan 	vm_activate_cpu(vm, BSP);
363366f6083SPeter Grehan 
364d5408b1dSNeel Natu 	*retvm = vm;
365d5408b1dSNeel Natu 	return (0);
366366f6083SPeter Grehan }
367366f6083SPeter Grehan 
368f7d51510SNeel Natu static void
369318224bbSNeel Natu vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
370f7d51510SNeel Natu {
3717ce04d0aSNeel Natu 
372318224bbSNeel Natu 	if (seg->object != NULL)
373318224bbSNeel Natu 		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
374f7d51510SNeel Natu 
375318224bbSNeel Natu 	bzero(seg, sizeof(*seg));
376f7d51510SNeel Natu }
377f7d51510SNeel Natu 
378366f6083SPeter Grehan void
379366f6083SPeter Grehan vm_destroy(struct vm *vm)
380366f6083SPeter Grehan {
381366f6083SPeter Grehan 	int i;
382366f6083SPeter Grehan 
383366f6083SPeter Grehan 	ppt_unassign_all(vm);
384366f6083SPeter Grehan 
385318224bbSNeel Natu 	if (vm->iommu != NULL)
386318224bbSNeel Natu 		iommu_destroy_domain(vm->iommu);
387318224bbSNeel Natu 
388e883c9bbSTycho Nightingale 	vatpit_cleanup(vm->vatpit);
38908e3ff32SNeel Natu 	vhpet_cleanup(vm->vhpet);
390762fd208STycho Nightingale 	vatpic_cleanup(vm->vatpic);
39108e3ff32SNeel Natu 	vioapic_cleanup(vm->vioapic);
39208e3ff32SNeel Natu 
393366f6083SPeter Grehan 	for (i = 0; i < vm->num_mem_segs; i++)
394f7d51510SNeel Natu 		vm_free_mem_seg(vm, &vm->mem_segs[i]);
395f7d51510SNeel Natu 
396f7d51510SNeel Natu 	vm->num_mem_segs = 0;
397366f6083SPeter Grehan 
398366f6083SPeter Grehan 	for (i = 0; i < VM_MAXCPU; i++)
399de5ea6b6SNeel Natu 		vcpu_cleanup(vm, i);
400366f6083SPeter Grehan 
401318224bbSNeel Natu 	VMSPACE_FREE(vm->vmspace);
402366f6083SPeter Grehan 
403366f6083SPeter Grehan 	VMCLEANUP(vm->cookie);
404366f6083SPeter Grehan 
405366f6083SPeter Grehan 	free(vm, M_VM);
406366f6083SPeter Grehan }
407366f6083SPeter Grehan 
408366f6083SPeter Grehan const char *
409366f6083SPeter Grehan vm_name(struct vm *vm)
410366f6083SPeter Grehan {
411366f6083SPeter Grehan 	return (vm->name);
412366f6083SPeter Grehan }
413366f6083SPeter Grehan 
414366f6083SPeter Grehan int
415366f6083SPeter Grehan vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
416366f6083SPeter Grehan {
417318224bbSNeel Natu 	vm_object_t obj;
418366f6083SPeter Grehan 
419318224bbSNeel Natu 	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
420318224bbSNeel Natu 		return (ENOMEM);
421318224bbSNeel Natu 	else
422318224bbSNeel Natu 		return (0);
423366f6083SPeter Grehan }
424366f6083SPeter Grehan 
425366f6083SPeter Grehan int
426366f6083SPeter Grehan vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
427366f6083SPeter Grehan {
428366f6083SPeter Grehan 
429318224bbSNeel Natu 	vmm_mmio_free(vm->vmspace, gpa, len);
430318224bbSNeel Natu 	return (0);
431366f6083SPeter Grehan }
432366f6083SPeter Grehan 
433318224bbSNeel Natu boolean_t
434318224bbSNeel Natu vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
435366f6083SPeter Grehan {
436341f19c9SNeel Natu 	int i;
437341f19c9SNeel Natu 	vm_paddr_t gpabase, gpalimit;
438341f19c9SNeel Natu 
439341f19c9SNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
440341f19c9SNeel Natu 		gpabase = vm->mem_segs[i].gpa;
441341f19c9SNeel Natu 		gpalimit = gpabase + vm->mem_segs[i].len;
442341f19c9SNeel Natu 		if (gpa >= gpabase && gpa < gpalimit)
443318224bbSNeel Natu 			return (TRUE);		/* 'gpa' is regular memory */
444341f19c9SNeel Natu 	}
445341f19c9SNeel Natu 
446318224bbSNeel Natu 	if (ppt_is_mmio(vm, gpa))
447318224bbSNeel Natu 		return (TRUE);			/* 'gpa' is pci passthru mmio */
448318224bbSNeel Natu 
449318224bbSNeel Natu 	return (FALSE);
450341f19c9SNeel Natu }
451341f19c9SNeel Natu 
452341f19c9SNeel Natu int
453341f19c9SNeel Natu vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
454341f19c9SNeel Natu {
455318224bbSNeel Natu 	int available, allocated;
456318224bbSNeel Natu 	struct mem_seg *seg;
457318224bbSNeel Natu 	vm_object_t object;
458318224bbSNeel Natu 	vm_paddr_t g;
459366f6083SPeter Grehan 
460341f19c9SNeel Natu 	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
461341f19c9SNeel Natu 		return (EINVAL);
462341f19c9SNeel Natu 
463341f19c9SNeel Natu 	available = allocated = 0;
464341f19c9SNeel Natu 	g = gpa;
465341f19c9SNeel Natu 	while (g < gpa + len) {
466318224bbSNeel Natu 		if (vm_mem_allocated(vm, g))
467341f19c9SNeel Natu 			allocated++;
468318224bbSNeel Natu 		else
469318224bbSNeel Natu 			available++;
470341f19c9SNeel Natu 
471341f19c9SNeel Natu 		g += PAGE_SIZE;
472341f19c9SNeel Natu 	}
473341f19c9SNeel Natu 
474366f6083SPeter Grehan 	/*
475341f19c9SNeel Natu 	 * If there are some allocated and some available pages in the address
476341f19c9SNeel Natu 	 * range then it is an error.
477366f6083SPeter Grehan 	 */
478341f19c9SNeel Natu 	if (allocated && available)
479341f19c9SNeel Natu 		return (EINVAL);
480341f19c9SNeel Natu 
481341f19c9SNeel Natu 	/*
482341f19c9SNeel Natu 	 * If the entire address range being requested has already been
483341f19c9SNeel Natu 	 * allocated then there isn't anything more to do.
484341f19c9SNeel Natu 	 */
485341f19c9SNeel Natu 	if (allocated && available == 0)
486341f19c9SNeel Natu 		return (0);
487366f6083SPeter Grehan 
488366f6083SPeter Grehan 	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
489366f6083SPeter Grehan 		return (E2BIG);
490366f6083SPeter Grehan 
491f7d51510SNeel Natu 	seg = &vm->mem_segs[vm->num_mem_segs];
492366f6083SPeter Grehan 
493318224bbSNeel Natu 	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
494318224bbSNeel Natu 		return (ENOMEM);
495318224bbSNeel Natu 
496f7d51510SNeel Natu 	seg->gpa = gpa;
497318224bbSNeel Natu 	seg->len = len;
498318224bbSNeel Natu 	seg->object = object;
499318224bbSNeel Natu 	seg->wired = FALSE;
5007ce04d0aSNeel Natu 
501366f6083SPeter Grehan 	vm->num_mem_segs++;
502341f19c9SNeel Natu 
503366f6083SPeter Grehan 	return (0);
504366f6083SPeter Grehan }
505366f6083SPeter Grehan 
506318224bbSNeel Natu static void
507318224bbSNeel Natu vm_gpa_unwire(struct vm *vm)
508366f6083SPeter Grehan {
509318224bbSNeel Natu 	int i, rv;
510318224bbSNeel Natu 	struct mem_seg *seg;
5114db4fb2cSNeel Natu 
512318224bbSNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
513318224bbSNeel Natu 		seg = &vm->mem_segs[i];
514318224bbSNeel Natu 		if (!seg->wired)
515318224bbSNeel Natu 			continue;
516366f6083SPeter Grehan 
517318224bbSNeel Natu 		rv = vm_map_unwire(&vm->vmspace->vm_map,
518318224bbSNeel Natu 				   seg->gpa, seg->gpa + seg->len,
519318224bbSNeel Natu 				   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
520318224bbSNeel Natu 		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
521318224bbSNeel Natu 		    "%#lx/%ld could not be unwired: %d",
522318224bbSNeel Natu 		    vm_name(vm), seg->gpa, seg->len, rv));
523318224bbSNeel Natu 
524318224bbSNeel Natu 		seg->wired = FALSE;
525318224bbSNeel Natu 	}
526318224bbSNeel Natu }
527318224bbSNeel Natu 
528318224bbSNeel Natu static int
529318224bbSNeel Natu vm_gpa_wire(struct vm *vm)
530318224bbSNeel Natu {
531318224bbSNeel Natu 	int i, rv;
532318224bbSNeel Natu 	struct mem_seg *seg;
533318224bbSNeel Natu 
534318224bbSNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
535318224bbSNeel Natu 		seg = &vm->mem_segs[i];
536318224bbSNeel Natu 		if (seg->wired)
537318224bbSNeel Natu 			continue;
538318224bbSNeel Natu 
539318224bbSNeel Natu 		/* XXX rlimits? */
540318224bbSNeel Natu 		rv = vm_map_wire(&vm->vmspace->vm_map,
541318224bbSNeel Natu 				 seg->gpa, seg->gpa + seg->len,
542318224bbSNeel Natu 				 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
543318224bbSNeel Natu 		if (rv != KERN_SUCCESS)
544318224bbSNeel Natu 			break;
545318224bbSNeel Natu 
546318224bbSNeel Natu 		seg->wired = TRUE;
547318224bbSNeel Natu 	}
548318224bbSNeel Natu 
549318224bbSNeel Natu 	if (i < vm->num_mem_segs) {
550318224bbSNeel Natu 		/*
551318224bbSNeel Natu 		 * Undo the wiring before returning an error.
552318224bbSNeel Natu 		 */
553318224bbSNeel Natu 		vm_gpa_unwire(vm);
554318224bbSNeel Natu 		return (EAGAIN);
555318224bbSNeel Natu 	}
556318224bbSNeel Natu 
557318224bbSNeel Natu 	return (0);
558318224bbSNeel Natu }
559318224bbSNeel Natu 
560318224bbSNeel Natu static void
561318224bbSNeel Natu vm_iommu_modify(struct vm *vm, boolean_t map)
562318224bbSNeel Natu {
563318224bbSNeel Natu 	int i, sz;
564318224bbSNeel Natu 	vm_paddr_t gpa, hpa;
565318224bbSNeel Natu 	struct mem_seg *seg;
566318224bbSNeel Natu 	void *vp, *cookie, *host_domain;
567318224bbSNeel Natu 
568318224bbSNeel Natu 	sz = PAGE_SIZE;
569318224bbSNeel Natu 	host_domain = iommu_host_domain();
570318224bbSNeel Natu 
571318224bbSNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
572318224bbSNeel Natu 		seg = &vm->mem_segs[i];
573318224bbSNeel Natu 		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
574318224bbSNeel Natu 		    vm_name(vm), seg->gpa, seg->len));
575318224bbSNeel Natu 
576318224bbSNeel Natu 		gpa = seg->gpa;
577318224bbSNeel Natu 		while (gpa < seg->gpa + seg->len) {
578318224bbSNeel Natu 			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
579318224bbSNeel Natu 					 &cookie);
580318224bbSNeel Natu 			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
581318224bbSNeel Natu 			    vm_name(vm), gpa));
582318224bbSNeel Natu 
583318224bbSNeel Natu 			vm_gpa_release(cookie);
584318224bbSNeel Natu 
585318224bbSNeel Natu 			hpa = DMAP_TO_PHYS((uintptr_t)vp);
586318224bbSNeel Natu 			if (map) {
587318224bbSNeel Natu 				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
588318224bbSNeel Natu 				iommu_remove_mapping(host_domain, hpa, sz);
589318224bbSNeel Natu 			} else {
590318224bbSNeel Natu 				iommu_remove_mapping(vm->iommu, gpa, sz);
591318224bbSNeel Natu 				iommu_create_mapping(host_domain, hpa, hpa, sz);
592318224bbSNeel Natu 			}
593318224bbSNeel Natu 
594318224bbSNeel Natu 			gpa += PAGE_SIZE;
595318224bbSNeel Natu 		}
596318224bbSNeel Natu 	}
597318224bbSNeel Natu 
598318224bbSNeel Natu 	/*
599318224bbSNeel Natu 	 * Invalidate the cached translations associated with the domain
600318224bbSNeel Natu 	 * from which pages were removed.
601318224bbSNeel Natu 	 */
602318224bbSNeel Natu 	if (map)
603318224bbSNeel Natu 		iommu_invalidate_tlb(host_domain);
604318224bbSNeel Natu 	else
605318224bbSNeel Natu 		iommu_invalidate_tlb(vm->iommu);
606318224bbSNeel Natu }
607318224bbSNeel Natu 
/* Convenience wrappers selecting the direction of vm_iommu_modify(). */
#define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
#define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)
610318224bbSNeel Natu 
611318224bbSNeel Natu int
612318224bbSNeel Natu vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
613318224bbSNeel Natu {
614318224bbSNeel Natu 	int error;
615318224bbSNeel Natu 
616318224bbSNeel Natu 	error = ppt_unassign_device(vm, bus, slot, func);
617318224bbSNeel Natu 	if (error)
618318224bbSNeel Natu 		return (error);
619318224bbSNeel Natu 
62051f45d01SNeel Natu 	if (ppt_assigned_devices(vm) == 0) {
621318224bbSNeel Natu 		vm_iommu_unmap(vm);
622318224bbSNeel Natu 		vm_gpa_unwire(vm);
623318224bbSNeel Natu 	}
624318224bbSNeel Natu 	return (0);
625318224bbSNeel Natu }
626318224bbSNeel Natu 
627318224bbSNeel Natu int
628318224bbSNeel Natu vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
629318224bbSNeel Natu {
630318224bbSNeel Natu 	int error;
631318224bbSNeel Natu 	vm_paddr_t maxaddr;
632318224bbSNeel Natu 
633318224bbSNeel Natu 	/*
634318224bbSNeel Natu 	 * Virtual machines with pci passthru devices get special treatment:
635318224bbSNeel Natu 	 * - the guest physical memory is wired
636318224bbSNeel Natu 	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
637318224bbSNeel Natu 	 *
638318224bbSNeel Natu 	 * We need to do this before the first pci passthru device is attached.
639318224bbSNeel Natu 	 */
64051f45d01SNeel Natu 	if (ppt_assigned_devices(vm) == 0) {
641318224bbSNeel Natu 		KASSERT(vm->iommu == NULL,
642318224bbSNeel Natu 		    ("vm_assign_pptdev: iommu must be NULL"));
643318224bbSNeel Natu 		maxaddr = vmm_mem_maxaddr();
644318224bbSNeel Natu 		vm->iommu = iommu_create_domain(maxaddr);
645318224bbSNeel Natu 
646318224bbSNeel Natu 		error = vm_gpa_wire(vm);
647318224bbSNeel Natu 		if (error)
648318224bbSNeel Natu 			return (error);
649318224bbSNeel Natu 
650318224bbSNeel Natu 		vm_iommu_map(vm);
651318224bbSNeel Natu 	}
652318224bbSNeel Natu 
653318224bbSNeel Natu 	error = ppt_assign_device(vm, bus, slot, func);
654318224bbSNeel Natu 	return (error);
655318224bbSNeel Natu }
656318224bbSNeel Natu 
657318224bbSNeel Natu void *
658318224bbSNeel Natu vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
659318224bbSNeel Natu 	    void **cookie)
660318224bbSNeel Natu {
661318224bbSNeel Natu 	int count, pageoff;
662318224bbSNeel Natu 	vm_page_t m;
663318224bbSNeel Natu 
664318224bbSNeel Natu 	pageoff = gpa & PAGE_MASK;
665318224bbSNeel Natu 	if (len > PAGE_SIZE - pageoff)
666318224bbSNeel Natu 		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
667318224bbSNeel Natu 
668318224bbSNeel Natu 	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
669318224bbSNeel Natu 	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
670318224bbSNeel Natu 
671318224bbSNeel Natu 	if (count == 1) {
672318224bbSNeel Natu 		*cookie = m;
673318224bbSNeel Natu 		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
674318224bbSNeel Natu 	} else {
675318224bbSNeel Natu 		*cookie = NULL;
676318224bbSNeel Natu 		return (NULL);
677318224bbSNeel Natu 	}
678318224bbSNeel Natu }
679318224bbSNeel Natu 
680318224bbSNeel Natu void
681318224bbSNeel Natu vm_gpa_release(void *cookie)
682318224bbSNeel Natu {
683318224bbSNeel Natu 	vm_page_t m = cookie;
684318224bbSNeel Natu 
685318224bbSNeel Natu 	vm_page_lock(m);
686318224bbSNeel Natu 	vm_page_unhold(m);
687318224bbSNeel Natu 	vm_page_unlock(m);
688366f6083SPeter Grehan }
689366f6083SPeter Grehan 
690366f6083SPeter Grehan int
691366f6083SPeter Grehan vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
692366f6083SPeter Grehan 		  struct vm_memory_segment *seg)
693366f6083SPeter Grehan {
694366f6083SPeter Grehan 	int i;
695366f6083SPeter Grehan 
696366f6083SPeter Grehan 	for (i = 0; i < vm->num_mem_segs; i++) {
697366f6083SPeter Grehan 		if (gpabase == vm->mem_segs[i].gpa) {
698318224bbSNeel Natu 			seg->gpa = vm->mem_segs[i].gpa;
699318224bbSNeel Natu 			seg->len = vm->mem_segs[i].len;
700318224bbSNeel Natu 			seg->wired = vm->mem_segs[i].wired;
701366f6083SPeter Grehan 			return (0);
702366f6083SPeter Grehan 		}
703366f6083SPeter Grehan 	}
704366f6083SPeter Grehan 	return (-1);
705366f6083SPeter Grehan }
706366f6083SPeter Grehan 
707366f6083SPeter Grehan int
708318224bbSNeel Natu vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
709318224bbSNeel Natu 	      vm_offset_t *offset, struct vm_object **object)
710318224bbSNeel Natu {
711318224bbSNeel Natu 	int i;
712318224bbSNeel Natu 	size_t seg_len;
713318224bbSNeel Natu 	vm_paddr_t seg_gpa;
714318224bbSNeel Natu 	vm_object_t seg_obj;
715318224bbSNeel Natu 
716318224bbSNeel Natu 	for (i = 0; i < vm->num_mem_segs; i++) {
717318224bbSNeel Natu 		if ((seg_obj = vm->mem_segs[i].object) == NULL)
718318224bbSNeel Natu 			continue;
719318224bbSNeel Natu 
720318224bbSNeel Natu 		seg_gpa = vm->mem_segs[i].gpa;
721318224bbSNeel Natu 		seg_len = vm->mem_segs[i].len;
722318224bbSNeel Natu 
723318224bbSNeel Natu 		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
724318224bbSNeel Natu 			*offset = gpa - seg_gpa;
725318224bbSNeel Natu 			*object = seg_obj;
726318224bbSNeel Natu 			vm_object_reference(seg_obj);
727318224bbSNeel Natu 			return (0);
728318224bbSNeel Natu 		}
729318224bbSNeel Natu 	}
730318224bbSNeel Natu 
731318224bbSNeel Natu 	return (EINVAL);
732318224bbSNeel Natu }
733318224bbSNeel Natu 
734318224bbSNeel Natu int
735366f6083SPeter Grehan vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
736366f6083SPeter Grehan {
737366f6083SPeter Grehan 
738366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
739366f6083SPeter Grehan 		return (EINVAL);
740366f6083SPeter Grehan 
741366f6083SPeter Grehan 	if (reg >= VM_REG_LAST)
742366f6083SPeter Grehan 		return (EINVAL);
743366f6083SPeter Grehan 
744366f6083SPeter Grehan 	return (VMGETREG(vm->cookie, vcpu, reg, retval));
745366f6083SPeter Grehan }
746366f6083SPeter Grehan 
747366f6083SPeter Grehan int
748366f6083SPeter Grehan vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
749366f6083SPeter Grehan {
750366f6083SPeter Grehan 
751366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
752366f6083SPeter Grehan 		return (EINVAL);
753366f6083SPeter Grehan 
754366f6083SPeter Grehan 	if (reg >= VM_REG_LAST)
755366f6083SPeter Grehan 		return (EINVAL);
756366f6083SPeter Grehan 
757366f6083SPeter Grehan 	return (VMSETREG(vm->cookie, vcpu, reg, val));
758366f6083SPeter Grehan }
759366f6083SPeter Grehan 
760366f6083SPeter Grehan static boolean_t
761366f6083SPeter Grehan is_descriptor_table(int reg)
762366f6083SPeter Grehan {
763366f6083SPeter Grehan 
764366f6083SPeter Grehan 	switch (reg) {
765366f6083SPeter Grehan 	case VM_REG_GUEST_IDTR:
766366f6083SPeter Grehan 	case VM_REG_GUEST_GDTR:
767366f6083SPeter Grehan 		return (TRUE);
768366f6083SPeter Grehan 	default:
769366f6083SPeter Grehan 		return (FALSE);
770366f6083SPeter Grehan 	}
771366f6083SPeter Grehan }
772366f6083SPeter Grehan 
773366f6083SPeter Grehan static boolean_t
774366f6083SPeter Grehan is_segment_register(int reg)
775366f6083SPeter Grehan {
776366f6083SPeter Grehan 
777366f6083SPeter Grehan 	switch (reg) {
778366f6083SPeter Grehan 	case VM_REG_GUEST_ES:
779366f6083SPeter Grehan 	case VM_REG_GUEST_CS:
780366f6083SPeter Grehan 	case VM_REG_GUEST_SS:
781366f6083SPeter Grehan 	case VM_REG_GUEST_DS:
782366f6083SPeter Grehan 	case VM_REG_GUEST_FS:
783366f6083SPeter Grehan 	case VM_REG_GUEST_GS:
784366f6083SPeter Grehan 	case VM_REG_GUEST_TR:
785366f6083SPeter Grehan 	case VM_REG_GUEST_LDTR:
786366f6083SPeter Grehan 		return (TRUE);
787366f6083SPeter Grehan 	default:
788366f6083SPeter Grehan 		return (FALSE);
789366f6083SPeter Grehan 	}
790366f6083SPeter Grehan }
791366f6083SPeter Grehan 
792366f6083SPeter Grehan int
793366f6083SPeter Grehan vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
794366f6083SPeter Grehan 		struct seg_desc *desc)
795366f6083SPeter Grehan {
796366f6083SPeter Grehan 
797366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
798366f6083SPeter Grehan 		return (EINVAL);
799366f6083SPeter Grehan 
800366f6083SPeter Grehan 	if (!is_segment_register(reg) && !is_descriptor_table(reg))
801366f6083SPeter Grehan 		return (EINVAL);
802366f6083SPeter Grehan 
803366f6083SPeter Grehan 	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
804366f6083SPeter Grehan }
805366f6083SPeter Grehan 
806366f6083SPeter Grehan int
807366f6083SPeter Grehan vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
808366f6083SPeter Grehan 		struct seg_desc *desc)
809366f6083SPeter Grehan {
810366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
811366f6083SPeter Grehan 		return (EINVAL);
812366f6083SPeter Grehan 
813366f6083SPeter Grehan 	if (!is_segment_register(reg) && !is_descriptor_table(reg))
814366f6083SPeter Grehan 		return (EINVAL);
815366f6083SPeter Grehan 
816366f6083SPeter Grehan 	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
817366f6083SPeter Grehan }
818366f6083SPeter Grehan 
819366f6083SPeter Grehan static void
820366f6083SPeter Grehan restore_guest_fpustate(struct vcpu *vcpu)
821366f6083SPeter Grehan {
822366f6083SPeter Grehan 
82338f1b189SPeter Grehan 	/* flush host state to the pcb */
82438f1b189SPeter Grehan 	fpuexit(curthread);
825bd8572e0SNeel Natu 
826bd8572e0SNeel Natu 	/* restore guest FPU state */
827366f6083SPeter Grehan 	fpu_stop_emulating();
82838f1b189SPeter Grehan 	fpurestore(vcpu->guestfpu);
829bd8572e0SNeel Natu 
830abb023fbSJohn Baldwin 	/* restore guest XCR0 if XSAVE is enabled in the host */
831abb023fbSJohn Baldwin 	if (rcr4() & CR4_XSAVE)
832abb023fbSJohn Baldwin 		load_xcr(0, vcpu->guest_xcr0);
833abb023fbSJohn Baldwin 
834bd8572e0SNeel Natu 	/*
835bd8572e0SNeel Natu 	 * The FPU is now "dirty" with the guest's state so turn on emulation
836bd8572e0SNeel Natu 	 * to trap any access to the FPU by the host.
837bd8572e0SNeel Natu 	 */
838bd8572e0SNeel Natu 	fpu_start_emulating();
839366f6083SPeter Grehan }
840366f6083SPeter Grehan 
841366f6083SPeter Grehan static void
842366f6083SPeter Grehan save_guest_fpustate(struct vcpu *vcpu)
843366f6083SPeter Grehan {
844366f6083SPeter Grehan 
845bd8572e0SNeel Natu 	if ((rcr0() & CR0_TS) == 0)
846bd8572e0SNeel Natu 		panic("fpu emulation not enabled in host!");
847bd8572e0SNeel Natu 
848abb023fbSJohn Baldwin 	/* save guest XCR0 and restore host XCR0 */
849abb023fbSJohn Baldwin 	if (rcr4() & CR4_XSAVE) {
850abb023fbSJohn Baldwin 		vcpu->guest_xcr0 = rxcr(0);
851abb023fbSJohn Baldwin 		load_xcr(0, vmm_get_host_xcr0());
852abb023fbSJohn Baldwin 	}
853abb023fbSJohn Baldwin 
854bd8572e0SNeel Natu 	/* save guest FPU state */
855bd8572e0SNeel Natu 	fpu_stop_emulating();
85638f1b189SPeter Grehan 	fpusave(vcpu->guestfpu);
857366f6083SPeter Grehan 	fpu_start_emulating();
858366f6083SPeter Grehan }
859366f6083SPeter Grehan 
/*
 * Per-vcpu statistic.  vm_handle_hlt() adds to it the number of 'ticks' that
 * elapsed while the vcpu was asleep in msleep_spin().
 */
86061592433SNeel Natu static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
861f76fc5d4SNeel Natu 
862318224bbSNeel Natu static int
863f80330a8SNeel Natu vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
864f80330a8SNeel Natu     bool from_idle)
865366f6083SPeter Grehan {
866318224bbSNeel Natu 	int error;
867366f6083SPeter Grehan 
868318224bbSNeel Natu 	vcpu_assert_locked(vcpu);
869366f6083SPeter Grehan 
870f76fc5d4SNeel Natu 	/*
871f80330a8SNeel Natu 	 * State transitions from the vmmdev_ioctl() must always begin from
872f80330a8SNeel Natu 	 * the VCPU_IDLE state. This guarantees that there is only a single
873f80330a8SNeel Natu 	 * ioctl() operating on a vcpu at any point.
874f80330a8SNeel Natu 	 */
875f80330a8SNeel Natu 	if (from_idle) {
876f80330a8SNeel Natu 		while (vcpu->state != VCPU_IDLE)
877f80330a8SNeel Natu 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
878f80330a8SNeel Natu 	} else {
879f80330a8SNeel Natu 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
880f80330a8SNeel Natu 		    "vcpu idle state"));
881f80330a8SNeel Natu 	}
882f80330a8SNeel Natu 
883ef39d7e9SNeel Natu 	if (vcpu->state == VCPU_RUNNING) {
884ef39d7e9SNeel Natu 		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
885ef39d7e9SNeel Natu 		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
886ef39d7e9SNeel Natu 	} else {
887ef39d7e9SNeel Natu 		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
888ef39d7e9SNeel Natu 		    "vcpu that is not running", vcpu->hostcpu));
889ef39d7e9SNeel Natu 	}
890ef39d7e9SNeel Natu 
891f80330a8SNeel Natu 	/*
892318224bbSNeel Natu 	 * The following state transitions are allowed:
893318224bbSNeel Natu 	 * IDLE -> FROZEN -> IDLE
894318224bbSNeel Natu 	 * FROZEN -> RUNNING -> FROZEN
895318224bbSNeel Natu 	 * FROZEN -> SLEEPING -> FROZEN
896f76fc5d4SNeel Natu 	 */
897318224bbSNeel Natu 	switch (vcpu->state) {
898318224bbSNeel Natu 	case VCPU_IDLE:
899318224bbSNeel Natu 	case VCPU_RUNNING:
900318224bbSNeel Natu 	case VCPU_SLEEPING:
901318224bbSNeel Natu 		error = (newstate != VCPU_FROZEN);
902318224bbSNeel Natu 		break;
903318224bbSNeel Natu 	case VCPU_FROZEN:
904318224bbSNeel Natu 		error = (newstate == VCPU_FROZEN);
905318224bbSNeel Natu 		break;
906318224bbSNeel Natu 	default:
907318224bbSNeel Natu 		error = 1;
908318224bbSNeel Natu 		break;
909318224bbSNeel Natu 	}
910318224bbSNeel Natu 
911f80330a8SNeel Natu 	if (error)
912f80330a8SNeel Natu 		return (EBUSY);
913318224bbSNeel Natu 
914f80330a8SNeel Natu 	vcpu->state = newstate;
915ef39d7e9SNeel Natu 	if (newstate == VCPU_RUNNING)
916ef39d7e9SNeel Natu 		vcpu->hostcpu = curcpu;
917ef39d7e9SNeel Natu 	else
918ef39d7e9SNeel Natu 		vcpu->hostcpu = NOCPU;
919ef39d7e9SNeel Natu 
920f80330a8SNeel Natu 	if (newstate == VCPU_IDLE)
921f80330a8SNeel Natu 		wakeup(&vcpu->state);
922f80330a8SNeel Natu 
923f80330a8SNeel Natu 	return (0);
924318224bbSNeel Natu }
925318224bbSNeel Natu 
926318224bbSNeel Natu static void
927318224bbSNeel Natu vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
928318224bbSNeel Natu {
929318224bbSNeel Natu 	int error;
930318224bbSNeel Natu 
931f80330a8SNeel Natu 	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
932318224bbSNeel Natu 		panic("Error %d setting state to %d\n", error, newstate);
933318224bbSNeel Natu }
934318224bbSNeel Natu 
935318224bbSNeel Natu static void
936318224bbSNeel Natu vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
937318224bbSNeel Natu {
938318224bbSNeel Natu 	int error;
939318224bbSNeel Natu 
940f80330a8SNeel Natu 	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
941318224bbSNeel Natu 		panic("Error %d setting state to %d", error, newstate);
942318224bbSNeel Natu }
943318224bbSNeel Natu 
9445b8a8cd1SNeel Natu static void
9455b8a8cd1SNeel Natu vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
9465b8a8cd1SNeel Natu {
9475b8a8cd1SNeel Natu 
9485b8a8cd1SNeel Natu 	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
9495b8a8cd1SNeel Natu 
9505b8a8cd1SNeel Natu 	/*
9515b8a8cd1SNeel Natu 	 * Update 'rendezvous_func' and execute a write memory barrier to
9525b8a8cd1SNeel Natu 	 * ensure that it is visible across all host cpus. This is not needed
9535b8a8cd1SNeel Natu 	 * for correctness but it does ensure that all the vcpus will notice
9545b8a8cd1SNeel Natu 	 * that the rendezvous is requested immediately.
9555b8a8cd1SNeel Natu 	 */
9565b8a8cd1SNeel Natu 	vm->rendezvous_func = func;
9575b8a8cd1SNeel Natu 	wmb();
9585b8a8cd1SNeel Natu }
9595b8a8cd1SNeel Natu 
/*
 * Emit a rendezvous trace record: through VCPU_CTR0() when 'vcpuid' is
 * non-negative, through VM_CTR0() otherwise.
 *
 * 'vcpuid' is parenthesized so that an expression argument (for example a
 * conditional expression) is compared against zero as a whole.  It is still
 * expanded more than once, so it must not have side effects.
 */
#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if ((vcpuid) >= 0)					\
			VCPU_CTR0(vm, (vcpuid), fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)
9675b8a8cd1SNeel Natu 
9685b8a8cd1SNeel Natu static void
9695b8a8cd1SNeel Natu vm_handle_rendezvous(struct vm *vm, int vcpuid)
9705b8a8cd1SNeel Natu {
9715b8a8cd1SNeel Natu 
9725b8a8cd1SNeel Natu 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
9735b8a8cd1SNeel Natu 	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
9745b8a8cd1SNeel Natu 
9755b8a8cd1SNeel Natu 	mtx_lock(&vm->rendezvous_mtx);
9765b8a8cd1SNeel Natu 	while (vm->rendezvous_func != NULL) {
97722d822c6SNeel Natu 		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
97822d822c6SNeel Natu 		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
97922d822c6SNeel Natu 
9805b8a8cd1SNeel Natu 		if (vcpuid != -1 &&
98122d822c6SNeel Natu 		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
98222d822c6SNeel Natu 		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
9835b8a8cd1SNeel Natu 			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
9845b8a8cd1SNeel Natu 			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
9855b8a8cd1SNeel Natu 			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
9865b8a8cd1SNeel Natu 		}
9875b8a8cd1SNeel Natu 		if (CPU_CMP(&vm->rendezvous_req_cpus,
9885b8a8cd1SNeel Natu 		    &vm->rendezvous_done_cpus) == 0) {
9895b8a8cd1SNeel Natu 			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
9905b8a8cd1SNeel Natu 			vm_set_rendezvous_func(vm, NULL);
9915b8a8cd1SNeel Natu 			wakeup(&vm->rendezvous_func);
9925b8a8cd1SNeel Natu 			break;
9935b8a8cd1SNeel Natu 		}
9945b8a8cd1SNeel Natu 		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
9955b8a8cd1SNeel Natu 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
9965b8a8cd1SNeel Natu 		    "vmrndv", 0);
9975b8a8cd1SNeel Natu 	}
9985b8a8cd1SNeel Natu 	mtx_unlock(&vm->rendezvous_mtx);
9995b8a8cd1SNeel Natu }
10005b8a8cd1SNeel Natu 
1001318224bbSNeel Natu /*
1002318224bbSNeel Natu  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
1003318224bbSNeel Natu  */
1004318224bbSNeel Natu static int
1005becd9849SNeel Natu vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
1006318224bbSNeel Natu {
1007318224bbSNeel Natu 	struct vcpu *vcpu;
1008*c6a0cc2eSNeel Natu 	const char *wmesg;
1009*c6a0cc2eSNeel Natu 	int t;
1010318224bbSNeel Natu 
1011318224bbSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1012318224bbSNeel Natu 
1013f76fc5d4SNeel Natu 	vcpu_lock(vcpu);
1014*c6a0cc2eSNeel Natu 	while (1) {
1015f76fc5d4SNeel Natu 		/*
1016f76fc5d4SNeel Natu 		 * Do a final check for pending NMI or interrupts before
1017*c6a0cc2eSNeel Natu 		 * really putting this thread to sleep. Also check for
1018*c6a0cc2eSNeel Natu 		 * software events that would cause this vcpu to wakeup.
1019f76fc5d4SNeel Natu 		 *
1020*c6a0cc2eSNeel Natu 		 * These interrupts/events could have happened after the
1021*c6a0cc2eSNeel Natu 		 * vcpu returned from VMRUN() and before it acquired the
1022*c6a0cc2eSNeel Natu 		 * vcpu lock above.
1023f76fc5d4SNeel Natu 		 */
1024*c6a0cc2eSNeel Natu 		if (vm->rendezvous_func != NULL || vm->suspend)
1025*c6a0cc2eSNeel Natu 			break;
1026*c6a0cc2eSNeel Natu 		if (vm_nmi_pending(vm, vcpuid))
1027*c6a0cc2eSNeel Natu 			break;
1028*c6a0cc2eSNeel Natu 		if (!intr_disabled) {
1029*c6a0cc2eSNeel Natu 			if (vm_extint_pending(vm, vcpuid) ||
1030*c6a0cc2eSNeel Natu 			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
1031*c6a0cc2eSNeel Natu 				break;
1032*c6a0cc2eSNeel Natu 			}
1033*c6a0cc2eSNeel Natu 		}
1034*c6a0cc2eSNeel Natu 
1035*c6a0cc2eSNeel Natu 		if (vlapic_enabled(vcpu->vlapic))
1036*c6a0cc2eSNeel Natu 			wmesg = "vmidle";
1037*c6a0cc2eSNeel Natu 		else
1038*c6a0cc2eSNeel Natu 			wmesg = "vmhalt";
1039*c6a0cc2eSNeel Natu 
1040f76fc5d4SNeel Natu 		t = ticks;
1041318224bbSNeel Natu 		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
1042*c6a0cc2eSNeel Natu 		msleep_spin(vcpu, &vcpu->mtx, wmesg, 0);
104322d822c6SNeel Natu 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
104422d822c6SNeel Natu 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
104522d822c6SNeel Natu 	}
104622d822c6SNeel Natu 	vcpu_unlock(vcpu);
104722d822c6SNeel Natu 
1048318224bbSNeel Natu 	return (0);
1049318224bbSNeel Natu }
1050318224bbSNeel Natu 
1051318224bbSNeel Natu static int
1052becd9849SNeel Natu vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
1053318224bbSNeel Natu {
1054318224bbSNeel Natu 	int rv, ftype;
1055318224bbSNeel Natu 	struct vm_map *map;
1056318224bbSNeel Natu 	struct vcpu *vcpu;
1057318224bbSNeel Natu 	struct vm_exit *vme;
1058318224bbSNeel Natu 
1059318224bbSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1060318224bbSNeel Natu 	vme = &vcpu->exitinfo;
1061318224bbSNeel Natu 
1062318224bbSNeel Natu 	ftype = vme->u.paging.fault_type;
1063318224bbSNeel Natu 	KASSERT(ftype == VM_PROT_READ ||
1064318224bbSNeel Natu 	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
1065318224bbSNeel Natu 	    ("vm_handle_paging: invalid fault_type %d", ftype));
1066318224bbSNeel Natu 
1067318224bbSNeel Natu 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
1068318224bbSNeel Natu 		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
1069318224bbSNeel Natu 		    vme->u.paging.gpa, ftype);
1070318224bbSNeel Natu 		if (rv == 0)
1071318224bbSNeel Natu 			goto done;
1072318224bbSNeel Natu 	}
1073318224bbSNeel Natu 
1074318224bbSNeel Natu 	map = &vm->vmspace->vm_map;
1075318224bbSNeel Natu 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
1076318224bbSNeel Natu 
1077513c8d33SNeel Natu 	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
1078513c8d33SNeel Natu 	    "ftype = %d", rv, vme->u.paging.gpa, ftype);
1079318224bbSNeel Natu 
1080318224bbSNeel Natu 	if (rv != KERN_SUCCESS)
1081318224bbSNeel Natu 		return (EFAULT);
1082318224bbSNeel Natu done:
1083318224bbSNeel Natu 	/* restart execution at the faulting instruction */
1084318224bbSNeel Natu 	vme->inst_length = 0;
1085318224bbSNeel Natu 
1086318224bbSNeel Natu 	return (0);
1087318224bbSNeel Natu }
1088318224bbSNeel Natu 
1089318224bbSNeel Natu static int
1090becd9849SNeel Natu vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
1091318224bbSNeel Natu {
1092318224bbSNeel Natu 	struct vie *vie;
1093318224bbSNeel Natu 	struct vcpu *vcpu;
1094318224bbSNeel Natu 	struct vm_exit *vme;
1095318224bbSNeel Natu 	int error, inst_length;
1096318224bbSNeel Natu 	uint64_t rip, gla, gpa, cr3;
109700f3efe1SJohn Baldwin 	enum vie_cpu_mode cpu_mode;
109800f3efe1SJohn Baldwin 	enum vie_paging_mode paging_mode;
1099565bbb86SNeel Natu 	mem_region_read_t mread;
1100565bbb86SNeel Natu 	mem_region_write_t mwrite;
1101318224bbSNeel Natu 
1102318224bbSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1103318224bbSNeel Natu 	vme = &vcpu->exitinfo;
1104318224bbSNeel Natu 
1105318224bbSNeel Natu 	rip = vme->rip;
1106318224bbSNeel Natu 	inst_length = vme->inst_length;
1107318224bbSNeel Natu 
1108318224bbSNeel Natu 	gla = vme->u.inst_emul.gla;
1109318224bbSNeel Natu 	gpa = vme->u.inst_emul.gpa;
1110318224bbSNeel Natu 	cr3 = vme->u.inst_emul.cr3;
111100f3efe1SJohn Baldwin 	cpu_mode = vme->u.inst_emul.cpu_mode;
111200f3efe1SJohn Baldwin 	paging_mode = vme->u.inst_emul.paging_mode;
1113318224bbSNeel Natu 	vie = &vme->u.inst_emul.vie;
1114318224bbSNeel Natu 
1115318224bbSNeel Natu 	vie_init(vie);
1116318224bbSNeel Natu 
1117318224bbSNeel Natu 	/* Fetch, decode and emulate the faulting instruction */
111800f3efe1SJohn Baldwin 	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
111900f3efe1SJohn Baldwin 	    paging_mode, vie) != 0)
1120318224bbSNeel Natu 		return (EFAULT);
1121318224bbSNeel Natu 
112200f3efe1SJohn Baldwin 	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
1123318224bbSNeel Natu 		return (EFAULT);
1124318224bbSNeel Natu 
112508e3ff32SNeel Natu 	/* return to userland unless this is an in-kernel emulated device */
1126565bbb86SNeel Natu 	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
1127565bbb86SNeel Natu 		mread = lapic_mmio_read;
1128565bbb86SNeel Natu 		mwrite = lapic_mmio_write;
1129565bbb86SNeel Natu 	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
1130565bbb86SNeel Natu 		mread = vioapic_mmio_read;
1131565bbb86SNeel Natu 		mwrite = vioapic_mmio_write;
113208e3ff32SNeel Natu 	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
113308e3ff32SNeel Natu 		mread = vhpet_mmio_read;
113408e3ff32SNeel Natu 		mwrite = vhpet_mmio_write;
1135565bbb86SNeel Natu 	} else {
1136becd9849SNeel Natu 		*retu = true;
1137318224bbSNeel Natu 		return (0);
1138318224bbSNeel Natu 	}
1139318224bbSNeel Natu 
1140becd9849SNeel Natu 	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
1141becd9849SNeel Natu 	    retu);
1142318224bbSNeel Natu 
1143318224bbSNeel Natu 	return (error);
1144318224bbSNeel Natu }
1145318224bbSNeel Natu 
1146b15a09c0SNeel Natu static int
1147b15a09c0SNeel Natu vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
1148b15a09c0SNeel Natu {
1149b15a09c0SNeel Natu 	int i, done;
1150b15a09c0SNeel Natu 	struct vcpu *vcpu;
1151b15a09c0SNeel Natu 
1152b15a09c0SNeel Natu 	done = 0;
1153b15a09c0SNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1154b15a09c0SNeel Natu 
1155b15a09c0SNeel Natu 	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);
1156b15a09c0SNeel Natu 
1157b15a09c0SNeel Natu 	/*
1158b15a09c0SNeel Natu 	 * Wait until all 'active_cpus' have suspended themselves.
1159b15a09c0SNeel Natu 	 *
1160b15a09c0SNeel Natu 	 * Since a VM may be suspended at any time including when one or
1161b15a09c0SNeel Natu 	 * more vcpus are doing a rendezvous we need to call the rendezvous
1162b15a09c0SNeel Natu 	 * handler while we are waiting to prevent a deadlock.
1163b15a09c0SNeel Natu 	 */
1164b15a09c0SNeel Natu 	vcpu_lock(vcpu);
1165b15a09c0SNeel Natu 	while (1) {
1166b15a09c0SNeel Natu 		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
1167b15a09c0SNeel Natu 			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
1168b15a09c0SNeel Natu 			break;
1169b15a09c0SNeel Natu 		}
1170b15a09c0SNeel Natu 
1171b15a09c0SNeel Natu 		if (vm->rendezvous_func == NULL) {
1172b15a09c0SNeel Natu 			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
1173b15a09c0SNeel Natu 			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
1174b15a09c0SNeel Natu 			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
1175b15a09c0SNeel Natu 			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
1176b15a09c0SNeel Natu 		} else {
1177b15a09c0SNeel Natu 			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
1178b15a09c0SNeel Natu 			vcpu_unlock(vcpu);
1179b15a09c0SNeel Natu 			vm_handle_rendezvous(vm, vcpuid);
1180b15a09c0SNeel Natu 			vcpu_lock(vcpu);
1181b15a09c0SNeel Natu 		}
1182b15a09c0SNeel Natu 	}
1183b15a09c0SNeel Natu 	vcpu_unlock(vcpu);
1184b15a09c0SNeel Natu 
1185b15a09c0SNeel Natu 	/*
1186b15a09c0SNeel Natu 	 * Wakeup the other sleeping vcpus and return to userspace.
1187b15a09c0SNeel Natu 	 */
1188b15a09c0SNeel Natu 	for (i = 0; i < VM_MAXCPU; i++) {
1189b15a09c0SNeel Natu 		if (CPU_ISSET(i, &vm->suspended_cpus)) {
1190b15a09c0SNeel Natu 			vcpu_notify_event(vm, i, false);
1191b15a09c0SNeel Natu 		}
1192b15a09c0SNeel Natu 	}
1193b15a09c0SNeel Natu 
1194b15a09c0SNeel Natu 	*retu = true;
1195b15a09c0SNeel Natu 	return (0);
1196b15a09c0SNeel Natu }
1197b15a09c0SNeel Natu 
1198b15a09c0SNeel Natu int
1199f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how)
1200b15a09c0SNeel Natu {
1201f0fdcfe2SNeel Natu 	int i;
1202b15a09c0SNeel Natu 
1203f0fdcfe2SNeel Natu 	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
1204f0fdcfe2SNeel Natu 		return (EINVAL);
1205f0fdcfe2SNeel Natu 
1206f0fdcfe2SNeel Natu 	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
1207f0fdcfe2SNeel Natu 		VM_CTR2(vm, "virtual machine already suspended %d/%d",
1208f0fdcfe2SNeel Natu 		    vm->suspend, how);
1209b15a09c0SNeel Natu 		return (EALREADY);
1210b15a09c0SNeel Natu 	}
1211f0fdcfe2SNeel Natu 
1212f0fdcfe2SNeel Natu 	VM_CTR1(vm, "virtual machine successfully suspended %d", how);
1213f0fdcfe2SNeel Natu 
1214f0fdcfe2SNeel Natu 	/*
1215f0fdcfe2SNeel Natu 	 * Notify all active vcpus that they are now suspended.
1216f0fdcfe2SNeel Natu 	 */
1217f0fdcfe2SNeel Natu 	for (i = 0; i < VM_MAXCPU; i++) {
1218f0fdcfe2SNeel Natu 		if (CPU_ISSET(i, &vm->active_cpus))
1219f0fdcfe2SNeel Natu 			vcpu_notify_event(vm, i, false);
1220f0fdcfe2SNeel Natu 	}
1221f0fdcfe2SNeel Natu 
1222f0fdcfe2SNeel Natu 	return (0);
1223f0fdcfe2SNeel Natu }
1224f0fdcfe2SNeel Natu 
1225f0fdcfe2SNeel Natu void
1226f0fdcfe2SNeel Natu vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
1227f0fdcfe2SNeel Natu {
1228f0fdcfe2SNeel Natu 	struct vm_exit *vmexit;
1229f0fdcfe2SNeel Natu 
1230f0fdcfe2SNeel Natu 	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
1231f0fdcfe2SNeel Natu 	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
1232f0fdcfe2SNeel Natu 
1233f0fdcfe2SNeel Natu 	vmexit = vm_exitinfo(vm, vcpuid);
1234f0fdcfe2SNeel Natu 	vmexit->rip = rip;
1235f0fdcfe2SNeel Natu 	vmexit->inst_length = 0;
1236f0fdcfe2SNeel Natu 	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
1237f0fdcfe2SNeel Natu 	vmexit->u.suspended.how = vm->suspend;
1238b15a09c0SNeel Natu }
1239b15a09c0SNeel Natu 
1240318224bbSNeel Natu int
1241318224bbSNeel Natu vm_run(struct vm *vm, struct vm_run *vmrun)
1242318224bbSNeel Natu {
1243318224bbSNeel Natu 	int error, vcpuid;
1244318224bbSNeel Natu 	struct vcpu *vcpu;
1245318224bbSNeel Natu 	struct pcb *pcb;
1246318224bbSNeel Natu 	uint64_t tscval, rip;
1247318224bbSNeel Natu 	struct vm_exit *vme;
1248becd9849SNeel Natu 	bool retu, intr_disabled;
1249318224bbSNeel Natu 	pmap_t pmap;
1250b15a09c0SNeel Natu 	void *rptr, *sptr;
1251318224bbSNeel Natu 
1252318224bbSNeel Natu 	vcpuid = vmrun->cpuid;
1253318224bbSNeel Natu 
1254318224bbSNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1255318224bbSNeel Natu 		return (EINVAL);
1256318224bbSNeel Natu 
1257b15a09c0SNeel Natu 	rptr = &vm->rendezvous_func;
1258b15a09c0SNeel Natu 	sptr = &vm->suspend;
1259318224bbSNeel Natu 	pmap = vmspace_pmap(vm->vmspace);
1260318224bbSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1261318224bbSNeel Natu 	vme = &vcpu->exitinfo;
1262318224bbSNeel Natu 	rip = vmrun->rip;
1263318224bbSNeel Natu restart:
1264318224bbSNeel Natu 	critical_enter();
1265318224bbSNeel Natu 
1266318224bbSNeel Natu 	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
1267318224bbSNeel Natu 	    ("vm_run: absurd pm_active"));
1268318224bbSNeel Natu 
1269318224bbSNeel Natu 	tscval = rdtsc();
1270318224bbSNeel Natu 
1271318224bbSNeel Natu 	pcb = PCPU_GET(curpcb);
1272318224bbSNeel Natu 	set_pcb_flags(pcb, PCB_FULL_IRET);
1273318224bbSNeel Natu 
1274318224bbSNeel Natu 	restore_guest_msrs(vm, vcpuid);
1275318224bbSNeel Natu 	restore_guest_fpustate(vcpu);
1276318224bbSNeel Natu 
1277318224bbSNeel Natu 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
1278b15a09c0SNeel Natu 	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
1279318224bbSNeel Natu 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
1280318224bbSNeel Natu 
1281318224bbSNeel Natu 	save_guest_fpustate(vcpu);
1282318224bbSNeel Natu 	restore_host_msrs(vm, vcpuid);
1283318224bbSNeel Natu 
1284318224bbSNeel Natu 	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
1285318224bbSNeel Natu 
1286318224bbSNeel Natu 	critical_exit();
1287318224bbSNeel Natu 
1288318224bbSNeel Natu 	if (error == 0) {
1289becd9849SNeel Natu 		retu = false;
1290318224bbSNeel Natu 		switch (vme->exitcode) {
1291b15a09c0SNeel Natu 		case VM_EXITCODE_SUSPENDED:
1292b15a09c0SNeel Natu 			error = vm_handle_suspend(vm, vcpuid, &retu);
1293b15a09c0SNeel Natu 			break;
129430b94db8SNeel Natu 		case VM_EXITCODE_IOAPIC_EOI:
129530b94db8SNeel Natu 			vioapic_process_eoi(vm, vcpuid,
129630b94db8SNeel Natu 			    vme->u.ioapic_eoi.vector);
129730b94db8SNeel Natu 			break;
12985b8a8cd1SNeel Natu 		case VM_EXITCODE_RENDEZVOUS:
12995b8a8cd1SNeel Natu 			vm_handle_rendezvous(vm, vcpuid);
13005b8a8cd1SNeel Natu 			error = 0;
13015b8a8cd1SNeel Natu 			break;
1302318224bbSNeel Natu 		case VM_EXITCODE_HLT:
1303becd9849SNeel Natu 			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
13041c052192SNeel Natu 			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
1305318224bbSNeel Natu 			break;
1306318224bbSNeel Natu 		case VM_EXITCODE_PAGING:
1307318224bbSNeel Natu 			error = vm_handle_paging(vm, vcpuid, &retu);
1308318224bbSNeel Natu 			break;
1309318224bbSNeel Natu 		case VM_EXITCODE_INST_EMUL:
1310318224bbSNeel Natu 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
1311318224bbSNeel Natu 			break;
1312318224bbSNeel Natu 		default:
1313becd9849SNeel Natu 			retu = true;	/* handled in userland */
1314318224bbSNeel Natu 			break;
1315318224bbSNeel Natu 		}
1316318224bbSNeel Natu 	}
1317318224bbSNeel Natu 
1318becd9849SNeel Natu 	if (error == 0 && retu == false) {
1319f76fc5d4SNeel Natu 		rip = vme->rip + vme->inst_length;
1320f76fc5d4SNeel Natu 		goto restart;
1321f76fc5d4SNeel Natu 	}
1322f76fc5d4SNeel Natu 
1323318224bbSNeel Natu 	/* copy the exit information */
1324318224bbSNeel Natu 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
1325366f6083SPeter Grehan 	return (error);
1326366f6083SPeter Grehan }
1327366f6083SPeter Grehan 
1328366f6083SPeter Grehan int
1329dc506506SNeel Natu vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
1330366f6083SPeter Grehan {
1331dc506506SNeel Natu 	struct vcpu *vcpu;
1332dc506506SNeel Natu 
1333366f6083SPeter Grehan 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1334366f6083SPeter Grehan 		return (EINVAL);
1335366f6083SPeter Grehan 
1336dc506506SNeel Natu 	if (exception->vector < 0 || exception->vector >= 32)
1337366f6083SPeter Grehan 		return (EINVAL);
1338366f6083SPeter Grehan 
1339dc506506SNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1340366f6083SPeter Grehan 
1341dc506506SNeel Natu 	if (vcpu->exception_pending) {
1342dc506506SNeel Natu 		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
1343dc506506SNeel Natu 		    "pending exception %d", exception->vector,
1344dc506506SNeel Natu 		    vcpu->exception.vector);
1345dc506506SNeel Natu 		return (EBUSY);
1346dc506506SNeel Natu 	}
1347dc506506SNeel Natu 
1348dc506506SNeel Natu 	vcpu->exception_pending = 1;
1349dc506506SNeel Natu 	vcpu->exception = *exception;
1350dc506506SNeel Natu 	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
1351dc506506SNeel Natu 	return (0);
1352dc506506SNeel Natu }
1353dc506506SNeel Natu 
1354dc506506SNeel Natu int
1355dc506506SNeel Natu vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
1356dc506506SNeel Natu {
1357dc506506SNeel Natu 	struct vcpu *vcpu;
1358dc506506SNeel Natu 	int pending;
1359dc506506SNeel Natu 
1360dc506506SNeel Natu 	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
1361dc506506SNeel Natu 
1362dc506506SNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1363dc506506SNeel Natu 	pending = vcpu->exception_pending;
1364dc506506SNeel Natu 	if (pending) {
1365dc506506SNeel Natu 		vcpu->exception_pending = 0;
1366dc506506SNeel Natu 		*exception = vcpu->exception;
1367dc506506SNeel Natu 		VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
1368dc506506SNeel Natu 		    exception->vector);
1369dc506506SNeel Natu 	}
1370dc506506SNeel Natu 	return (pending);
1371dc506506SNeel Natu }
1372dc506506SNeel Natu 
1373dc506506SNeel Natu static void
1374dc506506SNeel Natu vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
1375dc506506SNeel Natu {
1376dc506506SNeel Natu 	struct vm_exit *vmexit;
1377dc506506SNeel Natu 	int error;
1378dc506506SNeel Natu 
1379dc506506SNeel Natu 	error = vm_inject_exception(vm, vcpuid, exception);
1380dc506506SNeel Natu 	KASSERT(error == 0, ("vm_inject_exception error %d", error));
1381dc506506SNeel Natu 
1382dc506506SNeel Natu 	/*
1383dc506506SNeel Natu 	 * A fault-like exception allows the instruction to be restarted
1384dc506506SNeel Natu 	 * after the exception handler returns.
1385dc506506SNeel Natu 	 *
1386dc506506SNeel Natu 	 * By setting the inst_length to 0 we ensure that the instruction
1387dc506506SNeel Natu 	 * pointer remains at the faulting instruction.
1388dc506506SNeel Natu 	 */
1389dc506506SNeel Natu 	vmexit = vm_exitinfo(vm, vcpuid);
1390dc506506SNeel Natu 	vmexit->inst_length = 0;
1391dc506506SNeel Natu }
1392dc506506SNeel Natu 
1393dc506506SNeel Natu void
1394dc506506SNeel Natu vm_inject_gp(struct vm *vm, int vcpuid)
1395dc506506SNeel Natu {
1396dc506506SNeel Natu 	struct vm_exception gpf = {
1397dc506506SNeel Natu 		.vector = IDT_GP,
1398dc506506SNeel Natu 		.error_code_valid = 1,
1399dc506506SNeel Natu 		.error_code = 0
1400dc506506SNeel Natu 	};
1401dc506506SNeel Natu 
1402dc506506SNeel Natu 	vm_inject_fault(vm, vcpuid, &gpf);
1403dc506506SNeel Natu }
1404dc506506SNeel Natu 
1405dc506506SNeel Natu void
1406dc506506SNeel Natu vm_inject_ud(struct vm *vm, int vcpuid)
1407dc506506SNeel Natu {
1408dc506506SNeel Natu 	struct vm_exception udf = {
1409dc506506SNeel Natu 		.vector = IDT_UD,
1410dc506506SNeel Natu 		.error_code_valid = 0
1411dc506506SNeel Natu 	};
1412dc506506SNeel Natu 
1413dc506506SNeel Natu 	vm_inject_fault(vm, vcpuid, &udf);
1414366f6083SPeter Grehan }
1415366f6083SPeter Grehan 
/*
 * Statistic that vm_nmi_clear() increments by 1 each time it clears a
 * pending NMI for a vcpu.
 */
141661592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
1417366f6083SPeter Grehan 
1418f352ff0cSNeel Natu int
1419f352ff0cSNeel Natu vm_inject_nmi(struct vm *vm, int vcpuid)
1420f352ff0cSNeel Natu {
1421f352ff0cSNeel Natu 	struct vcpu *vcpu;
1422f352ff0cSNeel Natu 
1423f352ff0cSNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1424366f6083SPeter Grehan 		return (EINVAL);
1425366f6083SPeter Grehan 
1426f352ff0cSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1427f352ff0cSNeel Natu 
1428f352ff0cSNeel Natu 	vcpu->nmi_pending = 1;
1429de5ea6b6SNeel Natu 	vcpu_notify_event(vm, vcpuid, false);
1430f352ff0cSNeel Natu 	return (0);
1431f352ff0cSNeel Natu }
1432f352ff0cSNeel Natu 
1433f352ff0cSNeel Natu int
1434f352ff0cSNeel Natu vm_nmi_pending(struct vm *vm, int vcpuid)
1435f352ff0cSNeel Natu {
1436f352ff0cSNeel Natu 	struct vcpu *vcpu;
1437f352ff0cSNeel Natu 
1438f352ff0cSNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1439f352ff0cSNeel Natu 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1440f352ff0cSNeel Natu 
1441f352ff0cSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1442f352ff0cSNeel Natu 
1443f352ff0cSNeel Natu 	return (vcpu->nmi_pending);
1444f352ff0cSNeel Natu }
1445f352ff0cSNeel Natu 
1446f352ff0cSNeel Natu void
1447f352ff0cSNeel Natu vm_nmi_clear(struct vm *vm, int vcpuid)
1448f352ff0cSNeel Natu {
1449f352ff0cSNeel Natu 	struct vcpu *vcpu;
1450f352ff0cSNeel Natu 
1451f352ff0cSNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1452f352ff0cSNeel Natu 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1453f352ff0cSNeel Natu 
1454f352ff0cSNeel Natu 	vcpu = &vm->vcpu[vcpuid];
1455f352ff0cSNeel Natu 
1456f352ff0cSNeel Natu 	if (vcpu->nmi_pending == 0)
1457f352ff0cSNeel Natu 		panic("vm_nmi_clear: inconsistent nmi_pending state");
1458f352ff0cSNeel Natu 
1459f352ff0cSNeel Natu 	vcpu->nmi_pending = 0;
1460f352ff0cSNeel Natu 	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
1461366f6083SPeter Grehan }
1462366f6083SPeter Grehan 
/*
 * Statistic that vm_extint_clear() increments by 1 each time it clears a
 * pending ExtINT for a vcpu.
 */
14630775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
14640775fbb4STycho Nightingale 
14650775fbb4STycho Nightingale int
14660775fbb4STycho Nightingale vm_inject_extint(struct vm *vm, int vcpuid)
14670775fbb4STycho Nightingale {
14680775fbb4STycho Nightingale 	struct vcpu *vcpu;
14690775fbb4STycho Nightingale 
14700775fbb4STycho Nightingale 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
14710775fbb4STycho Nightingale 		return (EINVAL);
14720775fbb4STycho Nightingale 
14730775fbb4STycho Nightingale 	vcpu = &vm->vcpu[vcpuid];
14740775fbb4STycho Nightingale 
14750775fbb4STycho Nightingale 	vcpu->extint_pending = 1;
14760775fbb4STycho Nightingale 	vcpu_notify_event(vm, vcpuid, false);
14770775fbb4STycho Nightingale 	return (0);
14780775fbb4STycho Nightingale }
14790775fbb4STycho Nightingale 
14800775fbb4STycho Nightingale int
14810775fbb4STycho Nightingale vm_extint_pending(struct vm *vm, int vcpuid)
14820775fbb4STycho Nightingale {
14830775fbb4STycho Nightingale 	struct vcpu *vcpu;
14840775fbb4STycho Nightingale 
14850775fbb4STycho Nightingale 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
14860775fbb4STycho Nightingale 		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
14870775fbb4STycho Nightingale 
14880775fbb4STycho Nightingale 	vcpu = &vm->vcpu[vcpuid];
14890775fbb4STycho Nightingale 
14900775fbb4STycho Nightingale 	return (vcpu->extint_pending);
14910775fbb4STycho Nightingale }
14920775fbb4STycho Nightingale 
14930775fbb4STycho Nightingale void
14940775fbb4STycho Nightingale vm_extint_clear(struct vm *vm, int vcpuid)
14950775fbb4STycho Nightingale {
14960775fbb4STycho Nightingale 	struct vcpu *vcpu;
14970775fbb4STycho Nightingale 
14980775fbb4STycho Nightingale 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
14990775fbb4STycho Nightingale 		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
15000775fbb4STycho Nightingale 
15010775fbb4STycho Nightingale 	vcpu = &vm->vcpu[vcpuid];
15020775fbb4STycho Nightingale 
15030775fbb4STycho Nightingale 	if (vcpu->extint_pending == 0)
15040775fbb4STycho Nightingale 		panic("vm_extint_clear: inconsistent extint_pending state");
15050775fbb4STycho Nightingale 
15060775fbb4STycho Nightingale 	vcpu->extint_pending = 0;
15070775fbb4STycho Nightingale 	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
15080775fbb4STycho Nightingale }
15090775fbb4STycho Nightingale 
1510366f6083SPeter Grehan int
1511366f6083SPeter Grehan vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
1512366f6083SPeter Grehan {
1513366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1514366f6083SPeter Grehan 		return (EINVAL);
1515366f6083SPeter Grehan 
1516366f6083SPeter Grehan 	if (type < 0 || type >= VM_CAP_MAX)
1517366f6083SPeter Grehan 		return (EINVAL);
1518366f6083SPeter Grehan 
1519366f6083SPeter Grehan 	return (VMGETCAP(vm->cookie, vcpu, type, retval));
1520366f6083SPeter Grehan }
1521366f6083SPeter Grehan 
1522366f6083SPeter Grehan int
1523366f6083SPeter Grehan vm_set_capability(struct vm *vm, int vcpu, int type, int val)
1524366f6083SPeter Grehan {
1525366f6083SPeter Grehan 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1526366f6083SPeter Grehan 		return (EINVAL);
1527366f6083SPeter Grehan 
1528366f6083SPeter Grehan 	if (type < 0 || type >= VM_CAP_MAX)
1529366f6083SPeter Grehan 		return (EINVAL);
1530366f6083SPeter Grehan 
1531366f6083SPeter Grehan 	return (VMSETCAP(vm->cookie, vcpu, type, val));
1532366f6083SPeter Grehan }
1533366f6083SPeter Grehan 
1534366f6083SPeter Grehan uint64_t *
1535366f6083SPeter Grehan vm_guest_msrs(struct vm *vm, int cpu)
1536366f6083SPeter Grehan {
1537366f6083SPeter Grehan 	return (vm->vcpu[cpu].guest_msrs);
1538366f6083SPeter Grehan }
1539366f6083SPeter Grehan 
1540366f6083SPeter Grehan struct vlapic *
1541366f6083SPeter Grehan vm_lapic(struct vm *vm, int cpu)
1542366f6083SPeter Grehan {
1543366f6083SPeter Grehan 	return (vm->vcpu[cpu].vlapic);
1544366f6083SPeter Grehan }
1545366f6083SPeter Grehan 
1546565bbb86SNeel Natu struct vioapic *
1547565bbb86SNeel Natu vm_ioapic(struct vm *vm)
1548565bbb86SNeel Natu {
1549565bbb86SNeel Natu 
1550565bbb86SNeel Natu 	return (vm->vioapic);
1551565bbb86SNeel Natu }
1552565bbb86SNeel Natu 
155308e3ff32SNeel Natu struct vhpet *
155408e3ff32SNeel Natu vm_hpet(struct vm *vm)
155508e3ff32SNeel Natu {
155608e3ff32SNeel Natu 
155708e3ff32SNeel Natu 	return (vm->vhpet);
155808e3ff32SNeel Natu }
155908e3ff32SNeel Natu 
1560366f6083SPeter Grehan boolean_t
1561366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func)
1562366f6083SPeter Grehan {
156307044a96SNeel Natu 	int found, i, n;
156407044a96SNeel Natu 	int b, s, f;
1565366f6083SPeter Grehan 	char *val, *cp, *cp2;
1566366f6083SPeter Grehan 
1567366f6083SPeter Grehan 	/*
156807044a96SNeel Natu 	 * XXX
156907044a96SNeel Natu 	 * The length of an environment variable is limited to 128 bytes which
157007044a96SNeel Natu 	 * puts an upper limit on the number of passthru devices that may be
157107044a96SNeel Natu 	 * specified using a single environment variable.
157207044a96SNeel Natu 	 *
157307044a96SNeel Natu 	 * Work around this by scanning multiple environment variable
157407044a96SNeel Natu 	 * names instead of a single one - yuck!
1575366f6083SPeter Grehan 	 */
157607044a96SNeel Natu 	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
157707044a96SNeel Natu 
157807044a96SNeel Natu 	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
1579366f6083SPeter Grehan 	found = 0;
158007044a96SNeel Natu 	for (i = 0; names[i] != NULL && !found; i++) {
158107044a96SNeel Natu 		cp = val = getenv(names[i]);
1582366f6083SPeter Grehan 		while (cp != NULL && *cp != '\0') {
1583366f6083SPeter Grehan 			if ((cp2 = strchr(cp, ' ')) != NULL)
1584366f6083SPeter Grehan 				*cp2 = '\0';
1585366f6083SPeter Grehan 
1586366f6083SPeter Grehan 			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
1587366f6083SPeter Grehan 			if (n == 3 && bus == b && slot == s && func == f) {
1588366f6083SPeter Grehan 				found = 1;
1589366f6083SPeter Grehan 				break;
1590366f6083SPeter Grehan 			}
1591366f6083SPeter Grehan 
1592366f6083SPeter Grehan 			if (cp2 != NULL)
1593366f6083SPeter Grehan 				*cp2++ = ' ';
1594366f6083SPeter Grehan 
1595366f6083SPeter Grehan 			cp = cp2;
1596366f6083SPeter Grehan 		}
1597366f6083SPeter Grehan 		freeenv(val);
159807044a96SNeel Natu 	}
1599366f6083SPeter Grehan 	return (found);
1600366f6083SPeter Grehan }
1601366f6083SPeter Grehan 
1602366f6083SPeter Grehan void *
1603366f6083SPeter Grehan vm_iommu_domain(struct vm *vm)
1604366f6083SPeter Grehan {
1605366f6083SPeter Grehan 
1606366f6083SPeter Grehan 	return (vm->iommu);
1607366f6083SPeter Grehan }
1608366f6083SPeter Grehan 
160975dd3366SNeel Natu int
1610f80330a8SNeel Natu vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
1611f80330a8SNeel Natu     bool from_idle)
1612366f6083SPeter Grehan {
161375dd3366SNeel Natu 	int error;
1614366f6083SPeter Grehan 	struct vcpu *vcpu;
1615366f6083SPeter Grehan 
1616366f6083SPeter Grehan 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1617366f6083SPeter Grehan 		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
1618366f6083SPeter Grehan 
1619366f6083SPeter Grehan 	vcpu = &vm->vcpu[vcpuid];
1620366f6083SPeter Grehan 
162175dd3366SNeel Natu 	vcpu_lock(vcpu);
1622f80330a8SNeel Natu 	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
162375dd3366SNeel Natu 	vcpu_unlock(vcpu);
162475dd3366SNeel Natu 
162575dd3366SNeel Natu 	return (error);
162675dd3366SNeel Natu }
162775dd3366SNeel Natu 
162875dd3366SNeel Natu enum vcpu_state
1629d3c11f40SPeter Grehan vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
1630366f6083SPeter Grehan {
1631366f6083SPeter Grehan 	struct vcpu *vcpu;
163275dd3366SNeel Natu 	enum vcpu_state state;
1633366f6083SPeter Grehan 
1634366f6083SPeter Grehan 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1635366f6083SPeter Grehan 		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
1636366f6083SPeter Grehan 
1637366f6083SPeter Grehan 	vcpu = &vm->vcpu[vcpuid];
1638366f6083SPeter Grehan 
163975dd3366SNeel Natu 	vcpu_lock(vcpu);
164075dd3366SNeel Natu 	state = vcpu->state;
1641d3c11f40SPeter Grehan 	if (hostcpu != NULL)
1642d3c11f40SPeter Grehan 		*hostcpu = vcpu->hostcpu;
164375dd3366SNeel Natu 	vcpu_unlock(vcpu);
1644366f6083SPeter Grehan 
164575dd3366SNeel Natu 	return (state);
1646366f6083SPeter Grehan }
1647366f6083SPeter Grehan 
1648366f6083SPeter Grehan void
1649366f6083SPeter Grehan vm_activate_cpu(struct vm *vm, int vcpuid)
1650366f6083SPeter Grehan {
1651366f6083SPeter Grehan 
165222d822c6SNeel Natu 	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
165322d822c6SNeel Natu 	    ("vm_activate_cpu: invalid vcpuid %d", vcpuid));
165422d822c6SNeel Natu 	KASSERT(!CPU_ISSET(vcpuid, &vm->active_cpus),
165522d822c6SNeel Natu 	    ("vm_activate_cpu: vcpuid %d is already active", vcpuid));
165622d822c6SNeel Natu 
165722d822c6SNeel Natu 	VCPU_CTR0(vm, vcpuid, "activated");
165822d822c6SNeel Natu 	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
1659366f6083SPeter Grehan }
1660366f6083SPeter Grehan 
1661a5615c90SPeter Grehan cpuset_t
1662366f6083SPeter Grehan vm_active_cpus(struct vm *vm)
1663366f6083SPeter Grehan {
1664366f6083SPeter Grehan 
1665366f6083SPeter Grehan 	return (vm->active_cpus);
1666366f6083SPeter Grehan }
1667366f6083SPeter Grehan 
1668366f6083SPeter Grehan void *
1669366f6083SPeter Grehan vcpu_stats(struct vm *vm, int vcpuid)
1670366f6083SPeter Grehan {
1671366f6083SPeter Grehan 
1672366f6083SPeter Grehan 	return (vm->vcpu[vcpuid].stats);
1673366f6083SPeter Grehan }
1674e9027382SNeel Natu 
1675e9027382SNeel Natu int
1676e9027382SNeel Natu vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
1677e9027382SNeel Natu {
1678e9027382SNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1679e9027382SNeel Natu 		return (EINVAL);
1680e9027382SNeel Natu 
1681e9027382SNeel Natu 	*state = vm->vcpu[vcpuid].x2apic_state;
1682e9027382SNeel Natu 
1683e9027382SNeel Natu 	return (0);
1684e9027382SNeel Natu }
1685e9027382SNeel Natu 
1686e9027382SNeel Natu int
1687e9027382SNeel Natu vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1688e9027382SNeel Natu {
1689e9027382SNeel Natu 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1690e9027382SNeel Natu 		return (EINVAL);
1691e9027382SNeel Natu 
16923f23d3caSNeel Natu 	if (state >= X2APIC_STATE_LAST)
1693e9027382SNeel Natu 		return (EINVAL);
1694e9027382SNeel Natu 
1695e9027382SNeel Natu 	vm->vcpu[vcpuid].x2apic_state = state;
1696e9027382SNeel Natu 
169773820fb0SNeel Natu 	vlapic_set_x2apic_state(vm, vcpuid, state);
169873820fb0SNeel Natu 
1699e9027382SNeel Natu 	return (0);
1700e9027382SNeel Natu }
170175dd3366SNeel Natu 
170222821874SNeel Natu /*
170322821874SNeel Natu  * This function is called to ensure that a vcpu "sees" a pending event
170422821874SNeel Natu  * as soon as possible:
170522821874SNeel Natu  * - If the vcpu thread is sleeping then it is woken up.
170622821874SNeel Natu  * - If the vcpu is running on a different host_cpu then an IPI will be directed
170722821874SNeel Natu  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
170822821874SNeel Natu  */
170975dd3366SNeel Natu void
1710de5ea6b6SNeel Natu vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
171175dd3366SNeel Natu {
171275dd3366SNeel Natu 	int hostcpu;
171375dd3366SNeel Natu 	struct vcpu *vcpu;
171475dd3366SNeel Natu 
171575dd3366SNeel Natu 	vcpu = &vm->vcpu[vcpuid];
171675dd3366SNeel Natu 
1717f76fc5d4SNeel Natu 	vcpu_lock(vcpu);
171875dd3366SNeel Natu 	hostcpu = vcpu->hostcpu;
1719ef39d7e9SNeel Natu 	if (vcpu->state == VCPU_RUNNING) {
1720ef39d7e9SNeel Natu 		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
1721de5ea6b6SNeel Natu 		if (hostcpu != curcpu) {
1722ef39d7e9SNeel Natu 			if (lapic_intr) {
1723add611fdSNeel Natu 				vlapic_post_intr(vcpu->vlapic, hostcpu,
1724add611fdSNeel Natu 				    vmm_ipinum);
1725ef39d7e9SNeel Natu 			} else {
172675dd3366SNeel Natu 				ipi_cpu(hostcpu, vmm_ipinum);
172775dd3366SNeel Natu 			}
1728ef39d7e9SNeel Natu 		} else {
1729ef39d7e9SNeel Natu 			/*
1730ef39d7e9SNeel Natu 			 * If the 'vcpu' is running on 'curcpu' then it must
1731ef39d7e9SNeel Natu 			 * be sending a notification to itself (e.g. SELF_IPI).
1732ef39d7e9SNeel Natu 			 * The pending event will be picked up when the vcpu
1733ef39d7e9SNeel Natu 			 * transitions back to guest context.
1734ef39d7e9SNeel Natu 			 */
1735ef39d7e9SNeel Natu 		}
1736ef39d7e9SNeel Natu 	} else {
1737ef39d7e9SNeel Natu 		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
1738ef39d7e9SNeel Natu 		    "with hostcpu %d", vcpu->state, hostcpu));
1739ef39d7e9SNeel Natu 		if (vcpu->state == VCPU_SLEEPING)
1740ef39d7e9SNeel Natu 			wakeup_one(vcpu);
1741de5ea6b6SNeel Natu 	}
1742f76fc5d4SNeel Natu 	vcpu_unlock(vcpu);
1743f76fc5d4SNeel Natu }
1744318224bbSNeel Natu 
1745318224bbSNeel Natu struct vmspace *
1746318224bbSNeel Natu vm_get_vmspace(struct vm *vm)
1747318224bbSNeel Natu {
1748318224bbSNeel Natu 
1749318224bbSNeel Natu 	return (vm->vmspace);
1750318224bbSNeel Natu }
1751565bbb86SNeel Natu 
/*
 * Translate an APIC id into a vcpu id.  'vm' is not consulted.
 */
int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	int vcpuid;

	/*
	 * XXX the apic id is assumed to be numerically identical to the
	 * vcpu id, so the mapping is the identity.
	 */
	vcpuid = apicid;
	return (vcpuid);
}
17605b8a8cd1SNeel Natu 
17615b8a8cd1SNeel Natu void
17625b8a8cd1SNeel Natu vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
17635b8a8cd1SNeel Natu     vm_rendezvous_func_t func, void *arg)
17645b8a8cd1SNeel Natu {
1765970955e4SNeel Natu 	int i;
1766970955e4SNeel Natu 
17675b8a8cd1SNeel Natu 	/*
17685b8a8cd1SNeel Natu 	 * Enforce that this function is called without any locks
17695b8a8cd1SNeel Natu 	 */
17705b8a8cd1SNeel Natu 	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
17715b8a8cd1SNeel Natu 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
17725b8a8cd1SNeel Natu 	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
17735b8a8cd1SNeel Natu 
17745b8a8cd1SNeel Natu restart:
17755b8a8cd1SNeel Natu 	mtx_lock(&vm->rendezvous_mtx);
17765b8a8cd1SNeel Natu 	if (vm->rendezvous_func != NULL) {
17775b8a8cd1SNeel Natu 		/*
17785b8a8cd1SNeel Natu 		 * If a rendezvous is already in progress then we need to
17795b8a8cd1SNeel Natu 		 * call the rendezvous handler in case this 'vcpuid' is one
17805b8a8cd1SNeel Natu 		 * of the targets of the rendezvous.
17815b8a8cd1SNeel Natu 		 */
17825b8a8cd1SNeel Natu 		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
17835b8a8cd1SNeel Natu 		mtx_unlock(&vm->rendezvous_mtx);
17845b8a8cd1SNeel Natu 		vm_handle_rendezvous(vm, vcpuid);
17855b8a8cd1SNeel Natu 		goto restart;
17865b8a8cd1SNeel Natu 	}
17875b8a8cd1SNeel Natu 	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
17885b8a8cd1SNeel Natu 	    "rendezvous is still in progress"));
17895b8a8cd1SNeel Natu 
17905b8a8cd1SNeel Natu 	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
17915b8a8cd1SNeel Natu 	vm->rendezvous_req_cpus = dest;
17925b8a8cd1SNeel Natu 	CPU_ZERO(&vm->rendezvous_done_cpus);
17935b8a8cd1SNeel Natu 	vm->rendezvous_arg = arg;
17945b8a8cd1SNeel Natu 	vm_set_rendezvous_func(vm, func);
17955b8a8cd1SNeel Natu 	mtx_unlock(&vm->rendezvous_mtx);
17965b8a8cd1SNeel Natu 
1797970955e4SNeel Natu 	/*
1798970955e4SNeel Natu 	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
1799970955e4SNeel Natu 	 * vcpus so they handle the rendezvous as soon as possible.
1800970955e4SNeel Natu 	 */
1801970955e4SNeel Natu 	for (i = 0; i < VM_MAXCPU; i++) {
1802970955e4SNeel Natu 		if (CPU_ISSET(i, &dest))
1803970955e4SNeel Natu 			vcpu_notify_event(vm, i, false);
1804970955e4SNeel Natu 	}
1805970955e4SNeel Natu 
18065b8a8cd1SNeel Natu 	vm_handle_rendezvous(vm, vcpuid);
18075b8a8cd1SNeel Natu }
1808762fd208STycho Nightingale 
1809762fd208STycho Nightingale struct vatpic *
1810762fd208STycho Nightingale vm_atpic(struct vm *vm)
1811762fd208STycho Nightingale {
1812762fd208STycho Nightingale 	return (vm->vatpic);
1813762fd208STycho Nightingale }
1814e883c9bbSTycho Nightingale 
1815e883c9bbSTycho Nightingale struct vatpit *
1816e883c9bbSTycho Nightingale vm_atpit(struct vm *vm)
1817e883c9bbSTycho Nightingale {
1818e883c9bbSTycho Nightingale 	return (vm->vatpit);
1819e883c9bbSTycho Nightingale }
1820