xref: /freebsd/sys/amd64/vmm/vmm.c (revision 4abd7edcbde21ba7a089c7d1a0bba8f87ebece06)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/module.h>
36 #include <sys/sysctl.h>
37 #include <sys/malloc.h>
38 #include <sys/pcpu.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/proc.h>
42 #include <sys/rwlock.h>
43 #include <sys/sched.h>
44 #include <sys/smp.h>
46 
47 #include <vm/vm.h>
48 #include <vm/vm_object.h>
49 #include <vm/vm_page.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_param.h>
54 
55 #include <machine/cpu.h>
56 #include <machine/vm.h>
57 #include <machine/pcb.h>
58 #include <machine/smp.h>
59 #include <x86/psl.h>
60 #include <x86/apicreg.h>
61 #include <machine/vmparam.h>
62 
63 #include <machine/vmm.h>
64 #include <machine/vmm_dev.h>
65 
66 #include "vmm_ktr.h"
67 #include "vmm_host.h"
68 #include "vmm_mem.h"
69 #include "vmm_util.h"
70 #include "vhpet.h"
71 #include "vioapic.h"
72 #include "vlapic.h"
73 #include "vmm_msr.h"
74 #include "vmm_ipi.h"
75 #include "vmm_stat.h"
76 #include "vmm_lapic.h"
77 
78 #include "io/ppt.h"
79 #include "io/iommu.h"
80 
81 struct vlapic;
82 
83 struct vcpu {
84 	int		flags;
85 	enum vcpu_state	state;
86 	struct mtx	mtx;
87 	int		hostcpu;	/* host cpuid this vcpu last ran on */
88 	uint64_t	guest_msrs[VMM_MSR_NUM];
89 	struct vlapic	*vlapic;
90 	int		 vcpuid;
91 	struct savefpu	*guestfpu;	/* guest fpu state */
92 	void		*stats;
93 	struct vm_exit	exitinfo;
94 	enum x2apic_state x2apic_state;
95 	int		nmi_pending;
96 };
97 
98 #define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
99 #define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
100 #define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
101 #define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
102 
103 struct mem_seg {
104 	vm_paddr_t	gpa;
105 	size_t		len;
106 	boolean_t	wired;
107 	vm_object_t	object;
108 };
109 #define	VM_MAX_MEMORY_SEGMENTS	2
110 
111 struct vm {
112 	void		*cookie;	/* processor-specific data */
113 	void		*iommu;		/* iommu-specific data */
114 	struct vhpet	*vhpet;		/* virtual HPET */
115 	struct vioapic	*vioapic;	/* virtual ioapic */
116 	struct vmspace	*vmspace;	/* guest's address space */
117 	struct vcpu	vcpu[VM_MAXCPU];
118 	int		num_mem_segs;
119 	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
120 	char		name[VM_MAX_NAMELEN];
121 
122 	/*
123 	 * Set of active vcpus.
124 	 * An active vcpu is one that has been started implicitly (BSP) or
125 	 * explicitly (AP) by sending it a startup ipi.
126 	 */
127 	cpuset_t	active_cpus;
128 };
129 
130 static int vmm_initialized;
131 
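/*
 * Hardware backend dispatch.  'ops' is set at module load time to either
 * the Intel (VT-x) or AMD (SVM) implementation of 'struct vmm_ops' (see
 * vmm_init()).  The macros below call through it and fall back to a
 * harmless default (0, NULL or ENXIO) if no backend is installed.
 */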
132 static struct vmm_ops *ops;
133 #define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
134 #define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
135 #define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
136 
137 #define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
138 #define	VMRUN(vmi, vcpu, rip, pmap) \
139 	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO)
140 #define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
141 #define	VMSPACE_ALLOC(min, max) \
142 	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
143 #define	VMSPACE_FREE(vmspace) \
144 	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
145 #define	VMGETREG(vmi, vcpu, num, retval)		\
146 	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
147 #define	VMSETREG(vmi, vcpu, num, val)		\
148 	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
149 #define	VMGETDESC(vmi, vcpu, num, desc)		\
150 	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
151 #define	VMSETDESC(vmi, vcpu, num, desc)		\
152 	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
153 #define	VMINJECT(vmi, vcpu, type, vec, ec, ecv)	\
154 	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
155 #define	VMGETCAP(vmi, vcpu, num, retval)	\
156 	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
157 #define	VMSETCAP(vmi, vcpu, num, val)		\
158 	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
159 #define	VLAPIC_INIT(vmi, vcpu)			\
160 	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
161 #define	VLAPIC_CLEANUP(vmi, vlapic)		\
162 	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
163 
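/*
 * FPU "emulation" is toggled with CR0.TS: setting TS makes the next FPU
 * access on the host trap (#NM), which protects the guest FPU state loaded
 * by restore_guest_fpustate() below.
 */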
164 #define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
165 #define	fpu_stop_emulating()	clts()
166 
167 static MALLOC_DEFINE(M_VM, "vm", "vm");
168 CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */
169 
170 /* statistics */
171 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
172 
173 static void
174 vcpu_cleanup(struct vm *vm, int i)
175 {
176 	struct vcpu *vcpu = &vm->vcpu[i];
177 
178 	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
179 	vmm_stat_free(vcpu->stats);
180 	fpu_save_area_free(vcpu->guestfpu);
181 }
182 
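/*
 * One-time initialization of per-vcpu state: spin lock, vlapic, guest FPU
 * save area and statistics buffer.  The vcpu's local APIC starts out in
 * x2APIC mode.
 */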
183 static void
184 vcpu_init(struct vm *vm, uint32_t vcpu_id)
185 {
186 	struct vcpu *vcpu;
187 
188 	vcpu = &vm->vcpu[vcpu_id];
189 
190 	vcpu_lock_init(vcpu);
191 	vcpu->hostcpu = NOCPU;
192 	vcpu->vcpuid = vcpu_id;
193 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
194 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
195 	vcpu->guestfpu = fpu_save_area_alloc();
196 	fpu_save_area_reset(vcpu->guestfpu);
197 	vcpu->stats = vmm_stat_alloc();
198 }
199 
200 struct vm_exit *
201 vm_exitinfo(struct vm *vm, int cpuid)
202 {
203 	struct vcpu *vcpu;
204 
205 	if (cpuid < 0 || cpuid >= VM_MAXCPU)
206 		panic("vm_exitinfo: invalid cpuid %d", cpuid);
207 
208 	vcpu = &vm->vcpu[cpuid];
209 
210 	return (&vcpu->exitinfo);
211 }
212 
213 static void
214 vmm_resume(void)
215 {
216 	VMM_RESUME();
217 }
218 
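/*
 * Module-wide initialization: host state, the IPI vector used to kick a
 * running vcpu (see vcpu_notify_event()) and the memory subsystem are set
 * up first; then the hardware backend is selected by CPU vendor and its
 * own init routine is called.
 */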
219 static int
220 vmm_init(void)
221 {
222 	int error;
223 
224 	vmm_host_state_init();
225 	vmm_ipi_init();
226 
227 	error = vmm_mem_init();
228 	if (error)
229 		return (error);
230 
231 	if (vmm_is_intel())
232 		ops = &vmm_ops_intel;
233 	else if (vmm_is_amd())
234 		ops = &vmm_ops_amd;
235 	else
236 		return (ENXIO);
237 
238 	vmm_msr_init();
239 	vmm_resume_p = vmm_resume;
240 
241 	return (VMM_INIT());
242 }
243 
244 static int
245 vmm_handler(module_t mod, int what, void *arg)
246 {
247 	int error;
248 
249 	switch (what) {
250 	case MOD_LOAD:
251 		vmmdev_init();
252 		iommu_init();
253 		error = vmm_init();
254 		if (error == 0)
255 			vmm_initialized = 1;
256 		break;
257 	case MOD_UNLOAD:
258 		error = vmmdev_cleanup();
259 		if (error == 0) {
260 			vmm_resume_p = NULL;
261 			iommu_cleanup();
262 			vmm_ipi_cleanup();
263 			error = VMM_CLEANUP();
264 			/*
265 			 * Something bad happened - prevent new
266 			 * VMs from being created
267 			 */
268 			if (error)
269 				vmm_initialized = 0;
270 		}
271 		break;
272 	default:
273 		error = 0;
274 		break;
275 	}
276 	return (error);
277 }
278 
279 static moduledata_t vmm_kmod = {
280 	"vmm",
281 	vmm_handler,
282 	NULL
283 };
284 
285 /*
286  * vmm initialization has the following dependencies:
287  *
288  * - iommu initialization must happen after the pci passthru driver has had
289  *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
290  *
291  * - VT-x initialization requires smp_rendezvous() and therefore must happen
292  *   after SMP is fully functional (after SI_SUB_SMP).
293  */
294 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
295 MODULE_VERSION(vmm, 1);
296 
297 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
298 
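/*
 * Create a new virtual machine.  The guest address space is allocated by
 * the backend, the virtual ioapic and HPET are instantiated, every vcpu
 * and its MSR state is initialized, and the BSP (vcpu 0) is marked active.
 */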
299 int
300 vm_create(const char *name, struct vm **retvm)
301 {
302 	int i;
303 	struct vm *vm;
304 	struct vmspace *vmspace;
305 
306 	const int BSP = 0;
307 
308 	/*
309 	 * If vmm.ko could not be successfully initialized then don't attempt
310 	 * to create the virtual machine.
311 	 */
312 	if (!vmm_initialized)
313 		return (ENXIO);
314 
315 	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
316 		return (EINVAL);
317 
318 	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
319 	if (vmspace == NULL)
320 		return (ENOMEM);
321 
322 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
323 	strcpy(vm->name, name);
324 	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
325 	vm->vioapic = vioapic_init(vm);
326 	vm->vhpet = vhpet_init(vm);
327 
328 	for (i = 0; i < VM_MAXCPU; i++) {
329 		vcpu_init(vm, i);
330 		guest_msrs_init(vm, i);
331 	}
332 
333 	vm_activate_cpu(vm, BSP);
334 	vm->vmspace = vmspace;
335 
336 	*retvm = vm;
337 	return (0);
338 }
339 
340 static void
341 vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
342 {
343 
344 	if (seg->object != NULL)
345 		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
346 
347 	bzero(seg, sizeof(*seg));
348 }
349 
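/*
 * Tear down a virtual machine in roughly the reverse order of vm_create():
 * detach passthru devices and the iommu domain, destroy the virtual
 * devices, free the memory segments and per-vcpu state, and finally
 * release the guest address space.
 */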
350 void
351 vm_destroy(struct vm *vm)
352 {
353 	int i;
354 
355 	ppt_unassign_all(vm);
356 
357 	if (vm->iommu != NULL)
358 		iommu_destroy_domain(vm->iommu);
359 
360 	vhpet_cleanup(vm->vhpet);
361 	vioapic_cleanup(vm->vioapic);
362 
363 	for (i = 0; i < vm->num_mem_segs; i++)
364 		vm_free_mem_seg(vm, &vm->mem_segs[i]);
365 
366 	vm->num_mem_segs = 0;
367 
368 	for (i = 0; i < VM_MAXCPU; i++)
369 		vcpu_cleanup(vm, i);
370 
371 	VMSPACE_FREE(vm->vmspace);
372 
373 	VMCLEANUP(vm->cookie);
374 
375 	free(vm, M_VM);
376 }
377 
378 const char *
379 vm_name(struct vm *vm)
380 {
381 	return (vm->name);
382 }
383 
384 int
385 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
386 {
387 	vm_object_t obj;
388 
389 	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
390 		return (ENOMEM);
391 	else
392 		return (0);
393 }
394 
395 int
396 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
397 {
398 
399 	vmm_mmio_free(vm->vmspace, gpa, len);
400 	return (0);
401 }
402 
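/*
 * Return TRUE if 'gpa' is backed by regular guest memory or by pci
 * passthru mmio, FALSE otherwise.
 */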
403 boolean_t
404 vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
405 {
406 	int i;
407 	vm_paddr_t gpabase, gpalimit;
408 
409 	for (i = 0; i < vm->num_mem_segs; i++) {
410 		gpabase = vm->mem_segs[i].gpa;
411 		gpalimit = gpabase + vm->mem_segs[i].len;
412 		if (gpa >= gpabase && gpa < gpalimit)
413 			return (TRUE);		/* 'gpa' is regular memory */
414 	}
415 
416 	if (ppt_is_mmio(vm, gpa))
417 		return (TRUE);			/* 'gpa' is pci passthru mmio */
418 
419 	return (FALSE);
420 }
421 
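/*
 * Allocate a page-aligned chunk of guest physical memory.  The requested
 * range must be either entirely unallocated or entirely covered by an
 * existing segment; a partial overlap is rejected with EINVAL.
 */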
422 int
423 vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
424 {
425 	int available, allocated;
426 	struct mem_seg *seg;
427 	vm_object_t object;
428 	vm_paddr_t g;
429 
430 	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
431 		return (EINVAL);
432 
433 	available = allocated = 0;
434 	g = gpa;
435 	while (g < gpa + len) {
436 		if (vm_mem_allocated(vm, g))
437 			allocated++;
438 		else
439 			available++;
440 
441 		g += PAGE_SIZE;
442 	}
443 
444 	/*
445 	 * If there are some allocated and some available pages in the address
446 	 * range then it is an error.
447 	 */
448 	if (allocated && available)
449 		return (EINVAL);
450 
451 	/*
452 	 * If the entire address range being requested has already been
453 	 * allocated then there isn't anything more to do.
454 	 */
455 	if (allocated && available == 0)
456 		return (0);
457 
458 	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
459 		return (E2BIG);
460 
461 	seg = &vm->mem_segs[vm->num_mem_segs];
462 
463 	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
464 		return (ENOMEM);
465 
466 	seg->gpa = gpa;
467 	seg->len = len;
468 	seg->object = object;
469 	seg->wired = FALSE;
470 
471 	vm->num_mem_segs++;
472 
473 	return (0);
474 }
475 
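/*
 * vm_gpa_unwire()/vm_gpa_wire() unwire and wire all of the guest's memory
 * segments.  Guest memory is kept wired while pci passthru devices are
 * assigned because the iommu mappings set up below refer to the backing
 * host pages directly.
 */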
476 static void
477 vm_gpa_unwire(struct vm *vm)
478 {
479 	int i, rv;
480 	struct mem_seg *seg;
481 
482 	for (i = 0; i < vm->num_mem_segs; i++) {
483 		seg = &vm->mem_segs[i];
484 		if (!seg->wired)
485 			continue;
486 
487 		rv = vm_map_unwire(&vm->vmspace->vm_map,
488 				   seg->gpa, seg->gpa + seg->len,
489 				   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
490 		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
491 		    "%#lx/%ld could not be unwired: %d",
492 		    vm_name(vm), seg->gpa, seg->len, rv));
493 
494 		seg->wired = FALSE;
495 	}
496 }
497 
498 static int
499 vm_gpa_wire(struct vm *vm)
500 {
501 	int i, rv;
502 	struct mem_seg *seg;
503 
504 	for (i = 0; i < vm->num_mem_segs; i++) {
505 		seg = &vm->mem_segs[i];
506 		if (seg->wired)
507 			continue;
508 
509 		/* XXX rlimits? */
510 		rv = vm_map_wire(&vm->vmspace->vm_map,
511 				 seg->gpa, seg->gpa + seg->len,
512 				 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
513 		if (rv != KERN_SUCCESS)
514 			break;
515 
516 		seg->wired = TRUE;
517 	}
518 
519 	if (i < vm->num_mem_segs) {
520 		/*
521 		 * Undo the wiring before returning an error.
522 		 */
523 		vm_gpa_unwire(vm);
524 		return (EAGAIN);
525 	}
526 
527 	return (0);
528 }
529 
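/*
 * Move the guest's wired memory between the host iommu domain and the
 * per-vm domain.  When 'map' is TRUE each guest page is mapped gpa->hpa in
 * the vm's domain and removed from the host domain; otherwise the reverse
 * is done.  The IOTLB of the domain that lost mappings is invalidated at
 * the end.
 */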
530 static void
531 vm_iommu_modify(struct vm *vm, boolean_t map)
532 {
533 	int i, sz;
534 	vm_paddr_t gpa, hpa;
535 	struct mem_seg *seg;
536 	void *vp, *cookie, *host_domain;
537 
538 	sz = PAGE_SIZE;
539 	host_domain = iommu_host_domain();
540 
541 	for (i = 0; i < vm->num_mem_segs; i++) {
542 		seg = &vm->mem_segs[i];
543 		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
544 		    vm_name(vm), seg->gpa, seg->len));
545 
546 		gpa = seg->gpa;
547 		while (gpa < seg->gpa + seg->len) {
548 			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
549 					 &cookie);
550 			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
551 			    vm_name(vm), gpa));
552 
553 			vm_gpa_release(cookie);
554 
555 			hpa = DMAP_TO_PHYS((uintptr_t)vp);
556 			if (map) {
557 				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
558 				iommu_remove_mapping(host_domain, hpa, sz);
559 			} else {
560 				iommu_remove_mapping(vm->iommu, gpa, sz);
561 				iommu_create_mapping(host_domain, hpa, hpa, sz);
562 			}
563 
564 			gpa += PAGE_SIZE;
565 		}
566 	}
567 
568 	/*
569 	 * Invalidate the cached translations associated with the domain
570 	 * from which pages were removed.
571 	 */
572 	if (map)
573 		iommu_invalidate_tlb(host_domain);
574 	else
575 		iommu_invalidate_tlb(vm->iommu);
576 }
577 
578 #define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
579 #define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)
580 
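/*
 * Detach a pci passthru device.  When the last one is removed the iommu
 * mappings are torn down and the guest memory is unwired again.
 */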
581 int
582 vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
583 {
584 	int error;
585 
586 	error = ppt_unassign_device(vm, bus, slot, func);
587 	if (error)
588 		return (error);
589 
590 	if (ppt_num_devices(vm) == 0) {
591 		vm_iommu_unmap(vm);
592 		vm_gpa_unwire(vm);
593 	}
594 	return (0);
595 }
596 
597 int
598 vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
599 {
600 	int error;
601 	vm_paddr_t maxaddr;
602 
603 	/*
604 	 * Virtual machines with pci passthru devices get special treatment:
605 	 * - the guest physical memory is wired
606 	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
607 	 *
608 	 * We need to do this before the first pci passthru device is attached.
609 	 */
610 	if (ppt_num_devices(vm) == 0) {
611 		KASSERT(vm->iommu == NULL,
612 		    ("vm_assign_pptdev: iommu must be NULL"));
613 		maxaddr = vmm_mem_maxaddr();
614 		vm->iommu = iommu_create_domain(maxaddr);
615 
616 		error = vm_gpa_wire(vm);
617 		if (error)
618 			return (error);
619 
620 		vm_iommu_map(vm);
621 	}
622 
623 	error = ppt_assign_device(vm, bus, slot, func);
624 	return (error);
625 }
626 
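/*
 * Hold a single page of guest physical memory and return a pointer to it
 * in the host's direct map.  The range must not cross a page boundary.
 * The opaque cookie must be passed to vm_gpa_release() when the caller is
 * done, e.g.:
 *
 *	ptr = vm_gpa_hold(vm, gpa, len, VM_PROT_READ, &cookie);
 *	if (ptr != NULL) {
 *		... access up to 'len' bytes at 'ptr' ...
 *		vm_gpa_release(cookie);
 *	}
 */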
627 void *
628 vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
629 	    void **cookie)
630 {
631 	int count, pageoff;
632 	vm_page_t m;
633 
634 	pageoff = gpa & PAGE_MASK;
635 	if (len > PAGE_SIZE - pageoff)
636 		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
637 
638 	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
639 	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
640 
641 	if (count == 1) {
642 		*cookie = m;
643 		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
644 	} else {
645 		*cookie = NULL;
646 		return (NULL);
647 	}
648 }
649 
650 void
651 vm_gpa_release(void *cookie)
652 {
653 	vm_page_t m = cookie;
654 
655 	vm_page_lock(m);
656 	vm_page_unhold(m);
657 	vm_page_unlock(m);
658 }
659 
660 int
661 vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
662 		  struct vm_memory_segment *seg)
663 {
664 	int i;
665 
666 	for (i = 0; i < vm->num_mem_segs; i++) {
667 		if (gpabase == vm->mem_segs[i].gpa) {
668 			seg->gpa = vm->mem_segs[i].gpa;
669 			seg->len = vm->mem_segs[i].len;
670 			seg->wired = vm->mem_segs[i].wired;
671 			return (0);
672 		}
673 	}
674 	return (-1);
675 }
676 
677 int
678 vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
679 	      vm_offset_t *offset, struct vm_object **object)
680 {
681 	int i;
682 	size_t seg_len;
683 	vm_paddr_t seg_gpa;
684 	vm_object_t seg_obj;
685 
686 	for (i = 0; i < vm->num_mem_segs; i++) {
687 		if ((seg_obj = vm->mem_segs[i].object) == NULL)
688 			continue;
689 
690 		seg_gpa = vm->mem_segs[i].gpa;
691 		seg_len = vm->mem_segs[i].len;
692 
693 		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
694 			*offset = gpa - seg_gpa;
695 			*object = seg_obj;
696 			vm_object_reference(seg_obj);
697 			return (0);
698 		}
699 	}
700 
701 	return (EINVAL);
702 }
703 
704 int
705 vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
706 {
707 
708 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
709 		return (EINVAL);
710 
711 	if (reg >= VM_REG_LAST)
712 		return (EINVAL);
713 
714 	return (VMGETREG(vm->cookie, vcpu, reg, retval));
715 }
716 
717 int
718 vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
719 {
720 
721 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
722 		return (EINVAL);
723 
724 	if (reg >= VM_REG_LAST)
725 		return (EINVAL);
726 
727 	return (VMSETREG(vm->cookie, vcpu, reg, val));
728 }
729 
730 static boolean_t
731 is_descriptor_table(int reg)
732 {
733 
734 	switch (reg) {
735 	case VM_REG_GUEST_IDTR:
736 	case VM_REG_GUEST_GDTR:
737 		return (TRUE);
738 	default:
739 		return (FALSE);
740 	}
741 }
742 
743 static boolean_t
744 is_segment_register(int reg)
745 {
746 
747 	switch (reg) {
748 	case VM_REG_GUEST_ES:
749 	case VM_REG_GUEST_CS:
750 	case VM_REG_GUEST_SS:
751 	case VM_REG_GUEST_DS:
752 	case VM_REG_GUEST_FS:
753 	case VM_REG_GUEST_GS:
754 	case VM_REG_GUEST_TR:
755 	case VM_REG_GUEST_LDTR:
756 		return (TRUE);
757 	default:
758 		return (FALSE);
759 	}
760 }
761 
762 int
763 vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
764 		struct seg_desc *desc)
765 {
766 
767 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
768 		return (EINVAL);
769 
770 	if (!is_segment_register(reg) && !is_descriptor_table(reg))
771 		return (EINVAL);
772 
773 	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
774 }
775 
776 int
777 vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
778 		struct seg_desc *desc)
779 {
780 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
781 		return (EINVAL);
782 
783 	if (!is_segment_register(reg) && !is_descriptor_table(reg))
784 		return (EINVAL);
785 
786 	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
787 }
788 
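/*
 * Hand the FPU to the guest around a VMRUN and back to the host afterwards.
 * Host state is flushed to the PCB first; CR0.TS is kept set outside of
 * fpurestore()/fpusave() so that any stray host FPU access traps instead
 * of clobbering the guest's registers.
 */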
789 static void
790 restore_guest_fpustate(struct vcpu *vcpu)
791 {
792 
793 	/* flush host state to the pcb */
794 	fpuexit(curthread);
795 
796 	/* restore guest FPU state */
797 	fpu_stop_emulating();
798 	fpurestore(vcpu->guestfpu);
799 
800 	/*
801 	 * The FPU is now "dirty" with the guest's state so turn on emulation
802 	 * to trap any access to the FPU by the host.
803 	 */
804 	fpu_start_emulating();
805 }
806 
807 static void
808 save_guest_fpustate(struct vcpu *vcpu)
809 {
810 
811 	if ((rcr0() & CR0_TS) == 0)
812 		panic("fpu emulation not enabled in host!");
813 
814 	/* save guest FPU state */
815 	fpu_stop_emulating();
816 	fpusave(vcpu->guestfpu);
817 	fpu_start_emulating();
818 }
819 
820 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
821 
822 static int
823 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
824     bool from_idle)
825 {
826 	int error;
827 
828 	vcpu_assert_locked(vcpu);
829 
830 	/*
831 	 * State transitions from the vmmdev_ioctl() must always begin from
832 	 * the VCPU_IDLE state. This guarantees that there is only a single
833 	 * ioctl() operating on a vcpu at any point.
834 	 */
835 	if (from_idle) {
836 		while (vcpu->state != VCPU_IDLE)
837 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
838 	} else {
839 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
840 		    "vcpu idle state"));
841 	}
842 
843 	/*
844 	 * The following state transitions are allowed:
845 	 * IDLE -> FROZEN -> IDLE
846 	 * FROZEN -> RUNNING -> FROZEN
847 	 * FROZEN -> SLEEPING -> FROZEN
848 	 */
849 	switch (vcpu->state) {
850 	case VCPU_IDLE:
851 	case VCPU_RUNNING:
852 	case VCPU_SLEEPING:
853 		error = (newstate != VCPU_FROZEN);
854 		break;
855 	case VCPU_FROZEN:
856 		error = (newstate == VCPU_FROZEN);
857 		break;
858 	default:
859 		error = 1;
860 		break;
861 	}
862 
863 	if (error)
864 		return (EBUSY);
865 
866 	vcpu->state = newstate;
867 	if (newstate == VCPU_IDLE)
868 		wakeup(&vcpu->state);
869 
870 	return (0);
871 }
872 
873 static void
874 vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
875 {
876 	int error;
877 
878 	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
879 		panic("Error %d setting state to %d\n", error, newstate);
880 }
881 
882 static void
883 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
884 {
885 	int error;
886 
887 	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
888 		panic("Error %d setting state to %d", error, newstate);
889 }
890 
891 /*
892  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
893  */
894 static int
895 vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
896 {
897 	struct vm_exit *vmexit;
898 	struct vcpu *vcpu;
899 	int t, timo;
900 
901 	vcpu = &vm->vcpu[vcpuid];
902 
903 	vcpu_lock(vcpu);
904 
905 	/*
906 	 * Do a final check for pending NMI or interrupts before
907 	 * really putting this thread to sleep.
908 	 *
909 	 * These interrupts could have happened any time after we
910 	 * returned from VMRUN() and before we grabbed the vcpu lock.
911 	 */
912 	if (!vm_nmi_pending(vm, vcpuid) &&
913 	    (intr_disabled || vlapic_pending_intr(vcpu->vlapic) < 0)) {
914 		t = ticks;
915 		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
916 		if (vlapic_enabled(vcpu->vlapic)) {
917 			/*
918 			 * XXX msleep_spin() is not interruptible so use the
919 			 * 'timo' to put an upper bound on the sleep time.
920 			 */
921 			timo = hz;
922 			msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
923 		} else {
924 			/*
925 			 * Spin down the vcpu if the apic is disabled and it
926 			 * has entered the halted state.
927 			 */
928 			*retu = true;
929 			vmexit = vm_exitinfo(vm, vcpuid);
930 			vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
931 			VCPU_CTR0(vm, vcpuid, "spinning down cpu");
932 		}
933 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
934 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
935 	}
936 	vcpu_unlock(vcpu);
937 
938 	return (0);
939 }
940 
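/*
 * Handle an exit caused by a nested page fault.  Read and write faults are
 * first offered to pmap_emulate_accessed_dirty(); if that fails the fault
 * is resolved through vm_fault() on the guest vmspace.  The faulting
 * instruction is restarted by forcing its length to zero.
 */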
941 static int
942 vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
943 {
944 	int rv, ftype;
945 	struct vm_map *map;
946 	struct vcpu *vcpu;
947 	struct vm_exit *vme;
948 
949 	vcpu = &vm->vcpu[vcpuid];
950 	vme = &vcpu->exitinfo;
951 
952 	ftype = vme->u.paging.fault_type;
953 	KASSERT(ftype == VM_PROT_READ ||
954 	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
955 	    ("vm_handle_paging: invalid fault_type %d", ftype));
956 
957 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
958 		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
959 		    vme->u.paging.gpa, ftype);
960 		if (rv == 0)
961 			goto done;
962 	}
963 
964 	map = &vm->vmspace->vm_map;
965 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
966 
967 	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
968 	    "ftype = %d", rv, vme->u.paging.gpa, ftype);
969 
970 	if (rv != KERN_SUCCESS)
971 		return (EFAULT);
972 done:
973 	/* restart execution at the faulting instruction */
974 	vme->inst_length = 0;
975 
976 	return (0);
977 }
978 
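/*
 * Emulate an instruction that faulted while accessing an MMIO region.  The
 * instruction is fetched and decoded here; accesses to the in-kernel local
 * APIC, IOAPIC and HPET are emulated directly, everything else is bounced
 * to userland by setting '*retu'.
 */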
979 static int
980 vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
981 {
982 	struct vie *vie;
983 	struct vcpu *vcpu;
984 	struct vm_exit *vme;
985 	int error, inst_length;
986 	uint64_t rip, gla, gpa, cr3;
987 	mem_region_read_t mread;
988 	mem_region_write_t mwrite;
989 
990 	vcpu = &vm->vcpu[vcpuid];
991 	vme = &vcpu->exitinfo;
992 
993 	rip = vme->rip;
994 	inst_length = vme->inst_length;
995 
996 	gla = vme->u.inst_emul.gla;
997 	gpa = vme->u.inst_emul.gpa;
998 	cr3 = vme->u.inst_emul.cr3;
999 	vie = &vme->u.inst_emul.vie;
1000 
1001 	vie_init(vie);
1002 
1003 	/* Fetch, decode and emulate the faulting instruction */
1004 	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0)
1005 		return (EFAULT);
1006 
1007 	if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0)
1008 		return (EFAULT);
1009 
1010 	/* return to userland unless this is an in-kernel emulated device */
1011 	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
1012 		mread = lapic_mmio_read;
1013 		mwrite = lapic_mmio_write;
1014 	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
1015 		mread = vioapic_mmio_read;
1016 		mwrite = vioapic_mmio_write;
1017 	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
1018 		mread = vhpet_mmio_read;
1019 		mwrite = vhpet_mmio_write;
1020 	} else {
1021 		*retu = true;
1022 		return (0);
1023 	}
1024 
1025 	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
1026 	    retu);
1027 
1028 	return (error);
1029 }
1030 
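/*
 * Run a vcpu until it generates an exit that must be handled in userland.
 * Exits for 'hlt', nested page faults and in-kernel MMIO emulation are
 * handled here and the guest is resumed; any other exit is copied into
 * 'vmrun->vm_exit' and returned to the caller.
 */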
1031 int
1032 vm_run(struct vm *vm, struct vm_run *vmrun)
1033 {
1034 	int error, vcpuid;
1035 	struct vcpu *vcpu;
1036 	struct pcb *pcb;
1037 	uint64_t tscval, rip;
1038 	struct vm_exit *vme;
1039 	bool retu, intr_disabled;
1040 	pmap_t pmap;
1041 
1042 	vcpuid = vmrun->cpuid;
1043 
1044 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1045 		return (EINVAL);
1046 
1047 	pmap = vmspace_pmap(vm->vmspace);
1048 	vcpu = &vm->vcpu[vcpuid];
1049 	vme = &vcpu->exitinfo;
1050 	rip = vmrun->rip;
1051 restart:
1052 	critical_enter();
1053 
1054 	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
1055 	    ("vm_run: absurd pm_active"));
1056 
1057 	tscval = rdtsc();
1058 
1059 	pcb = PCPU_GET(curpcb);
1060 	set_pcb_flags(pcb, PCB_FULL_IRET);
1061 
1062 	restore_guest_msrs(vm, vcpuid);
1063 	restore_guest_fpustate(vcpu);
1064 
1065 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
1066 	vcpu->hostcpu = curcpu;
1067 	error = VMRUN(vm->cookie, vcpuid, rip, pmap);
1068 	vcpu->hostcpu = NOCPU;
1069 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
1070 
1071 	save_guest_fpustate(vcpu);
1072 	restore_host_msrs(vm, vcpuid);
1073 
1074 	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
1075 
1076 	critical_exit();
1077 
1078 	if (error == 0) {
1079 		retu = false;
1080 		switch (vme->exitcode) {
1081 		case VM_EXITCODE_HLT:
1082 			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
1083 			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
1084 			break;
1085 		case VM_EXITCODE_PAGING:
1086 			error = vm_handle_paging(vm, vcpuid, &retu);
1087 			break;
1088 		case VM_EXITCODE_INST_EMUL:
1089 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
1090 			break;
1091 		default:
1092 			retu = true;	/* handled in userland */
1093 			break;
1094 		}
1095 	}
1096 
1097 	if (error == 0 && retu == false) {
1098 		rip = vme->rip + vme->inst_length;
1099 		goto restart;
1100 	}
1101 
1102 	/* copy the exit information */
1103 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
1104 	return (error);
1105 }
1106 
1107 int
1108 vm_inject_event(struct vm *vm, int vcpuid, int type,
1109 		int vector, uint32_t code, int code_valid)
1110 {
1111 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1112 		return (EINVAL);
1113 
1114 	if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
1115 		return (EINVAL);
1116 
1117 	if (vector < 0 || vector > 255)
1118 		return (EINVAL);
1119 
1120 	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
1121 }
1122 
1123 static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
1124 
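/*
 * vm_inject_nmi() latches a single pending NMI and notifies the vcpu; the
 * pending NMI is later picked up via vm_nmi_pending() and acknowledged
 * with vm_nmi_clear().
 */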
1125 int
1126 vm_inject_nmi(struct vm *vm, int vcpuid)
1127 {
1128 	struct vcpu *vcpu;
1129 
1130 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1131 		return (EINVAL);
1132 
1133 	vcpu = &vm->vcpu[vcpuid];
1134 
1135 	vcpu->nmi_pending = 1;
1136 	vcpu_notify_event(vm, vcpuid, false);
1137 	return (0);
1138 }
1139 
1140 int
1141 vm_nmi_pending(struct vm *vm, int vcpuid)
1142 {
1143 	struct vcpu *vcpu;
1144 
1145 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1146 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1147 
1148 	vcpu = &vm->vcpu[vcpuid];
1149 
1150 	return (vcpu->nmi_pending);
1151 }
1152 
1153 void
1154 vm_nmi_clear(struct vm *vm, int vcpuid)
1155 {
1156 	struct vcpu *vcpu;
1157 
1158 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1159 		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
1160 
1161 	vcpu = &vm->vcpu[vcpuid];
1162 
1163 	if (vcpu->nmi_pending == 0)
1164 		panic("vm_nmi_clear: inconsistent nmi_pending state");
1165 
1166 	vcpu->nmi_pending = 0;
1167 	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
1168 }
1169 
1170 int
1171 vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
1172 {
1173 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1174 		return (EINVAL);
1175 
1176 	if (type < 0 || type >= VM_CAP_MAX)
1177 		return (EINVAL);
1178 
1179 	return (VMGETCAP(vm->cookie, vcpu, type, retval));
1180 }
1181 
1182 int
1183 vm_set_capability(struct vm *vm, int vcpu, int type, int val)
1184 {
1185 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1186 		return (EINVAL);
1187 
1188 	if (type < 0 || type >= VM_CAP_MAX)
1189 		return (EINVAL);
1190 
1191 	return (VMSETCAP(vm->cookie, vcpu, type, val));
1192 }
1193 
1194 uint64_t *
1195 vm_guest_msrs(struct vm *vm, int cpu)
1196 {
1197 	return (vm->vcpu[cpu].guest_msrs);
1198 }
1199 
1200 struct vlapic *
1201 vm_lapic(struct vm *vm, int cpu)
1202 {
1203 	return (vm->vcpu[cpu].vlapic);
1204 }
1205 
1206 struct vioapic *
1207 vm_ioapic(struct vm *vm)
1208 {
1209 
1210 	return (vm->vioapic);
1211 }
1212 
1213 struct vhpet *
1214 vm_hpet(struct vm *vm)
1215 {
1216 
1217 	return (vm->vhpet);
1218 }
1219 
1220 boolean_t
1221 vmm_is_pptdev(int bus, int slot, int func)
1222 {
1223 	int found, i, n;
1224 	int b, s, f;
1225 	char *val, *cp, *cp2;
1226 
1227 	/*
1228 	 * XXX
1229 	 * The length of an environment variable is limited to 128 bytes, which
1230 	 * puts an upper limit on the number of passthru devices that may be
1231 	 * specified using a single environment variable.
1232 	 *
1233 	 * Work around this by scanning multiple environment variable
1234 	 * names instead of a single one - yuck!
1235 	 */
1236 	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
1237 
1238 	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
1239 	found = 0;
1240 	for (i = 0; names[i] != NULL && !found; i++) {
1241 		cp = val = getenv(names[i]);
1242 		while (cp != NULL && *cp != '\0') {
1243 			if ((cp2 = strchr(cp, ' ')) != NULL)
1244 				*cp2 = '\0';
1245 
1246 			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
1247 			if (n == 3 && bus == b && slot == s && func == f) {
1248 				found = 1;
1249 				break;
1250 			}
1251 
1252 			if (cp2 != NULL)
1253 				*cp2++ = ' ';
1254 
1255 			cp = cp2;
1256 		}
1257 		freeenv(val);
1258 	}
1259 	return (found);
1260 }
1261 
1262 void *
1263 vm_iommu_domain(struct vm *vm)
1264 {
1265 
1266 	return (vm->iommu);
1267 }
1268 
1269 int
1270 vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
1271     bool from_idle)
1272 {
1273 	int error;
1274 	struct vcpu *vcpu;
1275 
1276 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1277 		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
1278 
1279 	vcpu = &vm->vcpu[vcpuid];
1280 
1281 	vcpu_lock(vcpu);
1282 	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
1283 	vcpu_unlock(vcpu);
1284 
1285 	return (error);
1286 }
1287 
1288 enum vcpu_state
1289 vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
1290 {
1291 	struct vcpu *vcpu;
1292 	enum vcpu_state state;
1293 
1294 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1295 		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
1296 
1297 	vcpu = &vm->vcpu[vcpuid];
1298 
1299 	vcpu_lock(vcpu);
1300 	state = vcpu->state;
1301 	if (hostcpu != NULL)
1302 		*hostcpu = vcpu->hostcpu;
1303 	vcpu_unlock(vcpu);
1304 
1305 	return (state);
1306 }
1307 
1308 void
1309 vm_activate_cpu(struct vm *vm, int vcpuid)
1310 {
1311 
1312 	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
1313 		CPU_SET(vcpuid, &vm->active_cpus);
1314 }
1315 
1316 cpuset_t
1317 vm_active_cpus(struct vm *vm)
1318 {
1319 
1320 	return (vm->active_cpus);
1321 }
1322 
1323 void *
1324 vcpu_stats(struct vm *vm, int vcpuid)
1325 {
1326 
1327 	return (vm->vcpu[vcpuid].stats);
1328 }
1329 
1330 int
1331 vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
1332 {
1333 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1334 		return (EINVAL);
1335 
1336 	*state = vm->vcpu[vcpuid].x2apic_state;
1337 
1338 	return (0);
1339 }
1340 
1341 int
1342 vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1343 {
1344 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1345 		return (EINVAL);
1346 
1347 	if (state >= X2APIC_STATE_LAST)
1348 		return (EINVAL);
1349 
1350 	vm->vcpu[vcpuid].x2apic_state = state;
1351 
1352 	vlapic_set_x2apic_state(vm, vcpuid, state);
1353 
1354 	return (0);
1355 }
1356 
1357 /*
1358  * This function is called to ensure that a vcpu "sees" a pending event
1359  * as soon as possible:
1360  * - If the vcpu thread is sleeping then it is woken up.
1361  * - If the vcpu is running on a different host_cpu then an IPI will be directed
1362  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
1363  */
1364 void
1365 vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
1366 {
1367 	int hostcpu;
1368 	struct vcpu *vcpu;
1369 
1370 	vcpu = &vm->vcpu[vcpuid];
1371 
1372 	vcpu_lock(vcpu);
1373 	hostcpu = vcpu->hostcpu;
1374 	if (hostcpu == NOCPU) {
1375 		if (vcpu->state == VCPU_SLEEPING)
1376 			wakeup_one(vcpu);
1377 	} else {
1378 		if (vcpu->state != VCPU_RUNNING)
1379 			panic("invalid vcpu state %d", vcpu->state);
1380 		if (hostcpu != curcpu) {
1381 			if (lapic_intr)
1382 				vlapic_post_intr(vcpu->vlapic, hostcpu);
1383 			else
1384 				ipi_cpu(hostcpu, vmm_ipinum);
1385 		}
1386 	}
1387 	vcpu_unlock(vcpu);
1388 }
1389 
1390 struct vmspace *
1391 vm_get_vmspace(struct vm *vm)
1392 {
1393 
1394 	return (vm->vmspace);
1395 }
1396 
1397 int
1398 vm_apicid2vcpuid(struct vm *vm, int apicid)
1399 {
1400 	/*
1401 	 * XXX apic id is assumed to be numerically identical to vcpu id
1402 	 */
1403 	return (apicid);
1404 }
1405