xref: /freebsd/sys/amd64/vmm/vmm.c (revision a223d3ed90bfe313ce5987d468a25a915d7d1254)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/module.h>
36 #include <sys/sysctl.h>
37 #include <sys/malloc.h>
38 #include <sys/pcpu.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/proc.h>
42 #include <sys/rwlock.h>
43 #include <sys/sched.h>
44 #include <sys/smp.h>
45 #include <sys/systm.h>
46 
47 #include <vm/vm.h>
48 #include <vm/vm_object.h>
49 #include <vm/vm_page.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_param.h>
54 
55 #include <machine/cpu.h>
56 #include <machine/vm.h>
57 #include <machine/pcb.h>
58 #include <machine/smp.h>
59 #include <x86/psl.h>
60 #include <x86/apicreg.h>
61 #include <machine/vmparam.h>
62 
63 #include <machine/vmm.h>
64 #include <machine/vmm_dev.h>
65 #include <machine/vmm_instruction_emul.h>
66 
67 #include "vmm_ioport.h"
68 #include "vmm_ktr.h"
69 #include "vmm_host.h"
70 #include "vmm_mem.h"
71 #include "vmm_util.h"
72 #include "vatpic.h"
73 #include "vatpit.h"
74 #include "vhpet.h"
75 #include "vioapic.h"
76 #include "vlapic.h"
77 #include "vmm_msr.h"
78 #include "vmm_ipi.h"
79 #include "vmm_stat.h"
80 #include "vmm_lapic.h"
81 
82 #include "io/ppt.h"
83 #include "io/iommu.h"
84 
85 struct vlapic;
86 
87 /*
88  * Initialization:
89  * (a) allocated when vcpu is created
90  * (i) initialized when vcpu is created and when it is reinitialized
91  * (o) initialized the first time the vcpu is created
92  * (x) initialized before use
93  */
94 struct vcpu {
95 	struct mtx 	mtx;		/* (o) protects 'state' and 'hostcpu' */
96 	enum vcpu_state	state;		/* (o) vcpu state */
97 	int		hostcpu;	/* (o) vcpu's host cpu */
98 	struct vlapic	*vlapic;	/* (i) APIC device model */
99 	enum x2apic_state x2apic_state;	/* (i) APIC mode */
100 	int		nmi_pending;	/* (i) NMI pending */
101 	int		extint_pending;	/* (i) INTR pending */
102 	struct vm_exception exception;	/* (x) exception collateral */
103 	int	exception_pending;	/* (i) exception pending */
104 	struct savefpu	*guestfpu;	/* (a,i) guest fpu state */
105 	uint64_t	guest_xcr0;	/* (i) guest %xcr0 register */
106 	void		*stats;		/* (a,i) statistics */
107 	uint64_t guest_msrs[VMM_MSR_NUM]; /* (i) emulated MSRs */
108 	struct vm_exit	exitinfo;	/* (x) exit reason and collateral */
109 };
110 
111 #define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
112 #define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
113 #define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
114 #define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
115 #define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
116 
117 struct mem_seg {
118 	vm_paddr_t	gpa;
119 	size_t		len;
120 	boolean_t	wired;
121 	vm_object_t	object;
122 };
123 #define	VM_MAX_MEMORY_SEGMENTS	2
124 
125 /*
126  * Initialization:
127  * (o) initialized the first time the VM is created
128  * (i) initialized when VM is created and when it is reinitialized
129  * (x) initialized before use
130  */
131 struct vm {
132 	void		*cookie;		/* (i) cpu-specific data */
133 	void		*iommu;			/* (x) iommu-specific data */
134 	struct vhpet	*vhpet;			/* (i) virtual HPET */
135 	struct vioapic	*vioapic;		/* (i) virtual ioapic */
136 	struct vatpic	*vatpic;		/* (i) virtual atpic */
137 	struct vatpit	*vatpit;		/* (i) virtual atpit */
138 	volatile cpuset_t active_cpus;		/* (i) active vcpus */
139 	int		suspend;		/* (i) stop VM execution */
140 	volatile cpuset_t suspended_cpus; 	/* (i) suspended vcpus */
141 	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
142 	cpuset_t	rendezvous_req_cpus;	/* (x) rendezvous requested */
143 	cpuset_t	rendezvous_done_cpus;	/* (x) rendezvous finished */
144 	void		*rendezvous_arg;	/* (x) rendezvous func/arg */
145 	vm_rendezvous_func_t rendezvous_func;
146 	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
147 	int		num_mem_segs;		/* (o) guest memory segments */
148 	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
149 	struct vmspace	*vmspace;		/* (o) guest's address space */
150 	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
151 	struct vcpu	vcpu[VM_MAXCPU];	/* (i) guest vcpus */
152 };
153 
154 static int vmm_initialized;
155 
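/*
 * Hardware backend operations. 'ops' is set at module load time to the
 * Intel (VT-x) or AMD (SVM) implementation; the macros below dispatch
 * through it and fail gracefully (NULL or ENXIO) if no backend is present.
 */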
156 static struct vmm_ops *ops;
157 #define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
158 #define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
159 #define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
160 
161 #define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
162 #define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
163 	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
164 #define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
165 #define	VMSPACE_ALLOC(min, max) \
166 	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
167 #define	VMSPACE_FREE(vmspace) \
168 	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
169 #define	VMGETREG(vmi, vcpu, num, retval)		\
170 	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
171 #define	VMSETREG(vmi, vcpu, num, val)		\
172 	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
173 #define	VMGETDESC(vmi, vcpu, num, desc)		\
174 	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
175 #define	VMSETDESC(vmi, vcpu, num, desc)		\
176 	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
177 #define	VMGETCAP(vmi, vcpu, num, retval)	\
178 	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
179 #define	VMSETCAP(vmi, vcpu, num, val)		\
180 	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
181 #define	VLAPIC_INIT(vmi, vcpu)			\
182 	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
183 #define	VLAPIC_CLEANUP(vmi, vlapic)		\
184 	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
185 
186 #define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
187 #define	fpu_stop_emulating()	clts()
188 
189 static MALLOC_DEFINE(M_VM, "vm", "vm");
190 CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */
191 
192 /* statistics */
193 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
194 
195 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
196 
197 /*
198  * Halt the guest if all vcpus are executing a HLT instruction with
199  * interrupts disabled.
200  */
201 static int halt_detection_enabled = 1;
202 TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled);
203 SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
204     &halt_detection_enabled, 0,
205     "Halt VM if all vcpus execute HLT with interrupts disabled");
206 
207 static int vmm_ipinum;
208 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
209     "IPI vector used for vcpu notifications");
210 
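/*
 * Release per-vcpu resources. The vlapic is destroyed on every cleanup;
 * the FPU save area and stats buffer are freed only when the vcpu is being
 * destroyed rather than reinitialized.
 */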
211 static void
212 vcpu_cleanup(struct vm *vm, int i, bool destroy)
213 {
214 	struct vcpu *vcpu = &vm->vcpu[i];
215 
216 	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
217 	if (destroy) {
218 		vmm_stat_free(vcpu->stats);
219 		fpu_save_area_free(vcpu->guestfpu);
220 	}
221 }
222 
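/*
 * Initialize or reinitialize a vcpu. When 'create' is true the lock, FPU
 * save area and stats buffer are allocated; in all cases the vlapic is
 * (re)created, x2APIC mode is disabled and the pending interrupt,
 * exception and MSR state is reset.
 */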
223 static void
224 vcpu_init(struct vm *vm, int vcpu_id, bool create)
225 {
226 	struct vcpu *vcpu;
227 
228 	KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU,
229 	    ("vcpu_init: invalid vcpu %d", vcpu_id));
230 
231 	vcpu = &vm->vcpu[vcpu_id];
232 
233 	if (create) {
234 		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
235 		    "initialized", vcpu_id));
236 		vcpu_lock_init(vcpu);
237 		vcpu->state = VCPU_IDLE;
238 		vcpu->hostcpu = NOCPU;
239 		vcpu->guestfpu = fpu_save_area_alloc();
240 		vcpu->stats = vmm_stat_alloc();
241 	}
242 
243 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
244 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
245 	vcpu->nmi_pending = 0;
246 	vcpu->extint_pending = 0;
247 	vcpu->exception_pending = 0;
248 	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
249 	fpu_save_area_reset(vcpu->guestfpu);
250 	vmm_stat_init(vcpu->stats);
251 	guest_msrs_init(vm, vcpu_id);
252 }
253 
254 struct vm_exit *
255 vm_exitinfo(struct vm *vm, int cpuid)
256 {
257 	struct vcpu *vcpu;
258 
259 	if (cpuid < 0 || cpuid >= VM_MAXCPU)
260 		panic("vm_exitinfo: invalid cpuid %d", cpuid);
261 
262 	vcpu = &vm->vcpu[cpuid];
263 
264 	return (&vcpu->exitinfo);
265 }
266 
267 static void
268 vmm_resume(void)
269 {
270 	VMM_RESUME();
271 }
272 
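/*
 * One-time module initialization: snapshot host state, allocate an IPI
 * vector for vcpu notifications (falling back to IPI_AST), initialize the
 * vmm memory allocator and hand off to the Intel or AMD backend.
 */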
273 static int
274 vmm_init(void)
275 {
276 	int error;
277 
278 	vmm_host_state_init();
279 
280 	vmm_ipinum = vmm_ipi_alloc();
281 	if (vmm_ipinum == 0)
282 		vmm_ipinum = IPI_AST;
283 
284 	error = vmm_mem_init();
285 	if (error)
286 		return (error);
287 
288 	if (vmm_is_intel())
289 		ops = &vmm_ops_intel;
290 	else if (vmm_is_amd())
291 		ops = &vmm_ops_amd;
292 	else
293 		return (ENXIO);
294 
295 	vmm_msr_init();
296 	vmm_resume_p = vmm_resume;
297 
298 	return (VMM_INIT(vmm_ipinum));
299 }
300 
301 static int
302 vmm_handler(module_t mod, int what, void *arg)
303 {
304 	int error;
305 
306 	switch (what) {
307 	case MOD_LOAD:
308 		vmmdev_init();
309 		if (ppt_avail_devices() > 0)
310 			iommu_init();
311 		error = vmm_init();
312 		if (error == 0)
313 			vmm_initialized = 1;
314 		break;
315 	case MOD_UNLOAD:
316 		error = vmmdev_cleanup();
317 		if (error == 0) {
318 			vmm_resume_p = NULL;
319 			iommu_cleanup();
320 			if (vmm_ipinum != IPI_AST)
321 				vmm_ipi_free(vmm_ipinum);
322 			error = VMM_CLEANUP();
323 			/*
324 			 * Something bad happened - prevent new
325 			 * VMs from being created
326 			 */
327 			if (error)
328 				vmm_initialized = 0;
329 		}
330 		break;
331 	default:
332 		error = 0;
333 		break;
334 	}
335 	return (error);
336 }
337 
338 static moduledata_t vmm_kmod = {
339 	"vmm",
340 	vmm_handler,
341 	NULL
342 };
343 
344 /*
345  * vmm initialization has the following dependencies:
346  *
347  * - iommu initialization must happen after the pci passthru driver has had
348  *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
349  *
350  * - VT-x initialization requires smp_rendezvous() and therefore must happen
351  *   after SMP is fully functional (after SI_SUB_SMP).
352  */
353 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
354 MODULE_VERSION(vmm, 1);
355 
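/*
 * Initialize VM-wide state: the backend cookie plus the virtual ioapic,
 * HPET, atpic and atpit device models, and every vcpu. Called from
 * vm_create() with 'create' set and from vm_reinit() with it clear.
 */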
356 static void
357 vm_init(struct vm *vm, bool create)
358 {
359 	int i;
360 
361 	vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
362 	vm->iommu = NULL;
363 	vm->vioapic = vioapic_init(vm);
364 	vm->vhpet = vhpet_init(vm);
365 	vm->vatpic = vatpic_init(vm);
366 	vm->vatpit = vatpit_init(vm);
367 
368 	CPU_ZERO(&vm->active_cpus);
369 
370 	vm->suspend = 0;
371 	CPU_ZERO(&vm->suspended_cpus);
372 
373 	for (i = 0; i < VM_MAXCPU; i++)
374 		vcpu_init(vm, i, create);
375 }
376 
377 int
378 vm_create(const char *name, struct vm **retvm)
379 {
380 	struct vm *vm;
381 	struct vmspace *vmspace;
382 
383 	/*
384 	 * If vmm.ko could not be successfully initialized then don't attempt
385 	 * to create the virtual machine.
386 	 */
387 	if (!vmm_initialized)
388 		return (ENXIO);
389 
390 	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
391 		return (EINVAL);
392 
393 	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
394 	if (vmspace == NULL)
395 		return (ENOMEM);
396 
397 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
398 	strcpy(vm->name, name);
399 	vm->num_mem_segs = 0;
400 	vm->vmspace = vmspace;
401 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
402 
403 	vm_init(vm, true);
404 
405 	*retvm = vm;
406 	return (0);
407 }
408 
409 static void
410 vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
411 {
412 
413 	if (seg->object != NULL)
414 		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
415 
416 	bzero(seg, sizeof(*seg));
417 }
418 
419 static void
420 vm_cleanup(struct vm *vm, bool destroy)
421 {
422 	int i;
423 
424 	ppt_unassign_all(vm);
425 
426 	if (vm->iommu != NULL)
427 		iommu_destroy_domain(vm->iommu);
428 
429 	vatpit_cleanup(vm->vatpit);
430 	vhpet_cleanup(vm->vhpet);
431 	vatpic_cleanup(vm->vatpic);
432 	vioapic_cleanup(vm->vioapic);
433 
434 	for (i = 0; i < VM_MAXCPU; i++)
435 		vcpu_cleanup(vm, i, destroy);
436 
437 	VMCLEANUP(vm->cookie);
438 
439 	if (destroy) {
440 		for (i = 0; i < vm->num_mem_segs; i++)
441 			vm_free_mem_seg(vm, &vm->mem_segs[i]);
442 
443 		vm->num_mem_segs = 0;
444 
445 		VMSPACE_FREE(vm->vmspace);
446 		vm->vmspace = NULL;
447 	}
448 }
449 
450 void
451 vm_destroy(struct vm *vm)
452 {
453 	vm_cleanup(vm, true);
454 	free(vm, M_VM);
455 }
456 
457 int
458 vm_reinit(struct vm *vm)
459 {
460 	int error;
461 
462 	/*
463 	 * A virtual machine can be reset only if all vcpus are suspended.
464 	 */
465 	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
466 		vm_cleanup(vm, false);
467 		vm_init(vm, false);
468 		error = 0;
469 	} else {
470 		error = EBUSY;
471 	}
472 
473 	return (error);
474 }
475 
476 const char *
477 vm_name(struct vm *vm)
478 {
479 	return (vm->name);
480 }
481 
482 int
483 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
484 {
485 	vm_object_t obj;
486 
487 	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
488 		return (ENOMEM);
489 	else
490 		return (0);
491 }
492 
493 int
494 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
495 {
496 
497 	vmm_mmio_free(vm->vmspace, gpa, len);
498 	return (0);
499 }
500 
501 boolean_t
502 vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
503 {
504 	int i;
505 	vm_paddr_t gpabase, gpalimit;
506 
507 	for (i = 0; i < vm->num_mem_segs; i++) {
508 		gpabase = vm->mem_segs[i].gpa;
509 		gpalimit = gpabase + vm->mem_segs[i].len;
510 		if (gpa >= gpabase && gpa < gpalimit)
511 			return (TRUE);		/* 'gpa' is regular memory */
512 	}
513 
514 	if (ppt_is_mmio(vm, gpa))
515 		return (TRUE);			/* 'gpa' is pci passthru mmio */
516 
517 	return (FALSE);
518 }
519 
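/*
 * Allocate guest memory backing the range [gpa, gpa + len). The range must
 * be page-aligned and either entirely unallocated, in which case a new
 * memory segment is created, or entirely allocated already, in which case
 * this is a no-op.
 */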
520 int
521 vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
522 {
523 	int available, allocated;
524 	struct mem_seg *seg;
525 	vm_object_t object;
526 	vm_paddr_t g;
527 
528 	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
529 		return (EINVAL);
530 
531 	available = allocated = 0;
532 	g = gpa;
533 	while (g < gpa + len) {
534 		if (vm_mem_allocated(vm, g))
535 			allocated++;
536 		else
537 			available++;
538 
539 		g += PAGE_SIZE;
540 	}
541 
542 	/*
543 	 * If there are some allocated and some available pages in the address
544 	 * range then it is an error.
545 	 */
546 	if (allocated && available)
547 		return (EINVAL);
548 
549 	/*
550 	 * If the entire address range being requested has already been
551 	 * allocated then there isn't anything more to do.
552 	 */
553 	if (allocated && available == 0)
554 		return (0);
555 
556 	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
557 		return (E2BIG);
558 
559 	seg = &vm->mem_segs[vm->num_mem_segs];
560 
561 	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
562 		return (ENOMEM);
563 
564 	seg->gpa = gpa;
565 	seg->len = len;
566 	seg->object = object;
567 	seg->wired = FALSE;
568 
569 	vm->num_mem_segs++;
570 
571 	return (0);
572 }
573 
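/*
 * Wiring helpers for guest memory. Segments are wired while pci passthru
 * devices are assigned so that the gpa to hpa translations programmed into
 * the iommu remain valid; they are unwired when the last device is
 * unassigned.
 */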
574 static void
575 vm_gpa_unwire(struct vm *vm)
576 {
577 	int i, rv;
578 	struct mem_seg *seg;
579 
580 	for (i = 0; i < vm->num_mem_segs; i++) {
581 		seg = &vm->mem_segs[i];
582 		if (!seg->wired)
583 			continue;
584 
585 		rv = vm_map_unwire(&vm->vmspace->vm_map,
586 				   seg->gpa, seg->gpa + seg->len,
587 				   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
588 		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
589 		    "%#lx/%ld could not be unwired: %d",
590 		    vm_name(vm), seg->gpa, seg->len, rv));
591 
592 		seg->wired = FALSE;
593 	}
594 }
595 
596 static int
597 vm_gpa_wire(struct vm *vm)
598 {
599 	int i, rv;
600 	struct mem_seg *seg;
601 
602 	for (i = 0; i < vm->num_mem_segs; i++) {
603 		seg = &vm->mem_segs[i];
604 		if (seg->wired)
605 			continue;
606 
607 		/* XXX rlimits? */
608 		rv = vm_map_wire(&vm->vmspace->vm_map,
609 				 seg->gpa, seg->gpa + seg->len,
610 				 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
611 		if (rv != KERN_SUCCESS)
612 			break;
613 
614 		seg->wired = TRUE;
615 	}
616 
617 	if (i < vm->num_mem_segs) {
618 		/*
619 		 * Undo the wiring before returning an error.
620 		 */
621 		vm_gpa_unwire(vm);
622 		return (EAGAIN);
623 	}
624 
625 	return (0);
626 }
627 
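/*
 * Walk every page of guest memory and either map it into the VM's iommu
 * domain (removing it from the host domain) or undo that mapping. The
 * memory segments must already be wired.
 */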
628 static void
629 vm_iommu_modify(struct vm *vm, boolean_t map)
630 {
631 	int i, sz;
632 	vm_paddr_t gpa, hpa;
633 	struct mem_seg *seg;
634 	void *vp, *cookie, *host_domain;
635 
636 	sz = PAGE_SIZE;
637 	host_domain = iommu_host_domain();
638 
639 	for (i = 0; i < vm->num_mem_segs; i++) {
640 		seg = &vm->mem_segs[i];
641 		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
642 		    vm_name(vm), seg->gpa, seg->len));
643 
644 		gpa = seg->gpa;
645 		while (gpa < seg->gpa + seg->len) {
646 			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
647 					 &cookie);
648 			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
649 			    vm_name(vm), gpa));
650 
651 			vm_gpa_release(cookie);
652 
653 			hpa = DMAP_TO_PHYS((uintptr_t)vp);
654 			if (map) {
655 				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
656 				iommu_remove_mapping(host_domain, hpa, sz);
657 			} else {
658 				iommu_remove_mapping(vm->iommu, gpa, sz);
659 				iommu_create_mapping(host_domain, hpa, hpa, sz);
660 			}
661 
662 			gpa += PAGE_SIZE;
663 		}
664 	}
665 
666 	/*
667 	 * Invalidate the cached translations associated with the domain
668 	 * from which pages were removed.
669 	 */
670 	if (map)
671 		iommu_invalidate_tlb(host_domain);
672 	else
673 		iommu_invalidate_tlb(vm->iommu);
674 }
675 
676 #define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
677 #define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)
678 
679 int
680 vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
681 {
682 	int error;
683 
684 	error = ppt_unassign_device(vm, bus, slot, func);
685 	if (error)
686 		return (error);
687 
688 	if (ppt_assigned_devices(vm) == 0) {
689 		vm_iommu_unmap(vm);
690 		vm_gpa_unwire(vm);
691 	}
692 	return (0);
693 }
694 
695 int
696 vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
697 {
698 	int error;
699 	vm_paddr_t maxaddr;
700 
701 	/*
702 	 * Virtual machines with pci passthru devices get special treatment:
703 	 * - the guest physical memory is wired
704 	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
705 	 *
706 	 * We need to do this before the first pci passthru device is attached.
707 	 */
708 	if (ppt_assigned_devices(vm) == 0) {
709 		KASSERT(vm->iommu == NULL,
710 		    ("vm_assign_pptdev: iommu must be NULL"));
711 		maxaddr = vmm_mem_maxaddr();
712 		vm->iommu = iommu_create_domain(maxaddr);
713 
714 		error = vm_gpa_wire(vm);
715 		if (error)
716 			return (error);
717 
718 		vm_iommu_map(vm);
719 	}
720 
721 	error = ppt_assign_device(vm, bus, slot, func);
722 	return (error);
723 }
724 
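/*
 * Translate a guest physical address into a host kernel virtual address,
 * holding the backing page until the caller hands '*cookie' back to
 * vm_gpa_release(). The request must not cross a page boundary.
 */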
725 void *
726 vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
727 	    void **cookie)
728 {
729 	int count, pageoff;
730 	vm_page_t m;
731 
732 	pageoff = gpa & PAGE_MASK;
733 	if (len > PAGE_SIZE - pageoff)
734 		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
735 
736 	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
737 	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
738 
739 	if (count == 1) {
740 		*cookie = m;
741 		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
742 	} else {
743 		*cookie = NULL;
744 		return (NULL);
745 	}
746 }
747 
748 void
749 vm_gpa_release(void *cookie)
750 {
751 	vm_page_t m = cookie;
752 
753 	vm_page_lock(m);
754 	vm_page_unhold(m);
755 	vm_page_unlock(m);
756 }
757 
758 int
759 vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
760 		  struct vm_memory_segment *seg)
761 {
762 	int i;
763 
764 	for (i = 0; i < vm->num_mem_segs; i++) {
765 		if (gpabase == vm->mem_segs[i].gpa) {
766 			seg->gpa = vm->mem_segs[i].gpa;
767 			seg->len = vm->mem_segs[i].len;
768 			seg->wired = vm->mem_segs[i].wired;
769 			return (0);
770 		}
771 	}
772 	return (-1);
773 }
774 
775 int
776 vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
777 	      vm_offset_t *offset, struct vm_object **object)
778 {
779 	int i;
780 	size_t seg_len;
781 	vm_paddr_t seg_gpa;
782 	vm_object_t seg_obj;
783 
784 	for (i = 0; i < vm->num_mem_segs; i++) {
785 		if ((seg_obj = vm->mem_segs[i].object) == NULL)
786 			continue;
787 
788 		seg_gpa = vm->mem_segs[i].gpa;
789 		seg_len = vm->mem_segs[i].len;
790 
791 		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
792 			*offset = gpa - seg_gpa;
793 			*object = seg_obj;
794 			vm_object_reference(seg_obj);
795 			return (0);
796 		}
797 	}
798 
799 	return (EINVAL);
800 }
801 
802 int
803 vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
804 {
805 
806 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
807 		return (EINVAL);
808 
809 	if (reg >= VM_REG_LAST)
810 		return (EINVAL);
811 
812 	return (VMGETREG(vm->cookie, vcpu, reg, retval));
813 }
814 
815 int
816 vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
817 {
818 
819 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
820 		return (EINVAL);
821 
822 	if (reg >= VM_REG_LAST)
823 		return (EINVAL);
824 
825 	return (VMSETREG(vm->cookie, vcpu, reg, val));
826 }
827 
828 static boolean_t
829 is_descriptor_table(int reg)
830 {
831 
832 	switch (reg) {
833 	case VM_REG_GUEST_IDTR:
834 	case VM_REG_GUEST_GDTR:
835 		return (TRUE);
836 	default:
837 		return (FALSE);
838 	}
839 }
840 
841 static boolean_t
842 is_segment_register(int reg)
843 {
844 
845 	switch (reg) {
846 	case VM_REG_GUEST_ES:
847 	case VM_REG_GUEST_CS:
848 	case VM_REG_GUEST_SS:
849 	case VM_REG_GUEST_DS:
850 	case VM_REG_GUEST_FS:
851 	case VM_REG_GUEST_GS:
852 	case VM_REG_GUEST_TR:
853 	case VM_REG_GUEST_LDTR:
854 		return (TRUE);
855 	default:
856 		return (FALSE);
857 	}
858 }
859 
860 int
861 vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
862 		struct seg_desc *desc)
863 {
864 
865 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
866 		return (EINVAL);
867 
868 	if (!is_segment_register(reg) && !is_descriptor_table(reg))
869 		return (EINVAL);
870 
871 	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
872 }
873 
874 int
875 vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
876 		struct seg_desc *desc)
877 {
878 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
879 		return (EINVAL);
880 
881 	if (!is_segment_register(reg) && !is_descriptor_table(reg))
882 		return (EINVAL);
883 
884 	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
885 }
886 
887 static void
888 restore_guest_fpustate(struct vcpu *vcpu)
889 {
890 
891 	/* flush host state to the pcb */
892 	fpuexit(curthread);
893 
894 	/* restore guest FPU state */
895 	fpu_stop_emulating();
896 	fpurestore(vcpu->guestfpu);
897 
898 	/* restore guest XCR0 if XSAVE is enabled in the host */
899 	if (rcr4() & CR4_XSAVE)
900 		load_xcr(0, vcpu->guest_xcr0);
901 
902 	/*
903 	 * The FPU is now "dirty" with the guest's state so turn on emulation
904 	 * to trap any access to the FPU by the host.
905 	 */
906 	fpu_start_emulating();
907 }
908 
909 static void
910 save_guest_fpustate(struct vcpu *vcpu)
911 {
912 
913 	if ((rcr0() & CR0_TS) == 0)
914 		panic("fpu emulation not enabled in host!");
915 
916 	/* save guest XCR0 and restore host XCR0 */
917 	if (rcr4() & CR4_XSAVE) {
918 		vcpu->guest_xcr0 = rxcr(0);
919 		load_xcr(0, vmm_get_host_xcr0());
920 	}
921 
922 	/* save guest FPU state */
923 	fpu_stop_emulating();
924 	fpusave(vcpu->guestfpu);
925 	fpu_start_emulating();
926 }
927 
928 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
929 
930 static int
931 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
932     bool from_idle)
933 {
934 	int error;
935 
936 	vcpu_assert_locked(vcpu);
937 
938 	/*
939 	 * State transitions from the vmmdev_ioctl() must always begin from
940 	 * the VCPU_IDLE state. This guarantees that there is only a single
941 	 * ioctl() operating on a vcpu at any point.
942 	 */
943 	if (from_idle) {
944 		while (vcpu->state != VCPU_IDLE)
945 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
946 	} else {
947 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
948 		    "vcpu idle state"));
949 	}
950 
951 	if (vcpu->state == VCPU_RUNNING) {
952 		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
953 		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
954 	} else {
955 		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
956 		    "vcpu that is not running", vcpu->hostcpu));
957 	}
958 
959 	/*
960 	 * The following state transitions are allowed:
961 	 * IDLE -> FROZEN -> IDLE
962 	 * FROZEN -> RUNNING -> FROZEN
963 	 * FROZEN -> SLEEPING -> FROZEN
964 	 */
965 	switch (vcpu->state) {
966 	case VCPU_IDLE:
967 	case VCPU_RUNNING:
968 	case VCPU_SLEEPING:
969 		error = (newstate != VCPU_FROZEN);
970 		break;
971 	case VCPU_FROZEN:
972 		error = (newstate == VCPU_FROZEN);
973 		break;
974 	default:
975 		error = 1;
976 		break;
977 	}
978 
979 	if (error)
980 		return (EBUSY);
981 
982 	vcpu->state = newstate;
983 	if (newstate == VCPU_RUNNING)
984 		vcpu->hostcpu = curcpu;
985 	else
986 		vcpu->hostcpu = NOCPU;
987 
988 	if (newstate == VCPU_IDLE)
989 		wakeup(&vcpu->state);
990 
991 	return (0);
992 }
993 
994 static void
995 vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
996 {
997 	int error;
998 
999 	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
1000 		panic("Error %d setting state to %d\n", error, newstate);
1001 }
1002 
1003 static void
1004 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
1005 {
1006 	int error;
1007 
1008 	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
1009 		panic("Error %d setting state to %d", error, newstate);
1010 }
1011 
1012 static void
1013 vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
1014 {
1015 
1016 	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
1017 
1018 	/*
1019 	 * Update 'rendezvous_func' and execute a write memory barrier to
1020 	 * ensure that it is visible across all host cpus. This is not needed
1021 	 * for correctness but it does ensure that all the vcpus will notice
1022 	 * that the rendezvous is requested immediately.
1023 	 */
1024 	vm->rendezvous_func = func;
1025 	wmb();
1026 }
1027 
1028 #define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
1029 	do {								\
1030 		if (vcpuid >= 0)					\
1031 			VCPU_CTR0(vm, vcpuid, fmt);			\
1032 		else							\
1033 			VM_CTR0(vm, fmt);				\
1034 	} while (0)
1035 
1036 static void
1037 vm_handle_rendezvous(struct vm *vm, int vcpuid)
1038 {
1039 
1040 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
1041 	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
1042 
1043 	mtx_lock(&vm->rendezvous_mtx);
1044 	while (vm->rendezvous_func != NULL) {
1045 		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
1046 		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
1047 
1048 		if (vcpuid != -1 &&
1049 		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
1050 		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
1051 			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
1052 			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
1053 			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
1054 		}
1055 		if (CPU_CMP(&vm->rendezvous_req_cpus,
1056 		    &vm->rendezvous_done_cpus) == 0) {
1057 			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
1058 			vm_set_rendezvous_func(vm, NULL);
1059 			wakeup(&vm->rendezvous_func);
1060 			break;
1061 		}
1062 		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
1063 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
1064 		    "vmrndv", 0);
1065 	}
1066 	mtx_unlock(&vm->rendezvous_mtx);
1067 }
1068 
1069 /*
1070  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
1071  */
1072 static int
1073 vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
1074 {
1075 	struct vcpu *vcpu;
1076 	const char *wmesg;
1077 	int t, vcpu_halted, vm_halted;
1078 
1079 	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
1080 
1081 	vcpu = &vm->vcpu[vcpuid];
1082 	vcpu_halted = 0;
1083 	vm_halted = 0;
1084 
1085 	vcpu_lock(vcpu);
1086 	while (1) {
1087 		/*
1088 		 * Do a final check for pending NMI or interrupts before
1089 		 * really putting this thread to sleep. Also check for
1090 		 * software events that would cause this vcpu to wakeup.
1091 		 *
1092 		 * These interrupts/events could have happened after the
1093 		 * vcpu returned from VMRUN() and before it acquired the
1094 		 * vcpu lock above.
1095 		 */
1096 		if (vm->rendezvous_func != NULL || vm->suspend)
1097 			break;
1098 		if (vm_nmi_pending(vm, vcpuid))
1099 			break;
1100 		if (!intr_disabled) {
1101 			if (vm_extint_pending(vm, vcpuid) ||
1102 			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
1103 				break;
1104 			}
1105 		}
1106 
1107 		/*
1108 		 * Some Linux guests implement "halt" by having all vcpus
1109 		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
1110 		 * track of the vcpus that have entered this state. When all
1111 		 * vcpus enter the halted state the virtual machine is halted.
1112 		 */
1113 		if (intr_disabled) {
1114 			wmesg = "vmhalt";
1115 			VCPU_CTR0(vm, vcpuid, "Halted");
1116 			if (!vcpu_halted && halt_detection_enabled) {
1117 				vcpu_halted = 1;
1118 				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
1119 			}
1120 			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
1121 				vm_halted = 1;
1122 				break;
1123 			}
1124 		} else {
1125 			wmesg = "vmidle";
1126 		}
1127 
1128 		t = ticks;
1129 		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
1130 		msleep_spin(vcpu, &vcpu->mtx, wmesg, 0);
1131 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
1132 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
1133 	}
1134 
1135 	if (vcpu_halted)
1136 		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);
1137 
1138 	vcpu_unlock(vcpu);
1139 
1140 	if (vm_halted)
1141 		vm_suspend(vm, VM_SUSPEND_HALT);
1142 
1143 	return (0);
1144 }
1145 
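/*
 * Handle a nested page fault exit: first try to emulate accessed/dirty bit
 * updates directly in the nested pmap and, failing that, resolve the fault
 * through vm_fault() on the guest's vmspace.
 */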
1146 static int
1147 vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
1148 {
1149 	int rv, ftype;
1150 	struct vm_map *map;
1151 	struct vcpu *vcpu;
1152 	struct vm_exit *vme;
1153 
1154 	vcpu = &vm->vcpu[vcpuid];
1155 	vme = &vcpu->exitinfo;
1156 
1157 	ftype = vme->u.paging.fault_type;
1158 	KASSERT(ftype == VM_PROT_READ ||
1159 	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
1160 	    ("vm_handle_paging: invalid fault_type %d", ftype));
1161 
1162 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
1163 		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
1164 		    vme->u.paging.gpa, ftype);
1165 		if (rv == 0)
1166 			goto done;
1167 	}
1168 
1169 	map = &vm->vmspace->vm_map;
1170 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
1171 
1172 	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
1173 	    "ftype = %d", rv, vme->u.paging.gpa, ftype);
1174 
1175 	if (rv != KERN_SUCCESS)
1176 		return (EFAULT);
1177 done:
1178 	/* restart execution at the faulting instruction */
1179 	vme->inst_length = 0;
1180 
1181 	return (0);
1182 }
1183 
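/*
 * Emulate an instruction that faulted on an MMIO access: fetch and decode
 * it, then hand it to the in-kernel local APIC, ioapic or HPET model, or
 * return to userland for any other address range.
 */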
1184 static int
1185 vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
1186 {
1187 	struct vie *vie;
1188 	struct vcpu *vcpu;
1189 	struct vm_exit *vme;
1190 	uint64_t gla, gpa;
1191 	struct vm_guest_paging *paging;
1192 	mem_region_read_t mread;
1193 	mem_region_write_t mwrite;
1194 	int error;
1195 
1196 	vcpu = &vm->vcpu[vcpuid];
1197 	vme = &vcpu->exitinfo;
1198 
1199 	gla = vme->u.inst_emul.gla;
1200 	gpa = vme->u.inst_emul.gpa;
1201 	vie = &vme->u.inst_emul.vie;
1202 	paging = &vme->u.inst_emul.paging;
1203 
1204 	vie_init(vie);
1205 
1206 	/* Fetch, decode and emulate the faulting instruction */
1207 	error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
1208 	    vme->inst_length, vie);
1209 	if (error == 1)
1210 		return (0);		/* Resume guest to handle page fault */
1211 	else if (error == -1)
1212 		return (EFAULT);
1213 	else if (error != 0)
1214 		panic("%s: vmm_fetch_instruction error %d", __func__, error);
1215 
1216 	if (vmm_decode_instruction(vm, vcpuid, gla, paging->cpu_mode, vie) != 0)
1217 		return (EFAULT);
1218 
1219 	/* return to userland unless this is an in-kernel emulated device */
1220 	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
1221 		mread = lapic_mmio_read;
1222 		mwrite = lapic_mmio_write;
1223 	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
1224 		mread = vioapic_mmio_read;
1225 		mwrite = vioapic_mmio_write;
1226 	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
1227 		mread = vhpet_mmio_read;
1228 		mwrite = vhpet_mmio_write;
1229 	} else {
1230 		*retu = true;
1231 		return (0);
1232 	}
1233 
1234 	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
1235 	    retu);
1236 
1237 	return (error);
1238 }
1239 
1240 static int
1241 vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
1242 {
1243 	int i, done;
1244 	struct vcpu *vcpu;
1245 
1246 	done = 0;
1247 	vcpu = &vm->vcpu[vcpuid];
1248 
1249 	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);
1250 
1251 	/*
1252 	 * Wait until all 'active_cpus' have suspended themselves.
1253 	 *
1254 	 * Since a VM may be suspended at any time including when one or
1255 	 * more vcpus are doing a rendezvous we need to call the rendezvous
1256 	 * handler while we are waiting to prevent a deadlock.
1257 	 */
1258 	vcpu_lock(vcpu);
1259 	while (1) {
1260 		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
1261 			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
1262 			break;
1263 		}
1264 
1265 		if (vm->rendezvous_func == NULL) {
1266 			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
1267 			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
1268 			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
1269 			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
1270 		} else {
1271 			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
1272 			vcpu_unlock(vcpu);
1273 			vm_handle_rendezvous(vm, vcpuid);
1274 			vcpu_lock(vcpu);
1275 		}
1276 	}
1277 	vcpu_unlock(vcpu);
1278 
1279 	/*
1280 	 * Wakeup the other sleeping vcpus and return to userspace.
1281 	 */
1282 	for (i = 0; i < VM_MAXCPU; i++) {
1283 		if (CPU_ISSET(i, &vm->suspended_cpus)) {
1284 			vcpu_notify_event(vm, i, false);
1285 		}
1286 	}
1287 
1288 	*retu = true;
1289 	return (0);
1290 }
1291 
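/*
 * Record the reason for suspending the VM and notify all active vcpus so
 * they exit the guest. Only the first suspend request takes effect; later
 * requests return EALREADY.
 */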
1292 int
1293 vm_suspend(struct vm *vm, enum vm_suspend_how how)
1294 {
1295 	int i;
1296 
1297 	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
1298 		return (EINVAL);
1299 
1300 	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
1301 		VM_CTR2(vm, "virtual machine already suspended %d/%d",
1302 		    vm->suspend, how);
1303 		return (EALREADY);
1304 	}
1305 
1306 	VM_CTR1(vm, "virtual machine successfully suspended %d", how);
1307 
1308 	/*
1309 	 * Notify all active vcpus that they are now suspended.
1310 	 */
1311 	for (i = 0; i < VM_MAXCPU; i++) {
1312 		if (CPU_ISSET(i, &vm->active_cpus))
1313 			vcpu_notify_event(vm, i, false);
1314 	}
1315 
1316 	return (0);
1317 }
1318 
1319 void
1320 vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
1321 {
1322 	struct vm_exit *vmexit;
1323 
1324 	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
1325 	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
1326 
1327 	vmexit = vm_exitinfo(vm, vcpuid);
1328 	vmexit->rip = rip;
1329 	vmexit->inst_length = 0;
1330 	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
1331 	vmexit->u.suspended.how = vm->suspend;
1332 }
1333 
1334 void
1335 vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
1336 {
1337 	struct vm_exit *vmexit;
1338 
1339 	KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress"));
1340 
1341 	vmexit = vm_exitinfo(vm, vcpuid);
1342 	vmexit->rip = rip;
1343 	vmexit->inst_length = 0;
1344 	vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
1345 	vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1);
1346 }
1347 
1348 void
1349 vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
1350 {
1351 	struct vm_exit *vmexit;
1352 
1353 	vmexit = vm_exitinfo(vm, vcpuid);
1354 	vmexit->rip = rip;
1355 	vmexit->inst_length = 0;
1356 	vmexit->exitcode = VM_EXITCODE_BOGUS;
1357 	vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
1358 }
1359 
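/*
 * Main vcpu execution loop: enter the guest through the backend VMRUN and
 * keep handling exits in the kernel (suspend, ioapic EOI, rendezvous, HLT,
 * paging, instruction emulation, port I/O) until an exit must be completed
 * in userland or an error occurs.
 */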
1360 int
1361 vm_run(struct vm *vm, struct vm_run *vmrun)
1362 {
1363 	int error, vcpuid;
1364 	struct vcpu *vcpu;
1365 	struct pcb *pcb;
1366 	uint64_t tscval, rip;
1367 	struct vm_exit *vme;
1368 	bool retu, intr_disabled;
1369 	pmap_t pmap;
1370 	void *rptr, *sptr;
1371 
1372 	vcpuid = vmrun->cpuid;
1373 
1374 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1375 		return (EINVAL);
1376 
1377 	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
1378 		return (EINVAL);
1379 
1380 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
1381 		return (EINVAL);
1382 
1383 	rptr = &vm->rendezvous_func;
1384 	sptr = &vm->suspend;
1385 	pmap = vmspace_pmap(vm->vmspace);
1386 	vcpu = &vm->vcpu[vcpuid];
1387 	vme = &vcpu->exitinfo;
1388 	rip = vmrun->rip;
1389 restart:
1390 	critical_enter();
1391 
1392 	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
1393 	    ("vm_run: absurd pm_active"));
1394 
1395 	tscval = rdtsc();
1396 
1397 	pcb = PCPU_GET(curpcb);
1398 	set_pcb_flags(pcb, PCB_FULL_IRET);
1399 
1400 	restore_guest_msrs(vm, vcpuid);
1401 	restore_guest_fpustate(vcpu);
1402 
1403 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
1404 	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
1405 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
1406 
1407 	save_guest_fpustate(vcpu);
1408 	restore_host_msrs(vm, vcpuid);
1409 
1410 	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
1411 
1412 	critical_exit();
1413 
1414 	if (error == 0) {
1415 		retu = false;
1416 		switch (vme->exitcode) {
1417 		case VM_EXITCODE_SUSPENDED:
1418 			error = vm_handle_suspend(vm, vcpuid, &retu);
1419 			break;
1420 		case VM_EXITCODE_IOAPIC_EOI:
1421 			vioapic_process_eoi(vm, vcpuid,
1422 			    vme->u.ioapic_eoi.vector);
1423 			break;
1424 		case VM_EXITCODE_RENDEZVOUS:
1425 			vm_handle_rendezvous(vm, vcpuid);
1426 			error = 0;
1427 			break;
1428 		case VM_EXITCODE_HLT:
1429 			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
1430 			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
1431 			break;
1432 		case VM_EXITCODE_PAGING:
1433 			error = vm_handle_paging(vm, vcpuid, &retu);
1434 			break;
1435 		case VM_EXITCODE_INST_EMUL:
1436 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
1437 			break;
1438 		case VM_EXITCODE_INOUT:
1439 		case VM_EXITCODE_INOUT_STR:
1440 			error = vm_handle_inout(vm, vcpuid, vme, &retu);
1441 			break;
1442 		default:
1443 			retu = true;	/* handled in userland */
1444 			break;
1445 		}
1446 	}
1447 
1448 	if (error == 0 && retu == false) {
1449 		rip = vme->rip + vme->inst_length;
1450 		goto restart;
1451 	}
1452 
1453 	/* copy the exit information */
1454 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
1455 	return (error);
1456 }
1457 
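/*
 * Queue an exception (vectors 0-31) for injection on the next entry into
 * the guest. Only a single exception may be pending at a time.
 */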
1458 int
1459 vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
1460 {
1461 	struct vcpu *vcpu;
1462 
1463 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1464 		return (EINVAL);
1465 
1466 	if (exception->vector < 0 || exception->vector >= 32)
1467 		return (EINVAL);
1468 
1469 	vcpu = &vm->vcpu[vcpuid];
1470 
1471 	if (vcpu->exception_pending) {
1472 		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
1473 		    "pending exception %d", exception->vector,
1474 		    vcpu->exception.vector);
1475 		return (EBUSY);
1476 	}
1477 
1478 	vcpu->exception_pending = 1;
1479 	vcpu->exception = *exception;
1480 	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
1481 	return (0);
1482 }
1483 
1484 int
1485 vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
1486 {
1487 	struct vcpu *vcpu;
1488 	int pending;
1489 
1490 	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
1491 
1492 	vcpu = &vm->vcpu[vcpuid];
1493 	pending = vcpu->exception_pending;
1494 	if (pending) {
1495 		vcpu->exception_pending = 0;
1496 		*exception = vcpu->exception;
1497 		VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
1498 		    exception->vector);
1499 	}
1500 	return (pending);
1501 }
1502 
1503 static void
1504 vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
1505 {
1506 	struct vm_exit *vmexit;
1507 	int error;
1508 
1509 	error = vm_inject_exception(vm, vcpuid, exception);
1510 	KASSERT(error == 0, ("vm_inject_exception error %d", error));
1511 
1512 	/*
1513 	 * A fault-like exception allows the instruction to be restarted
1514 	 * after the exception handler returns.
1515 	 *
1516 	 * By setting the inst_length to 0 we ensure that the instruction
1517 	 * pointer remains at the faulting instruction.
1518 	 */
1519 	vmexit = vm_exitinfo(vm, vcpuid);
1520 	vmexit->inst_length = 0;
1521 }
1522 
1523 void
1524 vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
1525 {
1526 	struct vm_exception pf = {
1527 		.vector = IDT_PF,
1528 		.error_code_valid = 1,
1529 		.error_code = error_code
1530 	};
1531 	int error;
1532 
1533 	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
1534 	    error_code, cr2);
1535 
1536 	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
1537 	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));
1538 
1539 	vm_inject_fault(vm, vcpuid, &pf);
1540 }
1541 
1542 void
1543 vm_inject_gp(struct vm *vm, int vcpuid)
1544 {
1545 	struct vm_exception gpf = {
1546 		.vector = IDT_GP,
1547 		.error_code_valid = 1,
1548 		.error_code = 0
1549 	};
1550 
1551 	vm_inject_fault(vm, vcpuid, &gpf);
1552 }
1553 
1554 void
1555 vm_inject_ud(struct vm *vm, int vcpuid)
1556 {
1557 	struct vm_exception udf = {
1558 		.vector = IDT_UD,
1559 		.error_code_valid = 0
1560 	};
1561 
1562 	vm_inject_fault(vm, vcpuid, &udf);
1563 }
1564 
1565 static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
1566 
1567 int
1568 vm_inject_nmi(struct vm *vm, int vcpuid)
1569 {
1570 	struct vcpu *vcpu;
1571 
1572 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1573 		return (EINVAL);
1574 
1575 	vcpu = &vm->vcpu[vcpuid];
1576 
1577 	vcpu->nmi_pending = 1;
1578 	vcpu_notify_event(vm, vcpuid, false);
1579 	return (0);
1580 }
1581 
1582 int
1583 vm_nmi_pending(struct vm *vm, int vcpuid)
1584 {
1585 	struct vcpu *vcpu;
1586 
1587 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1588 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1589 
1590 	vcpu = &vm->vcpu[vcpuid];
1591 
1592 	return (vcpu->nmi_pending);
1593 }
1594 
1595 void
1596 vm_nmi_clear(struct vm *vm, int vcpuid)
1597 {
1598 	struct vcpu *vcpu;
1599 
1600 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1601 		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
1602 
1603 	vcpu = &vm->vcpu[vcpuid];
1604 
1605 	if (vcpu->nmi_pending == 0)
1606 		panic("vm_nmi_clear: inconsistent nmi_pending state");
1607 
1608 	vcpu->nmi_pending = 0;
1609 	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
1610 }
1611 
1612 static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
1613 
1614 int
1615 vm_inject_extint(struct vm *vm, int vcpuid)
1616 {
1617 	struct vcpu *vcpu;
1618 
1619 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1620 		return (EINVAL);
1621 
1622 	vcpu = &vm->vcpu[vcpuid];
1623 
1624 	vcpu->extint_pending = 1;
1625 	vcpu_notify_event(vm, vcpuid, false);
1626 	return (0);
1627 }
1628 
1629 int
1630 vm_extint_pending(struct vm *vm, int vcpuid)
1631 {
1632 	struct vcpu *vcpu;
1633 
1634 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1635 		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
1636 
1637 	vcpu = &vm->vcpu[vcpuid];
1638 
1639 	return (vcpu->extint_pending);
1640 }
1641 
1642 void
1643 vm_extint_clear(struct vm *vm, int vcpuid)
1644 {
1645 	struct vcpu *vcpu;
1646 
1647 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1648 		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);
1649 
1650 	vcpu = &vm->vcpu[vcpuid];
1651 
1652 	if (vcpu->extint_pending == 0)
1653 		panic("vm_extint_clear: inconsistent extint_pending state");
1654 
1655 	vcpu->extint_pending = 0;
1656 	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
1657 }
1658 
1659 int
1660 vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
1661 {
1662 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1663 		return (EINVAL);
1664 
1665 	if (type < 0 || type >= VM_CAP_MAX)
1666 		return (EINVAL);
1667 
1668 	return (VMGETCAP(vm->cookie, vcpu, type, retval));
1669 }
1670 
1671 int
1672 vm_set_capability(struct vm *vm, int vcpu, int type, int val)
1673 {
1674 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1675 		return (EINVAL);
1676 
1677 	if (type < 0 || type >= VM_CAP_MAX)
1678 		return (EINVAL);
1679 
1680 	return (VMSETCAP(vm->cookie, vcpu, type, val));
1681 }
1682 
1683 uint64_t *
1684 vm_guest_msrs(struct vm *vm, int cpu)
1685 {
1686 	return (vm->vcpu[cpu].guest_msrs);
1687 }
1688 
1689 struct vlapic *
1690 vm_lapic(struct vm *vm, int cpu)
1691 {
1692 	return (vm->vcpu[cpu].vlapic);
1693 }
1694 
1695 struct vioapic *
1696 vm_ioapic(struct vm *vm)
1697 {
1698 
1699 	return (vm->vioapic);
1700 }
1701 
1702 struct vhpet *
1703 vm_hpet(struct vm *vm)
1704 {
1705 
1706 	return (vm->vhpet);
1707 }
1708 
1709 boolean_t
1710 vmm_is_pptdev(int bus, int slot, int func)
1711 {
1712 	int found, i, n;
1713 	int b, s, f;
1714 	char *val, *cp, *cp2;
1715 
1716 	/*
1717 	 * XXX
1718 	 * The length of an environment variable is limited to 128 bytes which
1719 	 * puts an upper limit on the number of passthru devices that may be
1720 	 * specified using a single environment variable.
1721 	 *
1722 	 * Work around this by scanning multiple environment variable
1723 	 * names instead of a single one - yuck!
1724 	 */
1725 	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
1726 
1727 	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
1728 	found = 0;
1729 	for (i = 0; names[i] != NULL && !found; i++) {
1730 		cp = val = getenv(names[i]);
1731 		while (cp != NULL && *cp != '\0') {
1732 			if ((cp2 = strchr(cp, ' ')) != NULL)
1733 				*cp2 = '\0';
1734 
1735 			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
1736 			if (n == 3 && bus == b && slot == s && func == f) {
1737 				found = 1;
1738 				break;
1739 			}
1740 
1741 			if (cp2 != NULL)
1742 				*cp2++ = ' ';
1743 
1744 			cp = cp2;
1745 		}
1746 		freeenv(val);
1747 	}
1748 	return (found);
1749 }
1750 
1751 void *
1752 vm_iommu_domain(struct vm *vm)
1753 {
1754 
1755 	return (vm->iommu);
1756 }
1757 
1758 int
1759 vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
1760     bool from_idle)
1761 {
1762 	int error;
1763 	struct vcpu *vcpu;
1764 
1765 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1766 		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
1767 
1768 	vcpu = &vm->vcpu[vcpuid];
1769 
1770 	vcpu_lock(vcpu);
1771 	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
1772 	vcpu_unlock(vcpu);
1773 
1774 	return (error);
1775 }
1776 
1777 enum vcpu_state
1778 vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
1779 {
1780 	struct vcpu *vcpu;
1781 	enum vcpu_state state;
1782 
1783 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1784 		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
1785 
1786 	vcpu = &vm->vcpu[vcpuid];
1787 
1788 	vcpu_lock(vcpu);
1789 	state = vcpu->state;
1790 	if (hostcpu != NULL)
1791 		*hostcpu = vcpu->hostcpu;
1792 	vcpu_unlock(vcpu);
1793 
1794 	return (state);
1795 }
1796 
1797 int
1798 vm_activate_cpu(struct vm *vm, int vcpuid)
1799 {
1800 
1801 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1802 		return (EINVAL);
1803 
1804 	if (CPU_ISSET(vcpuid, &vm->active_cpus))
1805 		return (EBUSY);
1806 
1807 	VCPU_CTR0(vm, vcpuid, "activated");
1808 	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
1809 	return (0);
1810 }
1811 
1812 cpuset_t
1813 vm_active_cpus(struct vm *vm)
1814 {
1815 
1816 	return (vm->active_cpus);
1817 }
1818 
1819 cpuset_t
1820 vm_suspended_cpus(struct vm *vm)
1821 {
1822 
1823 	return (vm->suspended_cpus);
1824 }
1825 
1826 void *
1827 vcpu_stats(struct vm *vm, int vcpuid)
1828 {
1829 
1830 	return (vm->vcpu[vcpuid].stats);
1831 }
1832 
1833 int
1834 vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
1835 {
1836 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1837 		return (EINVAL);
1838 
1839 	*state = vm->vcpu[vcpuid].x2apic_state;
1840 
1841 	return (0);
1842 }
1843 
1844 int
1845 vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1846 {
1847 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1848 		return (EINVAL);
1849 
1850 	if (state >= X2APIC_STATE_LAST)
1851 		return (EINVAL);
1852 
1853 	vm->vcpu[vcpuid].x2apic_state = state;
1854 
1855 	vlapic_set_x2apic_state(vm, vcpuid, state);
1856 
1857 	return (0);
1858 }
1859 
1860 /*
1861  * This function is called to ensure that a vcpu "sees" a pending event
1862  * as soon as possible:
1863  * - If the vcpu thread is sleeping then it is woken up.
1864  * - If the vcpu is running on a different host_cpu then an IPI will be directed
1865  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
1866  */
1867 void
1868 vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
1869 {
1870 	int hostcpu;
1871 	struct vcpu *vcpu;
1872 
1873 	vcpu = &vm->vcpu[vcpuid];
1874 
1875 	vcpu_lock(vcpu);
1876 	hostcpu = vcpu->hostcpu;
1877 	if (vcpu->state == VCPU_RUNNING) {
1878 		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
1879 		if (hostcpu != curcpu) {
1880 			if (lapic_intr) {
1881 				vlapic_post_intr(vcpu->vlapic, hostcpu,
1882 				    vmm_ipinum);
1883 			} else {
1884 				ipi_cpu(hostcpu, vmm_ipinum);
1885 			}
1886 		} else {
1887 			/*
1888 			 * If the 'vcpu' is running on 'curcpu' then it must
1889 			 * be sending a notification to itself (e.g. SELF_IPI).
1890 			 * The pending event will be picked up when the vcpu
1891 			 * transitions back to guest context.
1892 			 */
1893 		}
1894 	} else {
1895 		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
1896 		    "with hostcpu %d", vcpu->state, hostcpu));
1897 		if (vcpu->state == VCPU_SLEEPING)
1898 			wakeup_one(vcpu);
1899 	}
1900 	vcpu_unlock(vcpu);
1901 }
1902 
1903 struct vmspace *
1904 vm_get_vmspace(struct vm *vm)
1905 {
1906 
1907 	return (vm->vmspace);
1908 }
1909 
1910 int
1911 vm_apicid2vcpuid(struct vm *vm, int apicid)
1912 {
1913 	/*
1914 	 * XXX apic id is assumed to be numerically identical to vcpu id
1915 	 */
1916 	return (apicid);
1917 }
1918 
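/*
 * Initiate a rendezvous of the vcpus in 'dest': install the rendezvous
 * function and argument, kick the target vcpus out of the guest and then
 * participate in and wait for the rendezvous on behalf of 'vcpuid'.
 */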
1919 void
1920 vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
1921     vm_rendezvous_func_t func, void *arg)
1922 {
1923 	int i;
1924 
1925 	/*
1926 	 * Enforce that this function is called without any locks
1927 	 */
1928 	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
1929 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
1930 	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
1931 
1932 restart:
1933 	mtx_lock(&vm->rendezvous_mtx);
1934 	if (vm->rendezvous_func != NULL) {
1935 		/*
1936 		 * If a rendezvous is already in progress then we need to
1937 		 * call the rendezvous handler in case this 'vcpuid' is one
1938 		 * of the targets of the rendezvous.
1939 		 */
1940 		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
1941 		mtx_unlock(&vm->rendezvous_mtx);
1942 		vm_handle_rendezvous(vm, vcpuid);
1943 		goto restart;
1944 	}
1945 	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
1946 	    "rendezvous is still in progress"));
1947 
1948 	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
1949 	vm->rendezvous_req_cpus = dest;
1950 	CPU_ZERO(&vm->rendezvous_done_cpus);
1951 	vm->rendezvous_arg = arg;
1952 	vm_set_rendezvous_func(vm, func);
1953 	mtx_unlock(&vm->rendezvous_mtx);
1954 
1955 	/*
1956 	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
1957 	 * vcpus so they handle the rendezvous as soon as possible.
1958 	 */
1959 	for (i = 0; i < VM_MAXCPU; i++) {
1960 		if (CPU_ISSET(i, &dest))
1961 			vcpu_notify_event(vm, i, false);
1962 	}
1963 
1964 	vm_handle_rendezvous(vm, vcpuid);
1965 }
1966 
1967 struct vatpic *
1968 vm_atpic(struct vm *vm)
1969 {
1970 	return (vm->vatpic);
1971 }
1972 
1973 struct vatpit *
1974 vm_atpit(struct vm *vm)
1975 {
1976 	return (vm->vatpit);
1977 }
1978 
1979 enum vm_reg_name
1980 vm_segment_name(int seg)
1981 {
1982 	static enum vm_reg_name seg_names[] = {
1983 		VM_REG_GUEST_ES,
1984 		VM_REG_GUEST_CS,
1985 		VM_REG_GUEST_SS,
1986 		VM_REG_GUEST_DS,
1987 		VM_REG_GUEST_FS,
1988 		VM_REG_GUEST_GS
1989 	};
1990 
1991 	KASSERT(seg >= 0 && seg < nitems(seg_names),
1992 	    ("%s: invalid segment encoding %d", __func__, seg));
1993 	return (seg_names[seg]);
1994 }
1995 
1996 
1997 /*
1998  * Return the amount of in-use and wired memory for the VM. Since
1999  * these are global stats, only return the values for vCPU 0.
2000  */
2001 VMM_STAT_DECLARE(VMM_MEM_RESIDENT);
2002 VMM_STAT_DECLARE(VMM_MEM_WIRED);
2003 
2004 static void
2005 vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
2006 {
2007 
2008 	if (vcpu == 0) {
2009 		vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT,
2010 	       	    PAGE_SIZE * vmspace_resident_count(vm->vmspace));
2011 	}
2012 }
2013 
2014 static void
2015 vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
2016 {
2017 
2018 	if (vcpu == 0) {
2019 		vmm_stat_set(vm, vcpu, VMM_MEM_WIRED,
2020 	      	    PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
2021 	}
2022 }
2023 
2024 VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
2025 VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
2026