/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _VMM_H_
#define	_VMM_H_

enum vm_suspend_how {
	VM_SUSPEND_NONE,
	VM_SUSPEND_RESET,
	VM_SUSPEND_POWEROFF,
	VM_SUSPEND_HALT,
	VM_SUSPEND_LAST
};

/*
 * Identifiers for architecturally defined registers.
 */
enum vm_reg_name {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_EFER,
	VM_REG_GUEST_CR2,
	VM_REG_LAST
};

enum x2apic_state {
	X2APIC_DISABLED,
	X2APIC_ENABLED,
	X2APIC_STATE_LAST
};

#ifdef _KERNEL

#define	VM_MAX_NAMELEN	32

struct vm;
struct vm_exception;
struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
struct vhpet;
struct vioapic;
struct vlapic;
struct vmspace;
struct vm_object;
struct pmap;

typedef int	(*vmm_init_func_t)(int ipinum);
typedef int	(*vmm_cleanup_func_t)(void);
typedef void	(*vmm_resume_func_t)(void);
typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
				  struct pmap *pmap, void *rendezvous_cookie,
				  void *suspend_cookie);
typedef void	(*vmi_cleanup_func_t)(void *vmi);
typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
				      uint64_t *retval);
typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
				      uint64_t val);
typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
				  struct seg_desc *desc);
typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
				  struct seg_desc *desc);
typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);

struct vmm_ops {
	vmm_init_func_t		init;		/* module-wide initialization */
	vmm_cleanup_func_t	cleanup;
	vmm_resume_func_t	resume;

	vmi_init_func_t		vminit;		/* vm-specific initialization */
	vmi_run_func_t		vmrun;
	vmi_cleanup_func_t	vmcleanup;
	vmi_get_register_t	vmgetreg;
	vmi_set_register_t	vmsetreg;
	vmi_get_desc_t		vmgetdesc;
	vmi_set_desc_t		vmsetdesc;
	vmi_get_cap_t		vmgetcap;
	vmi_set_cap_t		vmsetcap;
	vmi_vmspace_alloc	vmspace_alloc;
	vmi_vmspace_free	vmspace_free;
	vmi_vlapic_init		vlapic_init;
	vmi_vlapic_cleanup	vlapic_cleanup;
};

extern struct vmm_ops vmm_ops_intel;
extern struct vmm_ops vmm_ops_amd;
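
/*
 * Illustrative sketch only: vmm.c selects one of the two ops structures
 * above at module load time and dispatches through it.  The 'ops'
 * variable and wrapper macros below are assumptions made for this
 * example, not declarations from this header:
 *
 *	static struct vmm_ops *ops;	(vmm_ops_intel or vmm_ops_amd)
 *
 *	#define	VMM_INIT(num)	 (ops != NULL ? (*ops->init)(num) : ENXIO)
 *	#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap) : NULL)
 *	#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
 *	    (ops != NULL ? \
 *	    (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
 */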

int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
		  void **cookie);
void vm_gpa_release(void *cookie);
int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
	      struct vm_memory_segment *seg);
int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
		  vm_offset_t *offset, struct vm_object **object);
boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		    struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		    struct seg_desc *desc);
int vm_run(struct vm *vm, struct vm_run *vmrun);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
int vm_nmi_pending(struct vm *vm, int vcpuid);
void vm_nmi_clear(struct vm *vm, int vcpuid);
int vm_inject_extint(struct vm *vm, int vcpu);
int vm_extint_pending(struct vm *vm, int vcpuid);
void vm_extint_clear(struct vm *vm, int vcpuid);
uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
struct vlapic *vm_lapic(struct vm *vm, int cpu);
struct vioapic *vm_ioapic(struct vm *vm);
struct vhpet *vm_hpet(struct vm *vm);
int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vm *vm, int vcpu);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);

/*
 * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
 * The rendezvous 'func(arg)' is not allowed to do anything that will
 * cause the thread to be put to sleep.
 *
 * If the rendezvous is being initiated from a vcpu context then the
 * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
 *
 * The caller cannot hold any locks when initiating the rendezvous.
 *
 * The implementation of this API may cause vcpus other than those specified
 * by 'dest' to be stalled. The caller should not rely on any vcpus making
 * forward progress when the rendezvous is in progress.
 */
typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
    vm_rendezvous_func_t func, void *arg);
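
/*
 * Usage sketch, illustrative only ('invalidate_mappings' is hypothetical
 * and not declared by this header).  Run a non-sleeping callback on every
 * active vcpu from a non-vcpu thread:
 *
 *	static void
 *	invalidate_mappings(struct vm *vm, int vcpuid, void *arg)
 *	{
 *		(runs once per vcpu in 'dest'; must not sleep)
 *	}
 *
 *	cpuset_t dest = vm_active_cpus(vm);
 *	vm_smp_rendezvous(vm, -1, dest, invalidate_mappings, NULL);
 */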

static __inline int
vcpu_rendezvous_pending(void *rendezvous_cookie)
{

	return (*(uintptr_t *)rendezvous_cookie != 0);
}

static __inline int
vcpu_suspended(void *suspend_cookie)
{

	return (*(int *)suspend_cookie);
}
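
/*
 * The two cookies above are meant to be polled from the hardware-specific
 * vmrun loop.  A minimal sketch, assuming 'rptr' and 'sptr' are the
 * rendezvous_cookie and suspend_cookie arguments of vmi_run_func_t:
 *
 *	do {
 *		if (vcpu_suspended(sptr)) {
 *			vm_exit_suspended(vm, vcpu, rip);
 *			break;
 *		}
 *		if (vcpu_rendezvous_pending(rptr)) {
 *			vm_exit_rendezvous(vm, vcpu, rip);
 *			break;
 *		}
 *		(enter guest, handle the hardware exit)
 *	} while (handled);
 */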

/*
 * Return 1 if the device identified by bus/slot/func is supposed to be a
 * PCI passthrough device.
 *
 * Return 0 otherwise.
 */
int vmm_is_pptdev(int bus, int slot, int func);

void *vm_iommu_domain(struct vm *vm);

enum vcpu_state {
	VCPU_IDLE,
	VCPU_FROZEN,
	VCPU_RUNNING,
	VCPU_SLEEPING,
};

int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
    bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);

static __inline int
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
{

	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
}

void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);

/*
 * Inject exception 'vme' into the guest vcpu. This function returns 0 on
 * success and non-zero on failure.
 *
 * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 * this function directly because they enforce the trap-like or fault-like
 * behavior of an exception.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);
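
/*
 * Sketch of a fault-like wrapper built on top of vm_inject_exception().
 * IDT_GP comes from <machine/segments.h>; the 'struct vm_exception'
 * field names are shown for illustration only:
 *
 *	void
 *	vm_inject_gp(struct vm *vm, int vcpuid)
 *	{
 *		struct vm_exception gpf = {
 *			.vector = IDT_GP,
 *			.error_code_valid = 1,
 *			.error_code = 0,
 *		};
 *
 *		vm_inject_exception(vm, vcpuid, &gpf);
 *	}
 */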

/*
 * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an
 * exception is pending and also updates 'vme'. The pending exception is
 * cleared when this function returns.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme);
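
/*
 * Typical consumption point (sketch): the hardware-specific code drains
 * the pending exception just before resuming the guest:
 *
 *	struct vm_exception vme;
 *
 *	if (vm_exception_pending(vm, vcpuid, &vme)) {
 *		(program event injection for vme.vector)
 *	}
 */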

void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2);

enum vm_reg_name vm_segment_name(int seg_encoding);

#endif	/* _KERNEL */

#define	VM_MAXCPU	16			/* maximum virtual cpus */

/*
 * Identifiers for optional vmm capabilities
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT,
	VM_CAP_MTRAP_EXIT,
	VM_CAP_PAUSE_EXIT,
	VM_CAP_UNRESTRICTED_GUEST,
	VM_CAP_ENABLE_INVPCID,
	VM_CAP_MAX
};

enum vm_intr_trigger {
	EDGE_TRIGGER,
	LEVEL_TRIGGER
};

/*
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};
#define	SEG_DESC_TYPE(desc)		((desc)->access & 0x001f)
#define	SEG_DESC_PRESENT(desc)		((desc)->access & 0x0080)
#define	SEG_DESC_DEF32(desc)		((desc)->access & 0x4000)
#define	SEG_DESC_GRANULARITY(desc)	((desc)->access & 0x8000)
#define	SEG_DESC_UNUSABLE(desc)		((desc)->access & 0x10000)
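
/*
 * Example (illustrative): a flat ring-0 32-bit code segment is commonly
 * described by access = 0xc09b, for which:
 *
 *	SEG_DESC_TYPE(&desc)		== 0x1b	(code, execute/read, accessed)
 *	SEG_DESC_PRESENT(&desc)		!= 0
 *	SEG_DESC_DEF32(&desc)		!= 0
 *	SEG_DESC_GRANULARITY(&desc)	!= 0
 *	SEG_DESC_UNUSABLE(&desc)	== 0
 */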

enum vm_cpu_mode {
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

enum vm_paging_mode {
	PAGING_MODE_FLAT,
	PAGING_MODE_32,
	PAGING_MODE_PAE,
	PAGING_MODE_64,
};

struct vm_guest_paging {
	uint64_t	cr3;
	int		cpl;
	enum vm_cpu_mode cpu_mode;
	enum vm_paging_mode paging_mode;
};

/*
 * The data structures 'vie' and 'vie_op' are meant to be opaque to the
 * consumers of instruction decoding. Their contents are exposed only
 * because they are part of the 'vm_exit' structure.
 */
struct vie_op {
	uint8_t		op_byte;	/* actual opcode byte */
	uint8_t		op_type;	/* type of operation (e.g. MOV) */
	uint16_t	op_flags;
};

#define	VIE_INST_SIZE	15
struct vie {
	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
	uint8_t		num_valid;		/* size of the instruction */
	uint8_t		num_processed;

	uint8_t		rex_w:1,		/* REX prefix */
			rex_r:1,
			rex_x:1,
			rex_b:1,
			rex_present:1;

	uint8_t		mod:2,			/* ModRM byte */
			reg:4,
			rm:4;

	uint8_t		ss:2,			/* SIB byte */
			index:4,
			base:4;

	uint8_t		disp_bytes;
	uint8_t		imm_bytes;

	uint8_t		scale;
	int		base_register;		/* VM_REG_GUEST_xyz */
	int		index_register;		/* VM_REG_GUEST_xyz */

	int64_t		displacement;		/* optional addr displacement */
	int64_t		immediate;		/* optional immediate operand */

	uint8_t		decoded;	/* set to 1 if successfully decoded */

	struct vie_op	op;			/* opcode description */
};
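
/*
 * Example of how a decoded instruction populates the fields above
 * (illustrative): "movq %rax,(%rbx)" encodes as 48 89 03, giving
 *
 *	inst[]      = { 0x48, 0x89, 0x03 }, num_valid = 3
 *	rex_present = 1, rex_w = 1
 *	mod = 0, reg = 0 (%rax), rm = 3 (%rbx)
 *	disp_bytes  = 0, imm_bytes = 0
 */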

enum vm_exitcode {
	VM_EXITCODE_INOUT,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,
	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,
	VM_EXITCODE_SPINUP_AP,
	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
	VM_EXITCODE_RENDEZVOUS,
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_SUSPENDED,
	VM_EXITCODE_INOUT_STR,
	VM_EXITCODE_MAX
};

struct vm_inout {
	uint16_t	bytes:3;	/* 1, 2 or 4 */
	uint16_t	in:1;
	uint16_t	string:1;
	uint16_t	rep:1;
	uint16_t	port;
	uint32_t	eax;		/* valid for out */
};
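
/*
 * For example (illustrative), 'outb %al,$0x20' executed with %al = 0x0b
 * would be described as:
 *
 *	bytes = 1, in = 0, string = 0, rep = 0
 *	port = 0x20, eax = 0x0b (only the low byte is meaningful here)
 */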

struct vm_inout_str {
	struct vm_inout	inout;		/* must be the first element */
	struct vm_guest_paging paging;
	uint64_t	rflags;
	uint64_t	cr0;
	uint64_t	index;
	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
	int		addrsize;
	enum vm_reg_name seg_name;
	struct seg_desc seg_desc;
};

struct vm_exit {
	enum vm_exitcode	exitcode;
	int			inst_length;	/* 0 means unknown */
	uint64_t		rip;
	union {
		struct vm_inout	inout;
		struct vm_inout_str inout_str;
		struct {
			uint64_t	gpa;
			int		fault_type;
		} paging;
		struct {
			uint64_t	gpa;
			uint64_t	gla;
			struct vm_guest_paging paging;
			struct vie	vie;
		} inst_emul;
		/*
		 * VMX specific payload. Used when there is no "better"
		 * exitcode to represent the VM-exit.
		 */
		struct {
			int		status;		/* vmx inst status */
			/*
			 * 'exit_reason' and 'exit_qualification' are valid
			 * only if 'status' is zero.
			 */
			uint32_t	exit_reason;
			uint64_t	exit_qualification;
			/*
			 * 'inst_error' and 'inst_type' are valid
			 * only if 'status' is non-zero.
			 */
			int		inst_type;
			int		inst_error;
		} vmx;
		struct {
			uint32_t	code;		/* ecx value */
			uint64_t	wval;
		} msr;
		struct {
			int		vcpu;
			uint64_t	rip;
		} spinup_ap;
		struct {
			uint64_t	rflags;
		} hlt;
		struct {
			int		vector;
		} ioapic_eoi;
		struct {
			enum vm_suspend_how how;
		} suspended;
	} u;
};
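
/*
 * Consumers typically switch on 'exitcode' once vm_run() returns.  A
 * minimal sketch of that pattern in a userspace monitor (the handler
 * names are hypothetical):
 *
 *	switch (vmexit->exitcode) {
 *	case VM_EXITCODE_INOUT:
 *		error = emulate_inout(ctx, vcpu, vmexit);
 *		break;
 *	case VM_EXITCODE_HLT:
 *		error = handle_hlt(ctx, vcpu, vmexit);
 *		break;
 *	default:
 *		error = handle_unknown_exit(ctx, vcpu, vmexit);
 *		break;
 *	}
 */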

#endif	/* _VMM_H_ */