xref: /illumos-gate/usr/src/uts/intel/sys/vmm.h (revision d48be21240dfd051b689384ce2b23479d757f2d8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 /*
31  * This file and its contents are supplied under the terms of the
32  * Common Development and Distribution License ("CDDL"), version 1.0.
33  * You may only use this file in accordance with the terms of version
34  * 1.0 of the CDDL.
35  *
36  * A full copy of the text of the CDDL should have accompanied this
37  * source.  A copy of the CDDL is also available via the Internet at
38  * http://www.illumos.org/license/CDDL.
39  *
40  * Copyright 2015 Pluribus Networks Inc.
41  * Copyright 2019 Joyent, Inc.
42  * Copyright 2022 Oxide Computer Company
43  */
44 
45 #ifndef _VMM_H_
46 #define	_VMM_H_
47 
/*
 * Values carried in the 'suspended' payload of struct vm_exit.
 *
 * The numbering is written out because it is fixed by declaration order.
 * VM_SUSPEND_LAST trails the list, so its value equals the number of
 * entries ahead of it.
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE		= 0,
	VM_SUSPEND_RESET	= 1,
	VM_SUSPEND_POWEROFF	= 2,
	VM_SUSPEND_HALT		= 3,
	VM_SUSPEND_TRIPLEFAULT	= 4,
	VM_SUSPEND_LAST		= 5
};
56 
/*
 * Identifiers for architecturally defined registers.
 *
 * Each identifier's numeric value is written out explicitly; the values are
 * exactly those implied by declaration order.  VM_REG_LAST trails the list
 * and therefore equals the number of identifiers defined.
 */
enum vm_reg_name {
	/* General-purpose registers (RSP is listed further below) */
	VM_REG_GUEST_RAX		= 0,
	VM_REG_GUEST_RBX		= 1,
	VM_REG_GUEST_RCX		= 2,
	VM_REG_GUEST_RDX		= 3,
	VM_REG_GUEST_RSI		= 4,
	VM_REG_GUEST_RDI		= 5,
	VM_REG_GUEST_RBP		= 6,
	VM_REG_GUEST_R8			= 7,
	VM_REG_GUEST_R9			= 8,
	VM_REG_GUEST_R10		= 9,
	VM_REG_GUEST_R11		= 10,
	VM_REG_GUEST_R12		= 11,
	VM_REG_GUEST_R13		= 12,
	VM_REG_GUEST_R14		= 13,
	VM_REG_GUEST_R15		= 14,

	/* Control and debug registers */
	VM_REG_GUEST_CR0		= 15,
	VM_REG_GUEST_CR3		= 16,
	VM_REG_GUEST_CR4		= 17,
	VM_REG_GUEST_DR7		= 18,

	/* Stack pointer, instruction pointer and flags */
	VM_REG_GUEST_RSP		= 19,
	VM_REG_GUEST_RIP		= 20,
	VM_REG_GUEST_RFLAGS		= 21,

	/* Segment registers */
	VM_REG_GUEST_ES			= 22,
	VM_REG_GUEST_CS			= 23,
	VM_REG_GUEST_SS			= 24,
	VM_REG_GUEST_DS			= 25,
	VM_REG_GUEST_FS			= 26,
	VM_REG_GUEST_GS			= 27,

	/* System table registers */
	VM_REG_GUEST_LDTR		= 28,
	VM_REG_GUEST_TR			= 29,
	VM_REG_GUEST_IDTR		= 30,
	VM_REG_GUEST_GDTR		= 31,

	VM_REG_GUEST_EFER		= 32,
	VM_REG_GUEST_CR2		= 33,

	/* PAE page-directory-pointer-table entries */
	VM_REG_GUEST_PDPTE0		= 34,
	VM_REG_GUEST_PDPTE1		= 35,
	VM_REG_GUEST_PDPTE2		= 36,
	VM_REG_GUEST_PDPTE3		= 37,

	VM_REG_GUEST_INTR_SHADOW	= 38,

	/* Remaining debug registers */
	VM_REG_GUEST_DR0		= 39,
	VM_REG_GUEST_DR1		= 40,
	VM_REG_GUEST_DR2		= 41,
	VM_REG_GUEST_DR3		= 42,
	VM_REG_GUEST_DR6		= 43,

	VM_REG_GUEST_ENTRY_INST_LENGTH	= 44,
	VM_REG_GUEST_XCR0		= 45,

	VM_REG_LAST			= 46
};
109 
/*
 * x2APIC enablement states.  X2APIC_STATE_LAST trails the list and equals the
 * number of states defined.
 */
enum x2apic_state {
	X2APIC_DISABLED		= 0,
	X2APIC_ENABLED		= 1,
	X2APIC_STATE_LAST	= 2
};
115 
/*
 * Event-info value layout, as encoded by the definitions below:
 *
 *   bits  7:0	vector
 *   bits 10:8	event type (one of the VM_INTINFO_* type values)
 *   bit  11	error code present (VM_INTINFO_DEL_ERRCODE)
 *   bits 30:12	reserved
 *   bit  31	valid (VM_INTINFO_VALID)
 *   bits 32+	error code
 */

/* Single-bit flags */
#define	VM_INTINFO_VALID	(1UL << 31)
#define	VM_INTINFO_DEL_ERRCODE	(1UL << 11)

/* Field masks and shifts */
#define	VM_INTINFO_MASK_VECTOR	0xffUL
#define	VM_INTINFO_MASK_TYPE	(0x7UL << 8)
#define	VM_INTINFO_MASK_RSVD	(0x7ffffUL << 12)
#define	VM_INTINFO_SHIFT_ERRCODE 32

/* Event types; compare against the result of VM_INTINFO_TYPE() */
#define	VM_INTINFO_HWINTR	(0 << 8)
#define	VM_INTINFO_NMI		(2 << 8)
#define	VM_INTINFO_HWEXCP	(3 << 8)
#define	VM_INTINFO_SWINTR	(4 << 8)
/* Reserved for CPU (read: Intel) specific types */
#define	VM_INTINFO_RESV1	(1 << 8)
#define	VM_INTINFO_RESV5	(5 << 8)
#define	VM_INTINFO_RESV6	(6 << 8)
#define	VM_INTINFO_RESV7	(7 << 8)

/*
 * Field accessors.  VM_INTINFO_TYPE() yields the type still positioned at
 * bits 10:8 (not shifted down), matching the type values above.
 */
#define	VM_INTINFO_VECTOR(val)	((val) & VM_INTINFO_MASK_VECTOR)
#define	VM_INTINFO_TYPE(val)	((val) & VM_INTINFO_MASK_TYPE)
#define	VM_INTINFO_ERRCODE(val)	((val) >> VM_INTINFO_SHIFT_ERRCODE)
#define	VM_INTINFO_PENDING(val)	(((val) & VM_INTINFO_VALID) != 0)
#define	VM_INTINFO_HAS_ERRCODE(val) (((val) & VM_INTINFO_DEL_ERRCODE) != 0)
139 
/*
 * Unlike FreeBSD, illumos has no SPECNAMELEN-derived limit on these names.
 * An arbitrary fixed limit has been chosen instead, which keeps structure
 * definitions simple.  The same limit applies to memory segment names.
 */
#define	VM_MAX_NAMELEN		128
#define	VM_MAX_SEG_NAMELEN	128

#if defined(_KERNEL)
#define	VM_MAXCPU	32			/* maximum virtual cpus */
#endif	/* _KERNEL */
152 
/*
 * Identifiers for optional vmm capabilities.
 *
 * Values are written out explicitly and match declaration order.  VM_CAP_MAX
 * trails the list and equals the number of capabilities defined.
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT	= 0,
	VM_CAP_MTRAP_EXIT	= 1,
	VM_CAP_PAUSE_EXIT	= 2,
	VM_CAP_ENABLE_INVPCID	= 3,
	VM_CAP_BPT_EXIT		= 4,
	VM_CAP_MAX		= 5
};
164 
/*
 * VMX capability bits.  Apart from VMX_CAP_NONE, each enumerator is a
 * distinct single bit, so values may be OR-ed together.
 */
enum vmx_caps {
	VMX_CAP_NONE		= 0x0,
	VMX_CAP_TPR_SHADOW	= 0x1,
	VMX_CAP_APICV		= 0x2,
	VMX_CAP_APICV_X2APIC	= 0x4,
	VMX_CAP_APICV_PIR	= 0x8,
};
172 
/* Interrupt trigger mode. */
enum vm_intr_trigger {
	EDGE_TRIGGER	= 0,
	LEVEL_TRIGGER	= 1
};
177 
/*
 * Segment descriptor state.
 *
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};

/*
 * Accessors for fields of seg_desc`access.  TYPE and DPL yield the field
 * value; the remaining macros yield 1 when the corresponding bit is set and
 * 0 otherwise.
 */
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
#define	SEG_DESC_DPL(access)		(((access) & 0x0060) >> 5)
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) != 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) != 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) != 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) != 0)
196 
/* Guest CPU operating mode. */
enum vm_cpu_mode {
	CPU_MODE_REAL		= 0,
	CPU_MODE_PROTECTED	= 1,
	CPU_MODE_COMPATIBILITY	= 2,	/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT		= 3,	/* IA-32E mode (CS.L = 1) */
};
203 
/* Guest paging mode. */
enum vm_paging_mode {
	PAGING_MODE_FLAT	= 0,
	PAGING_MODE_32		= 1,
	PAGING_MODE_PAE		= 2,
	PAGING_MODE_64		= 3,
};
210 
/*
 * Guest paging context: a CR3 value together with the privilege level, CPU
 * mode and paging mode it applies to.  Embedded in struct vm_task_switch.
 */
struct vm_guest_paging {
	uint64_t	cr3;
	int		cpl;	/* NOTE(review): presumably 0-3; confirm */
	enum vm_cpu_mode cpu_mode;
	enum vm_paging_mode paging_mode;
};
217 
/*
 * Exit codes stored in vm_exit`exitcode.
 *
 * Values are written out explicitly and match declaration order.
 * VM_EXITCODE_MAX trails the list and equals the number of codes defined.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT	= 0,
	VM_EXITCODE_VMX		= 1,
	VM_EXITCODE_BOGUS	= 2,
	VM_EXITCODE_RDMSR	= 3,
	VM_EXITCODE_WRMSR	= 4,
	VM_EXITCODE_HLT		= 5,
	VM_EXITCODE_MTRAP	= 6,
	VM_EXITCODE_PAUSE	= 7,
	VM_EXITCODE_PAGING	= 8,
	VM_EXITCODE_INST_EMUL	= 9,
	VM_EXITCODE_RUN_STATE	= 10,
	VM_EXITCODE_MMIO_EMUL	= 11,
	VM_EXITCODE_DEPRECATED	= 12,	/* formerly RUNBLOCK */
	VM_EXITCODE_IOAPIC_EOI	= 13,
	VM_EXITCODE_SUSPENDED	= 14,
	VM_EXITCODE_MMIO	= 15,
	VM_EXITCODE_TASK_SWITCH	= 16,
	VM_EXITCODE_MONITOR	= 17,
	VM_EXITCODE_MWAIT	= 18,
	VM_EXITCODE_SVM		= 19,
	VM_EXITCODE_REQIDLE	= 20,
	VM_EXITCODE_DEBUG	= 21,
	VM_EXITCODE_VMINSN	= 22,
	VM_EXITCODE_BPT		= 23,
	VM_EXITCODE_HT		= 24,
	VM_EXITCODE_MAX		= 25
};
246 
/* Bit flags stored in vm_inout`flags. */
enum inout_flags {
	/* direction: 'in' when set, else 'out' */
	INOUT_IN	= 0x1,

	/*
	 * The following flags are used only for in-kernel emulation logic and
	 * are not exposed to userspace.
	 */
	/* ins/outs operation */
	INOUT_STR	= 0x2,
	/* 'rep' prefix present on instruction */
	INOUT_REP	= 0x4,
};
257 
/*
 * Details of a port I/O (in/out) access.  Embedded in the payload unions of
 * both struct vm_exit and struct vm_entry.
 */
struct vm_inout {
	/* NOTE(review): presumably the data moved via %eax; confirm */
	uint32_t	eax;
	uint16_t	port;
	uint8_t		bytes;		/* 1 or 2 or 4 */
	uint8_t		flags;		/* see: inout_flags */

	/*
	 * The address size and segment are relevant to INS/OUTS operations.
	 * Userspace is not concerned with them since the in-kernel emulation
	 * handles those specific aspects.
	 */
	uint8_t		addrsize;
	uint8_t		segment;
};
272 
/*
 * Details of a memory-mapped I/O access.  Embedded in the payload unions of
 * both struct vm_exit and struct vm_entry.
 */
struct vm_mmio {
	uint8_t		bytes;		/* 1/2/4/8 bytes */
	uint8_t		read;		/* read: 1, write: 0 */
	/* Explicit padding: 2 bytes above + 6 here places 'gpa' at offset 8 */
	uint16_t	_pad[3];
	uint64_t	gpa;
	uint64_t	data;
};
280 
/* Cause of a task switch, stored in vm_task_switch`reason. */
enum task_switch_reason {
	TSR_CALL	= 0,
	TSR_IRET	= 1,
	TSR_JMP		= 2,
	TSR_IDT_GATE	= 3,	/* task gate in IDT */
};
287 
/*
 * Task-switch details.  Appears as the 'task_switch' member of the payload
 * union in struct vm_exit.
 */
struct vm_task_switch {
	uint16_t	tsssel;		/* new TSS selector */
	int		ext;		/* task switch due to external event */
	uint32_t	errcode;
	int		errcode_valid;	/* push 'errcode' on the new stack */
	enum task_switch_reason reason;
	struct vm_guest_paging paging;
};
296 
/*
 * Run-state bits for a vCPU.
 *
 * VRS_HALT is the all-zeroes value; every other enumerator is a distinct
 * single-bit flag, so they may be OR-ed together.
 */
enum vcpu_run_state {
	VRS_HALT		= 0,
	VRS_INIT		= (1 << 0),
	VRS_RUN			= (1 << 1),

	VRS_PEND_INIT		= (1 << 14),
	VRS_PEND_SIPI		= (1 << 15),
};

/*
 * Reduce 'v' to only those bits which are defined vcpu_run_state flags.
 * Both pending-event bits (VRS_PEND_INIT and VRS_PEND_SIPI) belong to the
 * valid set.
 */
#define	VRS_MASK_VALID(v)	\
	((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))

/*
 * True when 'v' consists solely of defined vcpu_run_state flags (VRS_HALT,
 * being zero, is trivially valid).  Note that 'v' is evaluated twice.
 */
#define	VRS_IS_VALID(v)		((v) == VRS_MASK_VALID(v))
308 
/*
 * Description of a VM exit: the exit code, the guest %rip and instruction
 * length, and a payload union ('u') carrying exit-specific details.
 *
 * NOTE(review): which member of 'u' is meaningful is presumably selected by
 * 'exitcode' (e.g. 'inout' for VM_EXITCODE_INOUT); this header only states
 * the pairing explicitly for 'mmio' / VM_EXITCODE_MMIO.  Confirm against the
 * code which populates this structure.
 */
struct vm_exit {
	enum vm_exitcode	exitcode;
	int			inst_length;	/* 0 means unknown */
	uint64_t		rip;
	union {
		struct vm_inout	inout;
		struct vm_mmio	mmio;
		struct {
			uint64_t	gpa;
			int		fault_type;
		} paging;
		/*
		 * Kernel-internal MMIO decoding and emulation.
		 * Userspace should not expect to see this, but rather a
		 * VM_EXITCODE_MMIO with the above 'mmio' context.
		 */
		struct {
			uint64_t	gpa;
			uint64_t	gla;
			uint64_t	cs_base;
			int		cs_d;		/* CS.D */
		} mmio_emul;
		/*
		 * Raw instruction bytes: 'num_valid' counts how many leading
		 * entries of 'inst' are populated.
		 */
		struct {
			uint8_t		inst[15];
			uint8_t		num_valid;
		} inst_emul;
		/*
		 * VMX specific payload. Used when there is no "better"
		 * exitcode to represent the VM-exit.
		 */
		struct {
			int		status;		/* vmx inst status */
			/*
			 * 'exit_reason' and 'exit_qualification' are valid
			 * only if 'status' is zero.
			 */
			uint32_t	exit_reason;
			uint64_t	exit_qualification;
			/*
			 * 'inst_error' and 'inst_type' are valid
			 * only if 'status' is non-zero.
			 */
			int		inst_type;
			int		inst_error;
		} vmx;
		/*
		 * SVM specific payload.
		 */
		struct {
			uint64_t	exitcode;
			uint64_t	exitinfo1;
			uint64_t	exitinfo2;
		} svm;
		struct {
			int		inst_length;
		} bpt;
		struct {
			uint32_t	code;		/* ecx value */
			uint64_t	wval;
		} msr;
		struct {
			uint64_t	rflags;
		} hlt;
		struct {
			int		vector;
		} ioapic_eoi;
		struct {
			enum vm_suspend_how how;
		} suspended;
		struct vm_task_switch task_switch;
	} u;
};
381 
/* Commands accepted in vm_entry`cmd. */
enum vm_entry_cmds {
	VEC_DEFAULT		= 0,
	/* discard inst emul state */
	VEC_DISCARD_INSTR	= 1,
	/* entry includes result for mmio emul */
	VEC_FULFILL_MMIO	= 2,
	/* entry includes result for inout emul */
	VEC_FULFILL_INOUT	= 3,
};
388 
/*
 * Parameters for entering (running) a vCPU.  The union carries the result of
 * an emulated inout or mmio access when 'cmd' is VEC_FULFILL_INOUT or
 * VEC_FULFILL_MMIO respectively.
 */
struct vm_entry {
	int cpuid;
	uint_t cmd;		/* see: vm_entry_cmds */
	/*
	 * NOTE(review): presumably points at storage for the resulting
	 * struct vm_exit; confirm against the consumer of this structure.
	 */
	void *exit_data;
	union {
		struct vm_inout inout;
		struct vm_mmio mmio;
	} u;
};
398 
/*
 * NOTE(review): only the prototype is visible in this header.  The name
 * suggests it arranges for the current guest instruction on vCPU 'vcpuid' of
 * 'vm' to be executed again; confirm the semantics and the return convention
 * against the definition.
 */
int vm_restart_instruction(void *vm, int vcpuid);
400 
/* Bit flags controlling VM creation; each is a distinct single bit. */
enum vm_create_flags {
	/*
	 * Allocate guest memory segments from existing reservoir capacity,
	 * rather than attempting to create transient allocations.
	 */
	VCF_RESERVOIR_MEM	= 0x1,

	/*
	 * Enable dirty page tracking for the guest.
	 */
	VCF_TRACK_DIRTY		= 0x2,
};
413 
/*
 * Describes an entry for `cpuid` emulation.
 * Used internally by bhyve (kernel) in addition to exposed ioctl(2) interface.
 *
 * The structure consists of eight 32-bit fields (32 bytes in total), with
 * '_pad' as explicit trailing padding.
 */
struct vcpu_cpuid_entry {
	/* NOTE(review): presumably the cpuid leaf (eax input); confirm */
	uint32_t	vce_function;
	/* index (ecx) input; consulted when VCE_FLAG_MATCH_INDEX is set */
	uint32_t	vce_index;
	uint32_t	vce_flags;	/* see: VCE_FLAG_* definitions */
	/* NOTE(review): presumably the register values to emit; confirm */
	uint32_t	vce_eax;
	uint32_t	vce_ebx;
	uint32_t	vce_ecx;
	uint32_t	vce_edx;
	uint32_t	_pad;
};
428 
/*
 * Flags defined for vcpu_cpuid_entry`vce_flags.
 */

/* Use index (ecx) input value when matching entry */
#define	VCE_FLAG_MATCH_INDEX		0x1

/* All valid flags for vcpu_cpuid_entry`vce_flags */
#define	VCE_FLAGS_VALID		VCE_FLAG_MATCH_INDEX

/*
 * Flags defined for vcpu_cpuid configuration.
 * These are used by both the ioctl(2) interface via vm_vcpu_cpuid_config and
 * internally in the kernel vmm.
 */

/* Use legacy hard-coded cpuid masking tables applied to the host CPU */
#define	VCC_FLAG_LEGACY_HANDLING	0x1

/*
 * Emulate Intel-style fallback behavior (emit highest "standard" entry) if the
 * queried function/index do not match.  If not set, emulate AMD-style, where
 * all zeroes are returned in such cases.
 */
#define	VCC_FLAG_INTEL_FALLBACK		0x2

/* All valid flags for vm_vcpu_cpuid_config`vvcc_flags */
#define	VCC_FLAGS_VALID		\
	(VCC_FLAG_LEGACY_HANDLING | VCC_FLAG_INTEL_FALLBACK)

/* Maximum vcpu_cpuid_entry records per vCPU */
#define	VMM_MAX_CPUID_ENTRIES		256
460 
461 #endif	/* _VMM_H_ */
462