xref: /illumos-gate/usr/src/uts/intel/sys/vmm.h (revision ae5a8bed14db6c16225cac733ea042c27e242d18)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 /*
31  * This file and its contents are supplied under the terms of the
32  * Common Development and Distribution License ("CDDL"), version 1.0.
33  * You may only use this file in accordance with the terms of version
34  * 1.0 of the CDDL.
35  *
36  * A full copy of the text of the CDDL should have accompanied this
37  * source.  A copy of the CDDL is also available via the Internet at
38  * http://www.illumos.org/license/CDDL.
39  *
40  * Copyright 2015 Pluribus Networks Inc.
41  * Copyright 2019 Joyent, Inc.
42  * Copyright 2022 Oxide Computer Company
43  */
44 
45 #ifndef _VMM_H_
46 #define	_VMM_H_
47 
/*
 * Enumerates the ways a VM suspension can be described.  Values count up
 * from zero, so VM_SUSPEND_LAST equals the number of entries preceding it.
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE		= 0,
	VM_SUSPEND_RESET	= 1,
	VM_SUSPEND_POWEROFF	= 2,
	VM_SUSPEND_HALT		= 3,
	VM_SUSPEND_TRIPLEFAULT	= 4,
	VM_SUSPEND_LAST		= 5
};
56 
/*
 * Identifiers for architecturally defined registers.
 *
 * The numeric values are spelled out so that each identifier's value is
 * visible at a glance.  VM_REG_LAST equals the number of identifiers which
 * precede it.
 */
enum vm_reg_name {
	/* General purpose registers (RSP is listed further down) */
	VM_REG_GUEST_RAX		= 0,
	VM_REG_GUEST_RBX		= 1,
	VM_REG_GUEST_RCX		= 2,
	VM_REG_GUEST_RDX		= 3,
	VM_REG_GUEST_RSI		= 4,
	VM_REG_GUEST_RDI		= 5,
	VM_REG_GUEST_RBP		= 6,
	VM_REG_GUEST_R8			= 7,
	VM_REG_GUEST_R9			= 8,
	VM_REG_GUEST_R10		= 9,
	VM_REG_GUEST_R11		= 10,
	VM_REG_GUEST_R12		= 11,
	VM_REG_GUEST_R13		= 12,
	VM_REG_GUEST_R14		= 13,
	VM_REG_GUEST_R15		= 14,

	/* Control and debug registers */
	VM_REG_GUEST_CR0		= 15,
	VM_REG_GUEST_CR3		= 16,
	VM_REG_GUEST_CR4		= 17,
	VM_REG_GUEST_DR7		= 18,

	/* Stack pointer, instruction pointer and flags */
	VM_REG_GUEST_RSP		= 19,
	VM_REG_GUEST_RIP		= 20,
	VM_REG_GUEST_RFLAGS		= 21,

	/* Segment registers */
	VM_REG_GUEST_ES			= 22,
	VM_REG_GUEST_CS			= 23,
	VM_REG_GUEST_SS			= 24,
	VM_REG_GUEST_DS			= 25,
	VM_REG_GUEST_FS			= 26,
	VM_REG_GUEST_GS			= 27,

	/* Descriptor table and task registers */
	VM_REG_GUEST_LDTR		= 28,
	VM_REG_GUEST_TR			= 29,
	VM_REG_GUEST_IDTR		= 30,
	VM_REG_GUEST_GDTR		= 31,

	VM_REG_GUEST_EFER		= 32,
	VM_REG_GUEST_CR2		= 33,

	VM_REG_GUEST_PDPTE0		= 34,
	VM_REG_GUEST_PDPTE1		= 35,
	VM_REG_GUEST_PDPTE2		= 36,
	VM_REG_GUEST_PDPTE3		= 37,

	VM_REG_GUEST_INTR_SHADOW	= 38,

	/* Remaining debug registers */
	VM_REG_GUEST_DR0		= 39,
	VM_REG_GUEST_DR1		= 40,
	VM_REG_GUEST_DR2		= 41,
	VM_REG_GUEST_DR3		= 42,
	VM_REG_GUEST_DR6		= 43,

	VM_REG_GUEST_ENTRY_INST_LENGTH	= 44,

	VM_REG_LAST			= 45
};
108 
/*
 * x2APIC enablement states.  X2APIC_STATE_LAST equals the number of states
 * defined ahead of it.
 */
enum x2apic_state {
	X2APIC_DISABLED		= 0,
	X2APIC_ENABLED		= 1,
	X2APIC_STATE_LAST	= 2
};
114 
/*
 * Encoding of an interrupt/exception info value, as implied by the masks,
 * flags and shift defined here:
 *
 *	bits  7:0	vector
 *	bits 10:8	event type (one of the VM_INTINFO_<type> values)
 *	bit  11		an error code accompanies the event
 *	bits 30:12	reserved
 *	bit  31		the value describes a pending event
 *	bits 32+	error code
 */

/* Field masks and the position of the error code */
#define	VM_INTINFO_MASK_VECTOR	0xffUL
#define	VM_INTINFO_MASK_TYPE	0x700UL
#define	VM_INTINFO_MASK_RSVD	0x7ffff000UL
#define	VM_INTINFO_SHIFT_ERRCODE 32

/* Single-bit flags */
#define	VM_INTINFO_DEL_ERRCODE	(1UL << 11)
#define	VM_INTINFO_VALID	(1UL << 31)

/* Event types, already positioned within VM_INTINFO_MASK_TYPE */
#define	VM_INTINFO_HWINTR	(0 << 8)
#define	VM_INTINFO_NMI		(2 << 8)
#define	VM_INTINFO_HWEXCP	(3 << 8)
#define	VM_INTINFO_SWINTR	(4 << 8)
/* Reserved for CPU (read: Intel) specific types */
#define	VM_INTINFO_RESV1	(1 << 8)
#define	VM_INTINFO_RESV5	(5 << 8)
#define	VM_INTINFO_RESV6	(6 << 8)
#define	VM_INTINFO_RESV7	(7 << 8)

/*
 * Accessors.  VM_INTINFO_TYPE() yields the type still in its shifted position
 * so that it compares directly against the VM_INTINFO_<type> constants.
 * VM_INTINFO_ERRCODE() shifts by 32, so 'val' must be wider than 32 bits.
 */
#define	VM_INTINFO_VECTOR(val)	((val) & VM_INTINFO_MASK_VECTOR)
#define	VM_INTINFO_TYPE(val)	((val) & VM_INTINFO_MASK_TYPE)
#define	VM_INTINFO_ERRCODE(val)	((val) >> VM_INTINFO_SHIFT_ERRCODE)
#define	VM_INTINFO_PENDING(val)	(((val) & VM_INTINFO_VALID) != 0)
#define	VM_INTINFO_HAS_ERRCODE(val) (((val) & VM_INTINFO_DEL_ERRCODE) != 0)
138 
/*
 * Unlike FreeBSD, illumos has no SPECNAMELEN-based limitation on names.
 * An arbitrary limit has been chosen in order to keep structure definitions
 * simple.  The same limit is applied to memory segment names.
 */
#define	VM_MAX_NAMELEN		128
#define	VM_MAX_SEG_NAMELEN	128

#if defined(_KERNEL)
/* maximum virtual cpus; only defined for kernel consumers */
#define	VM_MAXCPU	32
#endif	/* _KERNEL */
151 
/*
 * Identifiers for optional vmm capabilities.  VM_CAP_MAX equals the number of
 * capability identifiers which precede it.
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT	= 0,
	VM_CAP_MTRAP_EXIT	= 1,
	VM_CAP_PAUSE_EXIT	= 2,
	VM_CAP_ENABLE_INVPCID	= 3,
	VM_CAP_BPT_EXIT		= 4,
	VM_CAP_MAX		= 5
};
163 
/*
 * VMX capability flags.  Each non-zero entry occupies a distinct bit so that
 * several may be OR-ed together into one value.
 */
enum vmx_caps {
	VMX_CAP_NONE		= 0x0,
	VMX_CAP_TPR_SHADOW	= 0x1,	/* bit 0 */
	VMX_CAP_APICV		= 0x2,	/* bit 1 */
	VMX_CAP_APICV_X2APIC	= 0x4,	/* bit 2 */
	VMX_CAP_APICV_PIR	= 0x8,	/* bit 3 */
};
171 
/* Interrupt trigger modes. */
enum vm_intr_trigger {
	EDGE_TRIGGER	= 0,
	LEVEL_TRIGGER	= 1
};
176 
/*
 * Segment descriptor: base, limit and access rights.
 *
 * The format of the 'access' field is the one specified in Table 21-2 of the
 * Intel Architecture Manual vol 3b.
 *
 * XXX All of the 'access' field is architecturally defined, with the
 * exception of bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};

/*
 * Accessors for the 'access' field.  SEG_DESC_TYPE() and SEG_DESC_DPL() yield
 * multi-bit fields (bits 4:0 and 6:5 respectively); the rest test a single
 * bit and evaluate to 1 or 0.
 */
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
#define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) != 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) != 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) != 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) != 0)
195 
/* CPU operating modes. */
enum vm_cpu_mode {
	CPU_MODE_REAL		= 0,
	CPU_MODE_PROTECTED	= 1,
	CPU_MODE_COMPATIBILITY	= 2,	/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT		= 3,	/* IA-32E mode (CS.L = 1) */
};
202 
/* Paging modes. */
enum vm_paging_mode {
	PAGING_MODE_FLAT	= 0,
	PAGING_MODE_32		= 1,
	PAGING_MODE_PAE		= 2,
	PAGING_MODE_64		= 3,
};
209 
210 struct vm_guest_paging {
211 	uint64_t	cr3;
212 	int		cpl;
213 	enum vm_cpu_mode cpu_mode;
214 	enum vm_paging_mode paging_mode;
215 };
216 
/*
 * Codes identifying why a VM exit is being reported.  VM_EXITCODE_MAX equals
 * the number of codes which precede it.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT	= 0,
	VM_EXITCODE_VMX		= 1,
	VM_EXITCODE_BOGUS	= 2,
	VM_EXITCODE_RDMSR	= 3,
	VM_EXITCODE_WRMSR	= 4,
	VM_EXITCODE_HLT		= 5,
	VM_EXITCODE_MTRAP	= 6,
	VM_EXITCODE_PAUSE	= 7,
	VM_EXITCODE_PAGING	= 8,
	VM_EXITCODE_INST_EMUL	= 9,
	VM_EXITCODE_RUN_STATE	= 10,
	VM_EXITCODE_MMIO_EMUL	= 11,
	VM_EXITCODE_DEPRECATED	= 12,	/* formerly RUNBLOCK */
	VM_EXITCODE_IOAPIC_EOI	= 13,
	VM_EXITCODE_SUSPENDED	= 14,
	VM_EXITCODE_MMIO	= 15,
	VM_EXITCODE_TASK_SWITCH	= 16,
	VM_EXITCODE_MONITOR	= 17,
	VM_EXITCODE_MWAIT	= 18,
	VM_EXITCODE_SVM		= 19,
	VM_EXITCODE_REQIDLE	= 20,
	VM_EXITCODE_DEBUG	= 21,
	VM_EXITCODE_VMINSN	= 22,
	VM_EXITCODE_BPT		= 23,
	VM_EXITCODE_HT		= 24,
	VM_EXITCODE_MAX		= 25
};
245 
/* Bit flags describing an in/out operation; see the 'flags' field of vm_inout. */
enum inout_flags {
	/* direction: 'in' when set, else 'out' */
	INOUT_IN	= 0x1,

	/*
	 * The remaining flags exist solely for the in-kernel emulation logic
	 * and are not exposed to userspace.
	 */

	/* ins/outs operation */
	INOUT_STR	= 0x2,
	/* 'rep' prefix present on instruction */
	INOUT_REP	= 0x4,
};
256 
/* Description of a port I/O (in/out) operation. */
struct vm_inout {
	uint32_t	eax;
	uint16_t	port;
	uint8_t		bytes;		/* 1 or 2 or 4 */
	uint8_t		flags;		/* see: inout_flags */

	/*
	 * Address size and segment only matter for INS/OUTS operations.
	 * Since the in-kernel emulation deals with those specifics, they are
	 * of no concern to userspace.
	 */
	uint8_t		addrsize;
	uint8_t		segment;
};
271 
/*
 * Description of an MMIO access: its width and direction, followed by the
 * address and data.  '_pad' fills the gap between the two single-byte fields
 * and the 64-bit 'gpa'.
 */
struct vm_mmio {
	uint8_t		bytes;		/* 1/2/4/8 bytes */
	uint8_t		read;		/* read: 1, write: 0 */
	uint16_t	_pad[3];
	uint64_t	gpa;
	uint64_t	data;
};
279 
/* What initiated a task switch. */
enum task_switch_reason {
	TSR_CALL	= 0,
	TSR_IRET	= 1,
	TSR_JMP		= 2,
	TSR_IDT_GATE	= 3,	/* task gate in IDT */
};
286 
287 struct vm_task_switch {
288 	uint16_t	tsssel;		/* new TSS selector */
289 	int		ext;		/* task switch due to external event */
290 	uint32_t	errcode;
291 	int		errcode_valid;	/* push 'errcode' on the new stack */
292 	enum task_switch_reason reason;
293 	struct vm_guest_paging paging;
294 };
295 
/*
 * Run-state flags for a vCPU.  VRS_HALT is the absence of any flag.  The
 * low bits (VRS_INIT, VRS_RUN) and the two VRS_PEND_* bits may be combined.
 */
enum vcpu_run_state {
	VRS_HALT		= 0,
	VRS_INIT		= (1 << 0),
	VRS_RUN			= (1 << 1),

	VRS_PEND_INIT		= (1 << 14),
	VRS_PEND_SIPI		= (1 << 15),
};

/*
 * VRS_MASK_VALID() discards every bit of 'v' which does not correspond to a
 * flag defined in vcpu_run_state.  VRS_IS_VALID() is true only when 'v' holds
 * no such stray bits.
 *
 * The mask must name each defined flag exactly once.  In particular, both
 * VRS_PEND_INIT and VRS_PEND_SIPI need to be present, otherwise a state which
 * carries a pending INIT would be judged invalid.
 *
 * Note that VRS_IS_VALID() evaluates its argument twice.
 */
#define	VRS_MASK_VALID(v)	\
	((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))
#define	VRS_IS_VALID(v)		((v) == VRS_MASK_VALID(v))
307 
308 struct vm_exit {
309 	enum vm_exitcode	exitcode;
310 	int			inst_length;	/* 0 means unknown */
311 	uint64_t		rip;
312 	union {
313 		struct vm_inout	inout;
314 		struct vm_mmio	mmio;
315 		struct {
316 			uint64_t	gpa;
317 			int		fault_type;
318 		} paging;
319 		/*
320 		 * Kernel-internal MMIO decoding and emulation.
321 		 * Userspace should not expect to see this, but rather a
322 		 * VM_EXITCODE_MMIO with the above 'mmio' context.
323 		 */
324 		struct {
325 			uint64_t	gpa;
326 			uint64_t	gla;
327 			uint64_t	cs_base;
328 			int		cs_d;		/* CS.D */
329 		} mmio_emul;
330 		struct {
331 			uint8_t		inst[15];
332 			uint8_t		num_valid;
333 		} inst_emul;
334 		/*
335 		 * VMX specific payload. Used when there is no "better"
336 		 * exitcode to represent the VM-exit.
337 		 */
338 		struct {
339 			int		status;		/* vmx inst status */
340 			/*
341 			 * 'exit_reason' and 'exit_qualification' are valid
342 			 * only if 'status' is zero.
343 			 */
344 			uint32_t	exit_reason;
345 			uint64_t	exit_qualification;
346 			/*
347 			 * 'inst_error' and 'inst_type' are valid
348 			 * only if 'status' is non-zero.
349 			 */
350 			int		inst_type;
351 			int		inst_error;
352 		} vmx;
353 		/*
354 		 * SVM specific payload.
355 		 */
356 		struct {
357 			uint64_t	exitcode;
358 			uint64_t	exitinfo1;
359 			uint64_t	exitinfo2;
360 		} svm;
361 		struct {
362 			int		inst_length;
363 		} bpt;
364 		struct {
365 			uint32_t	code;		/* ecx value */
366 			uint64_t	wval;
367 		} msr;
368 		struct {
369 			uint64_t	rflags;
370 		} hlt;
371 		struct {
372 			int		vector;
373 		} ioapic_eoi;
374 		struct {
375 			enum vm_suspend_how how;
376 		} suspended;
377 		struct vm_task_switch task_switch;
378 	} u;
379 };
380 
/* Commands which may accompany a VM entry; see the 'cmd' field of vm_entry. */
enum vm_entry_cmds {
	VEC_DEFAULT		= 0,
	VEC_DISCARD_INSTR	= 1,	/* discard inst emul state */
	VEC_FULFILL_MMIO	= 2,	/* entry includes result for mmio emul */
	VEC_FULFILL_INOUT	= 3,	/* entry includes result for inout emul */
};
387 
388 struct vm_entry {
389 	int cpuid;
390 	uint_t cmd;		/* see: vm_entry_cmds */
391 	void *exit_data;
392 	union {
393 		struct vm_inout inout;
394 		struct vm_mmio mmio;
395 	} u;
396 };
397 
/*
 * Defined outside of this header.  Takes an opaque VM handle and a vCPU id,
 * and returns an int.
 * NOTE(review): going by the name, this asks for the current guest
 * instruction on that vCPU to be executed again - confirm against the
 * definition, including the meaning of the return value.
 */
extern int vm_restart_instruction(void *vm, int vcpuid);
399 
/* Flags which may be supplied when creating a VM. */
enum vm_create_flags {
	/*
	 * Guest memory segments are to be allocated out of existing reservoir
	 * capacity, instead of attempting to create transient allocations.
	 */
	VCF_RESERVOIR_MEM = 0x1,
};
407 
408 #endif	/* _VMM_H_ */
409