xref: /freebsd/sys/amd64/vmm/intel/vmcs.c (revision c1cdf6a42f0d951ba720688dfc6ce07608b02f6e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_ddb.h"
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/sysctl.h>
38 #include <sys/systm.h>
39 #include <sys/pcpu.h>
40 
41 #include <vm/vm.h>
42 #include <vm/pmap.h>
43 
44 #include <machine/segments.h>
45 #include <machine/vmm.h>
46 #include "vmm_host.h"
47 #include "vmx_cpufunc.h"
48 #include "vmcs.h"
49 #include "ept.h"
50 #include "vmx.h"
51 
52 #ifdef DDB
53 #include <ddb/ddb.h>
54 #endif
55 
56 SYSCTL_DECL(_hw_vmm_vmx);
57 
58 static int no_flush_rsb;
59 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
60     &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
61 
62 static uint64_t
63 vmcs_fix_regval(uint32_t encoding, uint64_t val)
64 {
65 
66 	switch (encoding) {
67 	case VMCS_GUEST_CR0:
68 		val = vmx_fix_cr0(val);
69 		break;
70 	case VMCS_GUEST_CR4:
71 		val = vmx_fix_cr4(val);
72 		break;
73 	default:
74 		break;
75 	}
76 	return (val);
77 }
78 
79 static uint32_t
80 vmcs_field_encoding(int ident)
81 {
82 	switch (ident) {
83 	case VM_REG_GUEST_CR0:
84 		return (VMCS_GUEST_CR0);
85 	case VM_REG_GUEST_CR3:
86 		return (VMCS_GUEST_CR3);
87 	case VM_REG_GUEST_CR4:
88 		return (VMCS_GUEST_CR4);
89 	case VM_REG_GUEST_DR7:
90 		return (VMCS_GUEST_DR7);
91 	case VM_REG_GUEST_RSP:
92 		return (VMCS_GUEST_RSP);
93 	case VM_REG_GUEST_RIP:
94 		return (VMCS_GUEST_RIP);
95 	case VM_REG_GUEST_RFLAGS:
96 		return (VMCS_GUEST_RFLAGS);
97 	case VM_REG_GUEST_ES:
98 		return (VMCS_GUEST_ES_SELECTOR);
99 	case VM_REG_GUEST_CS:
100 		return (VMCS_GUEST_CS_SELECTOR);
101 	case VM_REG_GUEST_SS:
102 		return (VMCS_GUEST_SS_SELECTOR);
103 	case VM_REG_GUEST_DS:
104 		return (VMCS_GUEST_DS_SELECTOR);
105 	case VM_REG_GUEST_FS:
106 		return (VMCS_GUEST_FS_SELECTOR);
107 	case VM_REG_GUEST_GS:
108 		return (VMCS_GUEST_GS_SELECTOR);
109 	case VM_REG_GUEST_TR:
110 		return (VMCS_GUEST_TR_SELECTOR);
111 	case VM_REG_GUEST_LDTR:
112 		return (VMCS_GUEST_LDTR_SELECTOR);
113 	case VM_REG_GUEST_EFER:
114 		return (VMCS_GUEST_IA32_EFER);
115 	case VM_REG_GUEST_PDPTE0:
116 		return (VMCS_GUEST_PDPTE0);
117 	case VM_REG_GUEST_PDPTE1:
118 		return (VMCS_GUEST_PDPTE1);
119 	case VM_REG_GUEST_PDPTE2:
120 		return (VMCS_GUEST_PDPTE2);
121 	case VM_REG_GUEST_PDPTE3:
122 		return (VMCS_GUEST_PDPTE3);
123 	default:
124 		return (-1);
125 	}
126 
127 }
128 
129 static int
130 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
131 {
132 
133 	switch (seg) {
134 	case VM_REG_GUEST_ES:
135 		*base = VMCS_GUEST_ES_BASE;
136 		*lim = VMCS_GUEST_ES_LIMIT;
137 		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
138 		break;
139 	case VM_REG_GUEST_CS:
140 		*base = VMCS_GUEST_CS_BASE;
141 		*lim = VMCS_GUEST_CS_LIMIT;
142 		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
143 		break;
144 	case VM_REG_GUEST_SS:
145 		*base = VMCS_GUEST_SS_BASE;
146 		*lim = VMCS_GUEST_SS_LIMIT;
147 		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
148 		break;
149 	case VM_REG_GUEST_DS:
150 		*base = VMCS_GUEST_DS_BASE;
151 		*lim = VMCS_GUEST_DS_LIMIT;
152 		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
153 		break;
154 	case VM_REG_GUEST_FS:
155 		*base = VMCS_GUEST_FS_BASE;
156 		*lim = VMCS_GUEST_FS_LIMIT;
157 		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
158 		break;
159 	case VM_REG_GUEST_GS:
160 		*base = VMCS_GUEST_GS_BASE;
161 		*lim = VMCS_GUEST_GS_LIMIT;
162 		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
163 		break;
164 	case VM_REG_GUEST_TR:
165 		*base = VMCS_GUEST_TR_BASE;
166 		*lim = VMCS_GUEST_TR_LIMIT;
167 		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
168 		break;
169 	case VM_REG_GUEST_LDTR:
170 		*base = VMCS_GUEST_LDTR_BASE;
171 		*lim = VMCS_GUEST_LDTR_LIMIT;
172 		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
173 		break;
174 	case VM_REG_GUEST_IDTR:
175 		*base = VMCS_GUEST_IDTR_BASE;
176 		*lim = VMCS_GUEST_IDTR_LIMIT;
177 		*acc = VMCS_INVALID_ENCODING;
178 		break;
179 	case VM_REG_GUEST_GDTR:
180 		*base = VMCS_GUEST_GDTR_BASE;
181 		*lim = VMCS_GUEST_GDTR_LIMIT;
182 		*acc = VMCS_INVALID_ENCODING;
183 		break;
184 	default:
185 		return (EINVAL);
186 	}
187 
188 	return (0);
189 }
190 
/*
 * Read a guest register, or a raw VMCS field, from 'vmcs' into '*retval'.
 *
 * A non-negative 'ident' is translated with vmcs_field_encoding().  A
 * negative 'ident' bypasses that translation: the sign bit is stripped and
 * the remainder is used directly as the VMCS field encoding.  This gives
 * access to vmx-specific state in the VMCS, and works because the upper
 * 16 bits of VMCS encodings are reserved (i.e. set to 0), which leaves the
 * sign bit free for this purpose.
 *
 * When 'running' is zero the read is bracketed by VMPTRLD(vmcs) and
 * VMCLEAR(vmcs); otherwise vmread() is issued directly.
 *
 * Returns EINVAL if 'ident' cannot be translated, otherwise the result of
 * vmread().
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t encoding;
	int error;

	encoding = (ident < 0) ? (ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (encoding == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(encoding, retval));

	VMPTRLD(vmcs);
	error = vmread(encoding, retval);
	VMCLEAR(vmcs);

	return (error);
}
222 
/*
 * Write 'val' to a guest register, or a raw VMCS field, in 'vmcs'.
 *
 * A non-negative 'ident' is translated with vmcs_field_encoding(); a
 * negative 'ident' has its sign bit stripped and is used directly as the
 * VMCS field encoding.  The value is passed through vmcs_fix_regval()
 * before it is written.
 *
 * When 'running' is zero the write is bracketed by VMPTRLD(vmcs) and
 * VMCLEAR(vmcs); otherwise vmwrite() is issued directly.
 *
 * Returns EINVAL if 'ident' cannot be translated, otherwise the result of
 * vmwrite().
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t encoding;
	int error;

	encoding = (ident < 0) ? (ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (encoding == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(encoding, val);

	if (running)
		return (vmwrite(encoding, val));

	VMPTRLD(vmcs);
	error = vmwrite(encoding, val);
	VMCLEAR(vmcs);

	return (error);
}
249 
250 int
251 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
252 {
253 	int error;
254 	uint32_t base, limit, access;
255 
256 	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
257 	if (error != 0)
258 		panic("vmcs_setdesc: invalid segment register %d", seg);
259 
260 	if (!running)
261 		VMPTRLD(vmcs);
262 	if ((error = vmwrite(base, desc->base)) != 0)
263 		goto done;
264 
265 	if ((error = vmwrite(limit, desc->limit)) != 0)
266 		goto done;
267 
268 	if (access != VMCS_INVALID_ENCODING) {
269 		if ((error = vmwrite(access, desc->access)) != 0)
270 			goto done;
271 	}
272 done:
273 	if (!running)
274 		VMCLEAR(vmcs);
275 	return (error);
276 }
277 
278 int
279 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
280 {
281 	int error;
282 	uint32_t base, limit, access;
283 	uint64_t u64;
284 
285 	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
286 	if (error != 0)
287 		panic("vmcs_getdesc: invalid segment register %d", seg);
288 
289 	if (!running)
290 		VMPTRLD(vmcs);
291 	if ((error = vmread(base, &u64)) != 0)
292 		goto done;
293 	desc->base = u64;
294 
295 	if ((error = vmread(limit, &u64)) != 0)
296 		goto done;
297 	desc->limit = u64;
298 
299 	if (access != VMCS_INVALID_ENCODING) {
300 		if ((error = vmread(access, &u64)) != 0)
301 			goto done;
302 		desc->access = u64;
303 	}
304 done:
305 	if (!running)
306 		VMCLEAR(vmcs);
307 	return (error);
308 }
309 
310 int
311 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
312 {
313 	int error;
314 
315 	VMPTRLD(vmcs);
316 
317 	/*
318 	 * Guest MSRs are saved in the VM-exit MSR-store area.
319 	 * Guest MSRs are loaded from the VM-entry MSR-load area.
320 	 * Both areas point to the same location in memory.
321 	 */
322 	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
323 		goto done;
324 	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
325 		goto done;
326 
327 	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
328 		goto done;
329 	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
330 		goto done;
331 
332 	error = 0;
333 done:
334 	VMCLEAR(vmcs);
335 	return (error);
336 }
337 
338 int
339 vmcs_init(struct vmcs *vmcs)
340 {
341 	int error, codesel, datasel, tsssel;
342 	u_long cr0, cr4, efer;
343 	uint64_t pat, fsbase, idtrbase;
344 
345 	codesel = vmm_get_host_codesel();
346 	datasel = vmm_get_host_datasel();
347 	tsssel = vmm_get_host_tsssel();
348 
349 	/*
350 	 * Make sure we have a "current" VMCS to work with.
351 	 */
352 	VMPTRLD(vmcs);
353 
354 	/* Host state */
355 
356 	/* Initialize host IA32_PAT MSR */
357 	pat = vmm_get_host_pat();
358 	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
359 		goto done;
360 
361 	/* Load the IA32_EFER MSR */
362 	efer = vmm_get_host_efer();
363 	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
364 		goto done;
365 
366 	/* Load the control registers */
367 
368 	cr0 = vmm_get_host_cr0();
369 	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
370 		goto done;
371 
372 	cr4 = vmm_get_host_cr4() | CR4_VMXE;
373 	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
374 		goto done;
375 
376 	/* Load the segment selectors */
377 	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
378 		goto done;
379 
380 	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
381 		goto done;
382 
383 	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
384 		goto done;
385 
386 	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
387 		goto done;
388 
389 	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
390 		goto done;
391 
392 	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
393 		goto done;
394 
395 	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
396 		goto done;
397 
398 	/*
399 	 * Load the Base-Address for %fs and idtr.
400 	 *
401 	 * Note that we exclude %gs, tss and gdtr here because their base
402 	 * address is pcpu specific.
403 	 */
404 	fsbase = vmm_get_host_fsbase();
405 	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
406 		goto done;
407 
408 	idtrbase = vmm_get_host_idtrbase();
409 	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
410 		goto done;
411 
412 	/* instruction pointer */
413 	if (no_flush_rsb) {
414 		if ((error = vmwrite(VMCS_HOST_RIP,
415 		    (u_long)vmx_exit_guest)) != 0)
416 			goto done;
417 	} else {
418 		if ((error = vmwrite(VMCS_HOST_RIP,
419 		    (u_long)vmx_exit_guest_flush_rsb)) != 0)
420 			goto done;
421 	}
422 
423 	/* link pointer */
424 	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
425 		goto done;
426 done:
427 	VMCLEAR(vmcs);
428 	return (error);
429 }
430 
#ifdef DDB
extern int vmxon_enabled[];

/*
 * DDB "show vmcs" command: print selected fields of the VMCS that is
 * current on this CPU.
 *
 * Nothing is printed beyond a diagnostic when VMX is not enabled on the
 * current CPU, when an address argument is supplied (only the current VMCS
 * is supported), or when vmptrst() reports VMCS_INITIAL, i.e. there is no
 * current VMCS.
 *
 * Otherwise the output covers the VMCS address, VPID, guest activity
 * state, exit (or entry-failure) reason, exit qualification, guest linear
 * address, exit-specific details for exception/external-interrupt and EPT
 * exits, and the VM-instruction error.
 */
DB_SHOW_COMMAND(vmcs, db_show_vmcs)
{
	uint64_t cur_vmcs, val;
	uint32_t exit;

	if (!vmxon_enabled[curcpu]) {
		db_printf("VMX not enabled\n");
		return;
	}

	if (have_addr) {
		db_printf("Only current VMCS supported\n");
		return;
	}

	vmptrst(&cur_vmcs);
	if (cur_vmcs == VMCS_INITIAL) {
		db_printf("No current VM context\n");
		return;
	}
	/* %jx takes a uintmax_t, so convert explicitly. */
	db_printf("VMCS: %jx\n", (uintmax_t)cur_vmcs);
	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
	db_printf("Activity: ");
	val = vmcs_read(VMCS_GUEST_ACTIVITY);
	switch (val) {
	case 0:
		db_printf("Active");
		break;
	case 1:
		db_printf("HLT");
		break;
	case 2:
		db_printf("Shutdown");
		break;
	case 3:
		db_printf("Wait for SIPI");
		break;
	default:
		db_printf("Unknown: %#lx", val);
		break;
	}
	db_printf("\n");
	exit = vmcs_read(VMCS_EXIT_REASON);
	/* Bit 31 of the exit reason distinguishes a failed VM entry. */
	if (exit & 0x80000000)
		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
	else
		db_printf("Exit Reason: %u\n", exit & 0xffff);
	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
	db_printf("Guest Linear Address: %#lx\n",
	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
	/*
	 * Bit 31 is kept in the switch value so that entry failures never
	 * match one of the exit reasons below.
	 */
	switch (exit & 0x8000ffff) {
	case EXIT_REASON_EXCEPTION:
	case EXIT_REASON_EXT_INTR:
		val = vmcs_read(VMCS_EXIT_INTR_INFO);
		db_printf("Interrupt Type: ");
		/*
		 * Bits 10:8 of the VM-exit interruption information hold the
		 * interruption type.  Types 1, 4 and 7 are not used in this
		 * field and fall through to the default case.
		 */
		switch (val >> 8 & 0x7) {
		case 0:
			db_printf("external");
			break;
		case 2:
			db_printf("NMI");
			break;
		case 3:
			db_printf("HW exception");
			break;
		case 5:
			db_printf("privileged SW exception");
			break;
		case 6:
			db_printf("SW exception");
			break;
		default:
			db_printf("?? %lu", val >> 8 & 0x7);
			break;
		}
		db_printf("  Vector: %lu", val & 0xff);
		/* Bit 11 indicates that the error code field is valid. */
		if (val & 0x800)
			db_printf("  Error Code: %lx",
			    vmcs_read(VMCS_EXIT_INTR_ERRCODE));
		db_printf("\n");
		break;
	case EXIT_REASON_EPT_FAULT:
	case EXIT_REASON_EPT_MISCONFIG:
		db_printf("Guest Physical Address: %#lx\n",
		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
		break;
	}
	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
}
#endif
520