xref: /freebsd/sys/amd64/vmm/intel/vmcs.c (revision 258a0d760aa8b42899a000e30f610f900a402556)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include "opt_bhyve_snapshot.h"
32 #include "opt_ddb.h"
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/sysctl.h>
39 #include <sys/systm.h>
40 #include <sys/pcpu.h>
41 
42 #include <vm/vm.h>
43 #include <vm/pmap.h>
44 
45 #include <machine/segments.h>
46 #include <machine/vmm.h>
47 #include <machine/vmm_snapshot.h>
48 #include "vmm_host.h"
49 #include "vmx_cpufunc.h"
50 #include "vmcs.h"
51 #include "ept.h"
52 #include "vmx.h"
53 
54 #ifdef DDB
55 #include <ddb/ddb.h>
56 #endif
57 
58 SYSCTL_DECL(_hw_vmm_vmx);
59 
/*
 * Read-write integer knob published under the _hw_vmm_vmx sysctl node.
 * vmcs_init() consults it when programming VMCS_HOST_RIP: a non-zero
 * value selects vmx_exit_guest, zero (the default for a static int)
 * selects vmx_exit_guest_flush_rsb.
 */
60 static int no_flush_rsb;
61 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
62     &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
63 
64 static uint64_t
65 vmcs_fix_regval(uint32_t encoding, uint64_t val)
66 {
67 
68 	switch (encoding) {
69 	case VMCS_GUEST_CR0:
70 		val = vmx_fix_cr0(val);
71 		break;
72 	case VMCS_GUEST_CR4:
73 		val = vmx_fix_cr4(val);
74 		break;
75 	default:
76 		break;
77 	}
78 	return (val);
79 }
80 
81 static uint32_t
82 vmcs_field_encoding(int ident)
83 {
84 	switch (ident) {
85 	case VM_REG_GUEST_CR0:
86 		return (VMCS_GUEST_CR0);
87 	case VM_REG_GUEST_CR3:
88 		return (VMCS_GUEST_CR3);
89 	case VM_REG_GUEST_CR4:
90 		return (VMCS_GUEST_CR4);
91 	case VM_REG_GUEST_DR7:
92 		return (VMCS_GUEST_DR7);
93 	case VM_REG_GUEST_RSP:
94 		return (VMCS_GUEST_RSP);
95 	case VM_REG_GUEST_RIP:
96 		return (VMCS_GUEST_RIP);
97 	case VM_REG_GUEST_RFLAGS:
98 		return (VMCS_GUEST_RFLAGS);
99 	case VM_REG_GUEST_ES:
100 		return (VMCS_GUEST_ES_SELECTOR);
101 	case VM_REG_GUEST_CS:
102 		return (VMCS_GUEST_CS_SELECTOR);
103 	case VM_REG_GUEST_SS:
104 		return (VMCS_GUEST_SS_SELECTOR);
105 	case VM_REG_GUEST_DS:
106 		return (VMCS_GUEST_DS_SELECTOR);
107 	case VM_REG_GUEST_FS:
108 		return (VMCS_GUEST_FS_SELECTOR);
109 	case VM_REG_GUEST_GS:
110 		return (VMCS_GUEST_GS_SELECTOR);
111 	case VM_REG_GUEST_TR:
112 		return (VMCS_GUEST_TR_SELECTOR);
113 	case VM_REG_GUEST_LDTR:
114 		return (VMCS_GUEST_LDTR_SELECTOR);
115 	case VM_REG_GUEST_EFER:
116 		return (VMCS_GUEST_IA32_EFER);
117 	case VM_REG_GUEST_PDPTE0:
118 		return (VMCS_GUEST_PDPTE0);
119 	case VM_REG_GUEST_PDPTE1:
120 		return (VMCS_GUEST_PDPTE1);
121 	case VM_REG_GUEST_PDPTE2:
122 		return (VMCS_GUEST_PDPTE2);
123 	case VM_REG_GUEST_PDPTE3:
124 		return (VMCS_GUEST_PDPTE3);
125 	case VM_REG_GUEST_ENTRY_INST_LENGTH:
126 		return (VMCS_ENTRY_INST_LENGTH);
127 	default:
128 		return (-1);
129 	}
130 
131 }
132 
133 static int
134 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
135 {
136 
137 	switch (seg) {
138 	case VM_REG_GUEST_ES:
139 		*base = VMCS_GUEST_ES_BASE;
140 		*lim = VMCS_GUEST_ES_LIMIT;
141 		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
142 		break;
143 	case VM_REG_GUEST_CS:
144 		*base = VMCS_GUEST_CS_BASE;
145 		*lim = VMCS_GUEST_CS_LIMIT;
146 		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
147 		break;
148 	case VM_REG_GUEST_SS:
149 		*base = VMCS_GUEST_SS_BASE;
150 		*lim = VMCS_GUEST_SS_LIMIT;
151 		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
152 		break;
153 	case VM_REG_GUEST_DS:
154 		*base = VMCS_GUEST_DS_BASE;
155 		*lim = VMCS_GUEST_DS_LIMIT;
156 		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
157 		break;
158 	case VM_REG_GUEST_FS:
159 		*base = VMCS_GUEST_FS_BASE;
160 		*lim = VMCS_GUEST_FS_LIMIT;
161 		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
162 		break;
163 	case VM_REG_GUEST_GS:
164 		*base = VMCS_GUEST_GS_BASE;
165 		*lim = VMCS_GUEST_GS_LIMIT;
166 		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
167 		break;
168 	case VM_REG_GUEST_TR:
169 		*base = VMCS_GUEST_TR_BASE;
170 		*lim = VMCS_GUEST_TR_LIMIT;
171 		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
172 		break;
173 	case VM_REG_GUEST_LDTR:
174 		*base = VMCS_GUEST_LDTR_BASE;
175 		*lim = VMCS_GUEST_LDTR_LIMIT;
176 		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
177 		break;
178 	case VM_REG_GUEST_IDTR:
179 		*base = VMCS_GUEST_IDTR_BASE;
180 		*lim = VMCS_GUEST_IDTR_LIMIT;
181 		*acc = VMCS_INVALID_ENCODING;
182 		break;
183 	case VM_REG_GUEST_GDTR:
184 		*base = VMCS_GUEST_GDTR_BASE;
185 		*lim = VMCS_GUEST_GDTR_LIMIT;
186 		*acc = VMCS_INVALID_ENCODING;
187 		break;
188 	default:
189 		return (EINVAL);
190 	}
191 
192 	return (0);
193 }
194 
/*
 * Read the VMCS field selected by 'ident' into '*retval'.
 *
 * A non-negative 'ident' is a VM_REG_GUEST_* identifier and is translated
 * with vmcs_field_encoding().  A negative 'ident' carries a raw VMCS
 * encoding in its low 31 bits.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the read
 * and released with VMCLEAR() afterwards; otherwise the field is read
 * without either step.
 *
 * Returns EINVAL if 'ident' has no encoding, otherwise the result of
 * vmread().
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int rv;

	/*
	 * vmx-specific state can be reached without going through the
	 * 'ident' to encoding translation by setting the sign bit of
	 * 'ident'.  The upper 16 bits of every VMCS encoding are reserved
	 * (i.e. zero), which leaves the sign bit free for this purpose.
	 */
	field = (ident < 0) ? (ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	rv = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rv);
}
226 
/*
 * Write 'val' to the VMCS field selected by 'ident'.
 *
 * 'ident' is interpreted as in vmcs_getreg(): non-negative values are
 * translated with vmcs_field_encoding(), negative values carry a raw
 * encoding in their low 31 bits.  The value is passed through
 * vmcs_fix_regval() before it is written.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the write
 * and released with VMCLEAR() afterwards.
 *
 * Returns EINVAL if 'ident' has no encoding, otherwise the result of
 * vmwrite().
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int rv;

	field = (ident < 0) ? (ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	rv = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rv);
}
253 
254 int
255 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
256 {
257 	int error;
258 	uint32_t base, limit, access;
259 
260 	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
261 	if (error != 0)
262 		panic("vmcs_setdesc: invalid segment register %d", seg);
263 
264 	if (!running)
265 		VMPTRLD(vmcs);
266 	if ((error = vmwrite(base, desc->base)) != 0)
267 		goto done;
268 
269 	if ((error = vmwrite(limit, desc->limit)) != 0)
270 		goto done;
271 
272 	if (access != VMCS_INVALID_ENCODING) {
273 		if ((error = vmwrite(access, desc->access)) != 0)
274 			goto done;
275 	}
276 done:
277 	if (!running)
278 		VMCLEAR(vmcs);
279 	return (error);
280 }
281 
282 int
283 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
284 {
285 	int error;
286 	uint32_t base, limit, access;
287 	uint64_t u64;
288 
289 	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
290 	if (error != 0)
291 		panic("vmcs_getdesc: invalid segment register %d", seg);
292 
293 	if (!running)
294 		VMPTRLD(vmcs);
295 	if ((error = vmread(base, &u64)) != 0)
296 		goto done;
297 	desc->base = u64;
298 
299 	if ((error = vmread(limit, &u64)) != 0)
300 		goto done;
301 	desc->limit = u64;
302 
303 	if (access != VMCS_INVALID_ENCODING) {
304 		if ((error = vmread(access, &u64)) != 0)
305 			goto done;
306 		desc->access = u64;
307 	}
308 done:
309 	if (!running)
310 		VMCLEAR(vmcs);
311 	return (error);
312 }
313 
314 int
315 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
316 {
317 	int error;
318 
319 	VMPTRLD(vmcs);
320 
321 	/*
322 	 * Guest MSRs are saved in the VM-exit MSR-store area.
323 	 * Guest MSRs are loaded from the VM-entry MSR-load area.
324 	 * Both areas point to the same location in memory.
325 	 */
326 	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
327 		goto done;
328 	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
329 		goto done;
330 
331 	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
332 		goto done;
333 	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
334 		goto done;
335 
336 	error = 0;
337 done:
338 	VMCLEAR(vmcs);
339 	return (error);
340 }
341 
342 int
343 vmcs_init(struct vmcs *vmcs)
344 {
345 	int error, codesel, datasel, tsssel;
346 	u_long cr0, cr4, efer;
347 	uint64_t pat, fsbase, idtrbase;
348 
349 	codesel = vmm_get_host_codesel();
350 	datasel = vmm_get_host_datasel();
351 	tsssel = vmm_get_host_tsssel();
352 
353 	/*
354 	 * Make sure we have a "current" VMCS to work with.
355 	 */
356 	VMPTRLD(vmcs);
357 
358 	/* Host state */
359 
360 	/* Initialize host IA32_PAT MSR */
361 	pat = vmm_get_host_pat();
362 	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
363 		goto done;
364 
365 	/* Load the IA32_EFER MSR */
366 	efer = vmm_get_host_efer();
367 	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
368 		goto done;
369 
370 	/* Load the control registers */
371 
372 	cr0 = vmm_get_host_cr0();
373 	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
374 		goto done;
375 
376 	cr4 = vmm_get_host_cr4() | CR4_VMXE;
377 	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
378 		goto done;
379 
380 	/* Load the segment selectors */
381 	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
382 		goto done;
383 
384 	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
385 		goto done;
386 
387 	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
388 		goto done;
389 
390 	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
391 		goto done;
392 
393 	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
394 		goto done;
395 
396 	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
397 		goto done;
398 
399 	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
400 		goto done;
401 
402 	/*
403 	 * Load the Base-Address for %fs and idtr.
404 	 *
405 	 * Note that we exclude %gs, tss and gdtr here because their base
406 	 * address is pcpu specific.
407 	 */
408 	fsbase = vmm_get_host_fsbase();
409 	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
410 		goto done;
411 
412 	idtrbase = vmm_get_host_idtrbase();
413 	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
414 		goto done;
415 
416 	/* instruction pointer */
417 	if (no_flush_rsb) {
418 		if ((error = vmwrite(VMCS_HOST_RIP,
419 		    (u_long)vmx_exit_guest)) != 0)
420 			goto done;
421 	} else {
422 		if ((error = vmwrite(VMCS_HOST_RIP,
423 		    (u_long)vmx_exit_guest_flush_rsb)) != 0)
424 			goto done;
425 	}
426 
427 	/* link pointer */
428 	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
429 		goto done;
430 done:
431 	VMCLEAR(vmcs);
432 	return (error);
433 }
434 
435 #ifdef BHYVE_SNAPSHOT
/*
 * Read the VMCS field whose encoding is 'ident' into '*val'.
 *
 * Unlike vmcs_getreg(), 'ident' is handed to vmread() as is, with no
 * translation or validation.  When 'running' is zero the VMCS is loaded
 * with VMPTRLD() before the read and released with VMCLEAR() afterwards.
 *
 * Returns the result of vmread().
 */
int
vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
{
	int rv;

	if (running)
		return (vmread(ident, val));

	VMPTRLD(vmcs);
	rv = vmread(ident, val);
	VMCLEAR(vmcs);

	return (rv);
}
451 
/*
 * Write 'val' to the VMCS field whose encoding is 'ident'.
 *
 * Unlike vmcs_setreg(), 'ident' is handed to vmwrite() as is and the value
 * is not adjusted.  When 'running' is zero the VMCS is loaded with VMPTRLD()
 * before the write and released with VMCLEAR() afterwards.
 *
 * Returns the result of vmwrite().
 */
int
vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	int rv;

	if (running)
		return (vmwrite(ident, val));

	VMPTRLD(vmcs);
	rv = vmwrite(ident, val);
	VMCLEAR(vmcs);

	return (rv);
}
467 
468 int
469 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
470 		  struct vm_snapshot_meta *meta)
471 {
472 	int ret;
473 	uint64_t val;
474 
475 	if (meta->op == VM_SNAPSHOT_SAVE) {
476 		ret = vmcs_getreg(vmcs, running, ident, &val);
477 		if (ret != 0)
478 			goto done;
479 
480 		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
481 	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
482 		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
483 
484 		ret = vmcs_setreg(vmcs, running, ident, val);
485 		if (ret != 0)
486 			goto done;
487 	} else {
488 		ret = EINVAL;
489 		goto done;
490 	}
491 
492 done:
493 	return (ret);
494 }
495 
496 int
497 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
498 		   struct vm_snapshot_meta *meta)
499 {
500 	int ret;
501 	struct seg_desc desc;
502 
503 	if (meta->op == VM_SNAPSHOT_SAVE) {
504 		ret = vmcs_getdesc(vmcs, running, seg, &desc);
505 		if (ret != 0)
506 			goto done;
507 
508 		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
509 		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
510 		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
511 	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
512 		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
513 		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
514 		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
515 
516 		ret = vmcs_setdesc(vmcs, running, seg, &desc);
517 		if (ret != 0)
518 			goto done;
519 	} else {
520 		ret = EINVAL;
521 		goto done;
522 	}
523 
524 done:
525 	return (ret);
526 }
527 
528 int
529 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
530 		  struct vm_snapshot_meta *meta)
531 {
532 	int ret;
533 	uint64_t val;
534 
535 	if (meta->op == VM_SNAPSHOT_SAVE) {
536 		ret = vmcs_getany(vmcs, running, ident, &val);
537 		if (ret != 0)
538 			goto done;
539 
540 		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
541 	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
542 		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
543 
544 		ret = vmcs_setany(vmcs, running, ident, val);
545 		if (ret != 0)
546 			goto done;
547 	} else {
548 		ret = EINVAL;
549 		goto done;
550 	}
551 
552 done:
553 	return (ret);
554 }
555 #endif
556 
557 #ifdef DDB
558 extern int vmxon_enabled[];
559 
560 DB_SHOW_COMMAND(vmcs, db_show_vmcs)
561 {
562 	uint64_t cur_vmcs, val;
563 	uint32_t exit;
564 
565 	if (!vmxon_enabled[curcpu]) {
566 		db_printf("VMX not enabled\n");
567 		return;
568 	}
569 
570 	if (have_addr) {
571 		db_printf("Only current VMCS supported\n");
572 		return;
573 	}
574 
575 	vmptrst(&cur_vmcs);
576 	if (cur_vmcs == VMCS_INITIAL) {
577 		db_printf("No current VM context\n");
578 		return;
579 	}
580 	db_printf("VMCS: %jx\n", cur_vmcs);
581 	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
582 	db_printf("Activity: ");
583 	val = vmcs_read(VMCS_GUEST_ACTIVITY);
584 	switch (val) {
585 	case 0:
586 		db_printf("Active");
587 		break;
588 	case 1:
589 		db_printf("HLT");
590 		break;
591 	case 2:
592 		db_printf("Shutdown");
593 		break;
594 	case 3:
595 		db_printf("Wait for SIPI");
596 		break;
597 	default:
598 		db_printf("Unknown: %#lx", val);
599 	}
600 	db_printf("\n");
601 	exit = vmcs_read(VMCS_EXIT_REASON);
602 	if (exit & 0x80000000)
603 		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
604 	else
605 		db_printf("Exit Reason: %u\n", exit & 0xffff);
606 	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
607 	db_printf("Guest Linear Address: %#lx\n",
608 	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
609 	switch (exit & 0x8000ffff) {
610 	case EXIT_REASON_EXCEPTION:
611 	case EXIT_REASON_EXT_INTR:
612 		val = vmcs_read(VMCS_EXIT_INTR_INFO);
613 		db_printf("Interrupt Type: ");
614 		switch (val >> 8 & 0x7) {
615 		case 0:
616 			db_printf("external");
617 			break;
618 		case 2:
619 			db_printf("NMI");
620 			break;
621 		case 3:
622 			db_printf("HW exception");
623 			break;
624 		case 4:
625 			db_printf("SW exception");
626 			break;
627 		default:
628 			db_printf("?? %lu", val >> 8 & 0x7);
629 			break;
630 		}
631 		db_printf("  Vector: %lu", val & 0xff);
632 		if (val & 0x800)
633 			db_printf("  Error Code: %lx",
634 			    vmcs_read(VMCS_EXIT_INTR_ERRCODE));
635 		db_printf("\n");
636 		break;
637 	case EXIT_REASON_EPT_FAULT:
638 	case EXIT_REASON_EPT_MISCONFIG:
639 		db_printf("Guest Physical Address: %#lx\n",
640 		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
641 		break;
642 	}
643 	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
644 }
645 #endif
646