xref: /freebsd/sys/amd64/vmm/intel/vmcs.c (revision 6ef6ba9950260f42b47499d17874d00ca9290955)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include "opt_ddb.h"
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/pcpu.h>
37 
38 #include <vm/vm.h>
39 #include <vm/pmap.h>
40 
41 #include <machine/segments.h>
42 #include <machine/vmm.h>
43 #include "vmm_host.h"
44 #include "vmcs.h"
45 #include "vmx_cpufunc.h"
46 #include "ept.h"
47 #include "vmx.h"
48 
49 #ifdef DDB
50 #include <ddb/ddb.h>
51 #endif
52 
53 static uint64_t
54 vmcs_fix_regval(uint32_t encoding, uint64_t val)
55 {
56 
57 	switch (encoding) {
58 	case VMCS_GUEST_CR0:
59 		val = vmx_fix_cr0(val);
60 		break;
61 	case VMCS_GUEST_CR4:
62 		val = vmx_fix_cr4(val);
63 		break;
64 	default:
65 		break;
66 	}
67 	return (val);
68 }
69 
70 static uint32_t
71 vmcs_field_encoding(int ident)
72 {
73 	switch (ident) {
74 	case VM_REG_GUEST_CR0:
75 		return (VMCS_GUEST_CR0);
76 	case VM_REG_GUEST_CR3:
77 		return (VMCS_GUEST_CR3);
78 	case VM_REG_GUEST_CR4:
79 		return (VMCS_GUEST_CR4);
80 	case VM_REG_GUEST_DR7:
81 		return (VMCS_GUEST_DR7);
82 	case VM_REG_GUEST_RSP:
83 		return (VMCS_GUEST_RSP);
84 	case VM_REG_GUEST_RIP:
85 		return (VMCS_GUEST_RIP);
86 	case VM_REG_GUEST_RFLAGS:
87 		return (VMCS_GUEST_RFLAGS);
88 	case VM_REG_GUEST_ES:
89 		return (VMCS_GUEST_ES_SELECTOR);
90 	case VM_REG_GUEST_CS:
91 		return (VMCS_GUEST_CS_SELECTOR);
92 	case VM_REG_GUEST_SS:
93 		return (VMCS_GUEST_SS_SELECTOR);
94 	case VM_REG_GUEST_DS:
95 		return (VMCS_GUEST_DS_SELECTOR);
96 	case VM_REG_GUEST_FS:
97 		return (VMCS_GUEST_FS_SELECTOR);
98 	case VM_REG_GUEST_GS:
99 		return (VMCS_GUEST_GS_SELECTOR);
100 	case VM_REG_GUEST_TR:
101 		return (VMCS_GUEST_TR_SELECTOR);
102 	case VM_REG_GUEST_LDTR:
103 		return (VMCS_GUEST_LDTR_SELECTOR);
104 	case VM_REG_GUEST_EFER:
105 		return (VMCS_GUEST_IA32_EFER);
106 	default:
107 		return (-1);
108 	}
109 
110 }
111 
112 static int
113 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
114 {
115 
116 	switch (seg) {
117 	case VM_REG_GUEST_ES:
118 		*base = VMCS_GUEST_ES_BASE;
119 		*lim = VMCS_GUEST_ES_LIMIT;
120 		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
121 		break;
122 	case VM_REG_GUEST_CS:
123 		*base = VMCS_GUEST_CS_BASE;
124 		*lim = VMCS_GUEST_CS_LIMIT;
125 		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
126 		break;
127 	case VM_REG_GUEST_SS:
128 		*base = VMCS_GUEST_SS_BASE;
129 		*lim = VMCS_GUEST_SS_LIMIT;
130 		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
131 		break;
132 	case VM_REG_GUEST_DS:
133 		*base = VMCS_GUEST_DS_BASE;
134 		*lim = VMCS_GUEST_DS_LIMIT;
135 		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
136 		break;
137 	case VM_REG_GUEST_FS:
138 		*base = VMCS_GUEST_FS_BASE;
139 		*lim = VMCS_GUEST_FS_LIMIT;
140 		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
141 		break;
142 	case VM_REG_GUEST_GS:
143 		*base = VMCS_GUEST_GS_BASE;
144 		*lim = VMCS_GUEST_GS_LIMIT;
145 		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
146 		break;
147 	case VM_REG_GUEST_TR:
148 		*base = VMCS_GUEST_TR_BASE;
149 		*lim = VMCS_GUEST_TR_LIMIT;
150 		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
151 		break;
152 	case VM_REG_GUEST_LDTR:
153 		*base = VMCS_GUEST_LDTR_BASE;
154 		*lim = VMCS_GUEST_LDTR_LIMIT;
155 		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
156 		break;
157 	case VM_REG_GUEST_IDTR:
158 		*base = VMCS_GUEST_IDTR_BASE;
159 		*lim = VMCS_GUEST_IDTR_LIMIT;
160 		*acc = VMCS_INVALID_ENCODING;
161 		break;
162 	case VM_REG_GUEST_GDTR:
163 		*base = VMCS_GUEST_GDTR_BASE;
164 		*lim = VMCS_GUEST_GDTR_LIMIT;
165 		*acc = VMCS_INVALID_ENCODING;
166 		break;
167 	default:
168 		return (EINVAL);
169 	}
170 
171 	return (0);
172 }
173 
/*
 * Read the register named by 'ident' from 'vmcs' into '*retval'.
 *
 * A non-negative 'ident' is translated through vmcs_field_encoding(). A
 * negative 'ident' bypasses that translation: the sign bit is stripped and
 * the remainder is used directly as the VMCS field encoding, which gives
 * access to vmx-specific state. This works because the upper 16 bits of a
 * VMCS encoding are reserved (i.e. zero), leaving the sign bit free.
 *
 * When 'running' is zero the access is bracketed by VMPTRLD()/VMCLEAR() on
 * 'vmcs'; otherwise vmread() is issued directly.
 *
 * Returns EINVAL if 'ident' cannot be translated, else the vmread() result.
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rc);
}
205 
/*
 * Write 'val' to the register named by 'ident' in 'vmcs'.
 *
 * A non-negative 'ident' is translated through vmcs_field_encoding(); a
 * negative one has its sign bit stripped and is used directly as the VMCS
 * field encoding. The value is filtered through vmcs_fix_regval() before
 * it is written.
 *
 * When 'running' is zero the access is bracketed by VMPTRLD()/VMCLEAR() on
 * 'vmcs'; otherwise vmwrite() is issued directly.
 *
 * Returns EINVAL if 'ident' cannot be translated, else the vmwrite() result.
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rc);
}
232 
233 int
234 vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
235 {
236 	int error;
237 	uint32_t base, limit, access;
238 
239 	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
240 	if (error != 0)
241 		panic("vmcs_setdesc: invalid segment register %d", seg);
242 
243 	VMPTRLD(vmcs);
244 	if ((error = vmwrite(base, desc->base)) != 0)
245 		goto done;
246 
247 	if ((error = vmwrite(limit, desc->limit)) != 0)
248 		goto done;
249 
250 	if (access != VMCS_INVALID_ENCODING) {
251 		if ((error = vmwrite(access, desc->access)) != 0)
252 			goto done;
253 	}
254 done:
255 	VMCLEAR(vmcs);
256 	return (error);
257 }
258 
259 int
260 vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
261 {
262 	int error;
263 	uint32_t base, limit, access;
264 	uint64_t u64;
265 
266 	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
267 	if (error != 0)
268 		panic("vmcs_getdesc: invalid segment register %d", seg);
269 
270 	VMPTRLD(vmcs);
271 	if ((error = vmread(base, &u64)) != 0)
272 		goto done;
273 	desc->base = u64;
274 
275 	if ((error = vmread(limit, &u64)) != 0)
276 		goto done;
277 	desc->limit = u64;
278 
279 	if (access != VMCS_INVALID_ENCODING) {
280 		if ((error = vmread(access, &u64)) != 0)
281 			goto done;
282 		desc->access = u64;
283 	}
284 done:
285 	VMCLEAR(vmcs);
286 	return (error);
287 }
288 
289 int
290 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
291 {
292 	int error;
293 
294 	VMPTRLD(vmcs);
295 
296 	/*
297 	 * Guest MSRs are saved in the VM-exit MSR-store area.
298 	 * Guest MSRs are loaded from the VM-entry MSR-load area.
299 	 * Both areas point to the same location in memory.
300 	 */
301 	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
302 		goto done;
303 	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
304 		goto done;
305 
306 	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
307 		goto done;
308 	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
309 		goto done;
310 
311 	error = 0;
312 done:
313 	VMCLEAR(vmcs);
314 	return (error);
315 }
316 
317 int
318 vmcs_set_defaults(struct vmcs *vmcs,
319 		  u_long host_rip, u_long host_rsp, uint64_t eptp,
320 		  uint32_t pinbased_ctls, uint32_t procbased_ctls,
321 		  uint32_t procbased_ctls2, uint32_t exit_ctls,
322 		  uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
323 {
324 	int error, codesel, datasel, tsssel;
325 	u_long cr0, cr4, efer;
326 	uint64_t pat, fsbase, idtrbase;
327 	uint32_t exc_bitmap;
328 
329 	codesel = vmm_get_host_codesel();
330 	datasel = vmm_get_host_datasel();
331 	tsssel = vmm_get_host_tsssel();
332 
333 	/*
334 	 * Make sure we have a "current" VMCS to work with.
335 	 */
336 	VMPTRLD(vmcs);
337 
338 	/*
339 	 * Load the VMX controls
340 	 */
341 	if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
342 		goto done;
343 	if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
344 		goto done;
345 	if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
346 		goto done;
347 	if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
348 		goto done;
349 	if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
350 		goto done;
351 
352 	/* Guest state */
353 
354 	/* Initialize guest IA32_PAT MSR with the default value */
355 	pat = PAT_VALUE(0, PAT_WRITE_BACK)	|
356 	      PAT_VALUE(1, PAT_WRITE_THROUGH)	|
357 	      PAT_VALUE(2, PAT_UNCACHED)	|
358 	      PAT_VALUE(3, PAT_UNCACHEABLE)	|
359 	      PAT_VALUE(4, PAT_WRITE_BACK)	|
360 	      PAT_VALUE(5, PAT_WRITE_THROUGH)	|
361 	      PAT_VALUE(6, PAT_UNCACHED)	|
362 	      PAT_VALUE(7, PAT_UNCACHEABLE);
363 	if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
364 		goto done;
365 
366 	/* Host state */
367 
368 	/* Initialize host IA32_PAT MSR */
369 	pat = vmm_get_host_pat();
370 	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
371 		goto done;
372 
373 	/* Load the IA32_EFER MSR */
374 	efer = vmm_get_host_efer();
375 	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
376 		goto done;
377 
378 	/* Load the control registers */
379 
380 	cr0 = vmm_get_host_cr0();
381 	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
382 		goto done;
383 
384 	cr4 = vmm_get_host_cr4() | CR4_VMXE;
385 	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
386 		goto done;
387 
388 	/* Load the segment selectors */
389 	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
390 		goto done;
391 
392 	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
393 		goto done;
394 
395 	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
396 		goto done;
397 
398 	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
399 		goto done;
400 
401 	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
402 		goto done;
403 
404 	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
405 		goto done;
406 
407 	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
408 		goto done;
409 
410 	/*
411 	 * Load the Base-Address for %fs and idtr.
412 	 *
413 	 * Note that we exclude %gs, tss and gdtr here because their base
414 	 * address is pcpu specific.
415 	 */
416 	fsbase = vmm_get_host_fsbase();
417 	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
418 		goto done;
419 
420 	idtrbase = vmm_get_host_idtrbase();
421 	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
422 		goto done;
423 
424 	/* instruction pointer */
425 	if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
426 		goto done;
427 
428 	/* stack pointer */
429 	if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
430 		goto done;
431 
432 	/* eptp */
433 	if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
434 		goto done;
435 
436 	/* vpid */
437 	if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
438 		goto done;
439 
440 	/* msr bitmap */
441 	if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
442 		goto done;
443 
444 	/* exception bitmap */
445 	exc_bitmap = 1 << IDT_MC;
446 	if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
447 		goto done;
448 
449 	/* link pointer */
450 	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
451 		goto done;
452 done:
453 	VMCLEAR(vmcs);
454 	return (error);
455 }
456 
/*
 * Return the contents of the VMCS field identified by 'encoding'.
 *
 * The read is issued with vmread() as-is; this function does not load a
 * VMCS itself. Panics if vmread() reports an error.
 */
uint64_t
vmcs_read(uint32_t encoding)
{
	uint64_t value;
	int rc;

	rc = vmread(encoding, &value);
	if (rc != 0)
		panic("vmcs_read(%u) error %d", encoding, rc);

	return (value);
}
469 
470 #ifdef DDB
471 extern int vmxon_enabled[];
472 
473 DB_SHOW_COMMAND(vmcs, db_show_vmcs)
474 {
475 	uint64_t cur_vmcs, val;
476 	uint32_t exit;
477 
478 	if (!vmxon_enabled[curcpu]) {
479 		db_printf("VMX not enabled\n");
480 		return;
481 	}
482 
483 	if (have_addr) {
484 		db_printf("Only current VMCS supported\n");
485 		return;
486 	}
487 
488 	vmptrst(&cur_vmcs);
489 	if (cur_vmcs == VMCS_INITIAL) {
490 		db_printf("No current VM context\n");
491 		return;
492 	}
493 	db_printf("VMCS: %jx\n", cur_vmcs);
494 	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
495 	db_printf("Activity: ");
496 	val = vmcs_read(VMCS_GUEST_ACTIVITY);
497 	switch (val) {
498 	case 0:
499 		db_printf("Active");
500 		break;
501 	case 1:
502 		db_printf("HLT");
503 		break;
504 	case 2:
505 		db_printf("Shutdown");
506 		break;
507 	case 3:
508 		db_printf("Wait for SIPI");
509 		break;
510 	default:
511 		db_printf("Unknown: %#lx", val);
512 	}
513 	db_printf("\n");
514 	exit = vmcs_read(VMCS_EXIT_REASON);
515 	if (exit & 0x80000000)
516 		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
517 	else
518 		db_printf("Exit Reason: %u\n", exit & 0xffff);
519 	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
520 	db_printf("Guest Linear Address: %#lx\n",
521 	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
522 	switch (exit & 0x8000ffff) {
523 	case EXIT_REASON_EXCEPTION:
524 	case EXIT_REASON_EXT_INTR:
525 		val = vmcs_read(VMCS_EXIT_INTERRUPTION_INFO);
526 		db_printf("Interrupt Type: ");
527 		switch (val >> 8 & 0x7) {
528 		case 0:
529 			db_printf("external");
530 			break;
531 		case 2:
532 			db_printf("NMI");
533 			break;
534 		case 3:
535 			db_printf("HW exception");
536 			break;
537 		case 4:
538 			db_printf("SW exception");
539 			break;
540 		default:
541 			db_printf("?? %lu", val >> 8 & 0x7);
542 			break;
543 		}
544 		db_printf("  Vector: %lu", val & 0xff);
545 		if (val & 0x800)
546 			db_printf("  Error Code: %lx",
547 			    vmcs_read(VMCS_EXIT_INTERRUPTION_ERROR));
548 		db_printf("\n");
549 		break;
550 	case EXIT_REASON_EPT_FAULT:
551 	case EXIT_REASON_EPT_MISCONFIG:
552 		db_printf("Guest Physical Address: %#lx\n",
553 		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
554 		break;
555 	}
556 	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
557 }
558 #endif
559