1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include "opt_bhyve_snapshot.h"
30 #include "opt_ddb.h"
31
32 #include <sys/param.h>
33 #include <sys/sysctl.h>
34 #include <sys/systm.h>
35 #include <sys/pcpu.h>
36
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39
40 #include <machine/segments.h>
41 #include <machine/vmm.h>
42 #include <machine/vmm_snapshot.h>
43 #include "vmm_host.h"
44 #include "vmx_cpufunc.h"
45 #include "vmcs.h"
46 #include "ept.h"
47 #include "vmx.h"
48
49 #ifdef DDB
50 #include <ddb/ddb.h>
51 #endif
52
53 SYSCTL_DECL(_hw_vmm_vmx);
54
55 static int no_flush_rsb;
56 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
57 &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
58
59 static uint64_t
vmcs_fix_regval(uint32_t encoding,uint64_t val)60 vmcs_fix_regval(uint32_t encoding, uint64_t val)
61 {
62
63 switch (encoding) {
64 case VMCS_GUEST_CR0:
65 val = vmx_fix_cr0(val);
66 break;
67 case VMCS_GUEST_CR4:
68 val = vmx_fix_cr4(val);
69 break;
70 default:
71 break;
72 }
73 return (val);
74 }
75
76 static uint32_t
vmcs_field_encoding(int ident)77 vmcs_field_encoding(int ident)
78 {
79 switch (ident) {
80 case VM_REG_GUEST_CR0:
81 return (VMCS_GUEST_CR0);
82 case VM_REG_GUEST_CR3:
83 return (VMCS_GUEST_CR3);
84 case VM_REG_GUEST_CR4:
85 return (VMCS_GUEST_CR4);
86 case VM_REG_GUEST_DR7:
87 return (VMCS_GUEST_DR7);
88 case VM_REG_GUEST_RSP:
89 return (VMCS_GUEST_RSP);
90 case VM_REG_GUEST_RIP:
91 return (VMCS_GUEST_RIP);
92 case VM_REG_GUEST_RFLAGS:
93 return (VMCS_GUEST_RFLAGS);
94 case VM_REG_GUEST_ES:
95 return (VMCS_GUEST_ES_SELECTOR);
96 case VM_REG_GUEST_CS:
97 return (VMCS_GUEST_CS_SELECTOR);
98 case VM_REG_GUEST_SS:
99 return (VMCS_GUEST_SS_SELECTOR);
100 case VM_REG_GUEST_DS:
101 return (VMCS_GUEST_DS_SELECTOR);
102 case VM_REG_GUEST_FS:
103 return (VMCS_GUEST_FS_SELECTOR);
104 case VM_REG_GUEST_GS:
105 return (VMCS_GUEST_GS_SELECTOR);
106 case VM_REG_GUEST_TR:
107 return (VMCS_GUEST_TR_SELECTOR);
108 case VM_REG_GUEST_LDTR:
109 return (VMCS_GUEST_LDTR_SELECTOR);
110 case VM_REG_GUEST_EFER:
111 return (VMCS_GUEST_IA32_EFER);
112 case VM_REG_GUEST_PDPTE0:
113 return (VMCS_GUEST_PDPTE0);
114 case VM_REG_GUEST_PDPTE1:
115 return (VMCS_GUEST_PDPTE1);
116 case VM_REG_GUEST_PDPTE2:
117 return (VMCS_GUEST_PDPTE2);
118 case VM_REG_GUEST_PDPTE3:
119 return (VMCS_GUEST_PDPTE3);
120 case VM_REG_GUEST_ENTRY_INST_LENGTH:
121 return (VMCS_ENTRY_INST_LENGTH);
122 case VM_REG_GUEST_FS_BASE:
123 return (VMCS_GUEST_FS_BASE);
124 case VM_REG_GUEST_GS_BASE:
125 return (VMCS_GUEST_GS_BASE);
126 default:
127 return (-1);
128 }
129 }
130
131 static int
vmcs_seg_desc_encoding(int seg,uint32_t * base,uint32_t * lim,uint32_t * acc)132 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
133 {
134
135 switch (seg) {
136 case VM_REG_GUEST_ES:
137 *base = VMCS_GUEST_ES_BASE;
138 *lim = VMCS_GUEST_ES_LIMIT;
139 *acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
140 break;
141 case VM_REG_GUEST_CS:
142 *base = VMCS_GUEST_CS_BASE;
143 *lim = VMCS_GUEST_CS_LIMIT;
144 *acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
145 break;
146 case VM_REG_GUEST_SS:
147 *base = VMCS_GUEST_SS_BASE;
148 *lim = VMCS_GUEST_SS_LIMIT;
149 *acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
150 break;
151 case VM_REG_GUEST_DS:
152 *base = VMCS_GUEST_DS_BASE;
153 *lim = VMCS_GUEST_DS_LIMIT;
154 *acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
155 break;
156 case VM_REG_GUEST_FS:
157 *base = VMCS_GUEST_FS_BASE;
158 *lim = VMCS_GUEST_FS_LIMIT;
159 *acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
160 break;
161 case VM_REG_GUEST_GS:
162 *base = VMCS_GUEST_GS_BASE;
163 *lim = VMCS_GUEST_GS_LIMIT;
164 *acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
165 break;
166 case VM_REG_GUEST_TR:
167 *base = VMCS_GUEST_TR_BASE;
168 *lim = VMCS_GUEST_TR_LIMIT;
169 *acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
170 break;
171 case VM_REG_GUEST_LDTR:
172 *base = VMCS_GUEST_LDTR_BASE;
173 *lim = VMCS_GUEST_LDTR_LIMIT;
174 *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
175 break;
176 case VM_REG_GUEST_IDTR:
177 *base = VMCS_GUEST_IDTR_BASE;
178 *lim = VMCS_GUEST_IDTR_LIMIT;
179 *acc = VMCS_INVALID_ENCODING;
180 break;
181 case VM_REG_GUEST_GDTR:
182 *base = VMCS_GUEST_GDTR_BASE;
183 *lim = VMCS_GUEST_GDTR_LIMIT;
184 *acc = VMCS_INVALID_ENCODING;
185 break;
186 default:
187 return (EINVAL);
188 }
189
190 return (0);
191 }
192
/*
 * Read the guest register 'ident' from 'vmcs' into '*retval'.
 *
 * When 'running' is zero the read is bracketed by VMPTRLD()/VMCLEAR() on
 * 'vmcs'; otherwise vmread() is issued directly.
 *
 * Returns EINVAL if 'ident' has no VMCS field, else the result of vmread().
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int error;

	/*
	 * Callers that need vmx-specific state can skip the 'ident' to
	 * encoding translation by setting the sign bit and passing the raw
	 * encoding in the remaining bits.  The upper 16 bits of a VMCS
	 * encoding are reserved (i.e. set to 0), so the sign bit is free
	 * to be used as this flag.
	 */
	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	error = vmread(field, retval);
	VMCLEAR(vmcs);

	return (error);
}
224
/*
 * Write 'val' to the guest register 'ident' in 'vmcs'.
 *
 * A negative 'ident' carries a raw VMCS encoding in its low 31 bits;
 * otherwise the identifier is translated with vmcs_field_encoding().  The
 * value is passed through vmcs_fix_regval() before it is written.
 *
 * When 'running' is zero the write is bracketed by VMPTRLD()/VMCLEAR() on
 * 'vmcs'; otherwise vmwrite() is issued directly.
 *
 * Returns EINVAL if 'ident' has no VMCS field, else the result of vmwrite().
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int error;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	error = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (error);
}
251
252 int
vmcs_setdesc(struct vmcs * vmcs,int running,int seg,struct seg_desc * desc)253 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
254 {
255 int error;
256 uint32_t base, limit, access;
257
258 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
259 if (error != 0)
260 panic("vmcs_setdesc: invalid segment register %d", seg);
261
262 if (!running)
263 VMPTRLD(vmcs);
264 if ((error = vmwrite(base, desc->base)) != 0)
265 goto done;
266
267 if ((error = vmwrite(limit, desc->limit)) != 0)
268 goto done;
269
270 if (access != VMCS_INVALID_ENCODING) {
271 if ((error = vmwrite(access, desc->access)) != 0)
272 goto done;
273 }
274 done:
275 if (!running)
276 VMCLEAR(vmcs);
277 return (error);
278 }
279
280 int
vmcs_getdesc(struct vmcs * vmcs,int running,int seg,struct seg_desc * desc)281 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
282 {
283 int error;
284 uint32_t base, limit, access;
285 uint64_t u64;
286
287 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
288 if (error != 0)
289 panic("vmcs_getdesc: invalid segment register %d", seg);
290
291 if (!running)
292 VMPTRLD(vmcs);
293 if ((error = vmread(base, &u64)) != 0)
294 goto done;
295 desc->base = u64;
296
297 if ((error = vmread(limit, &u64)) != 0)
298 goto done;
299 desc->limit = u64;
300
301 if (access != VMCS_INVALID_ENCODING) {
302 if ((error = vmread(access, &u64)) != 0)
303 goto done;
304 desc->access = u64;
305 }
306 done:
307 if (!running)
308 VMCLEAR(vmcs);
309 return (error);
310 }
311
312 int
vmcs_set_msr_save(struct vmcs * vmcs,u_long g_area,u_int g_count)313 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
314 {
315 int error;
316
317 VMPTRLD(vmcs);
318
319 /*
320 * Guest MSRs are saved in the VM-exit MSR-store area.
321 * Guest MSRs are loaded from the VM-entry MSR-load area.
322 * Both areas point to the same location in memory.
323 */
324 if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
325 goto done;
326 if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
327 goto done;
328
329 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
330 goto done;
331 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
332 goto done;
333
334 error = 0;
335 done:
336 VMCLEAR(vmcs);
337 return (error);
338 }
339
340 int
vmcs_init(struct vmcs * vmcs)341 vmcs_init(struct vmcs *vmcs)
342 {
343 int error, codesel, datasel, tsssel;
344 u_long cr0, cr4, efer;
345 uint64_t pat, fsbase, idtrbase;
346
347 codesel = vmm_get_host_codesel();
348 datasel = vmm_get_host_datasel();
349 tsssel = vmm_get_host_tsssel();
350
351 /*
352 * Make sure we have a "current" VMCS to work with.
353 */
354 VMPTRLD(vmcs);
355
356 /* Host state */
357
358 /* Initialize host IA32_PAT MSR */
359 pat = vmm_get_host_pat();
360 if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
361 goto done;
362
363 /* Load the IA32_EFER MSR */
364 efer = vmm_get_host_efer();
365 if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
366 goto done;
367
368 /* Load the control registers */
369
370 cr0 = vmm_get_host_cr0();
371 if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
372 goto done;
373
374 cr4 = vmm_get_host_cr4() | CR4_VMXE;
375 if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
376 goto done;
377
378 /* Load the segment selectors */
379 if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
380 goto done;
381
382 if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
383 goto done;
384
385 if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
386 goto done;
387
388 if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
389 goto done;
390
391 if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
392 goto done;
393
394 if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
395 goto done;
396
397 if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
398 goto done;
399
400 /*
401 * Load the Base-Address for %fs and idtr.
402 *
403 * Note that we exclude %gs, tss and gdtr here because their base
404 * address is pcpu specific.
405 */
406 fsbase = vmm_get_host_fsbase();
407 if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
408 goto done;
409
410 idtrbase = vmm_get_host_idtrbase();
411 if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
412 goto done;
413
414 /* instruction pointer */
415 if (no_flush_rsb) {
416 if ((error = vmwrite(VMCS_HOST_RIP,
417 (u_long)vmx_exit_guest)) != 0)
418 goto done;
419 } else {
420 if ((error = vmwrite(VMCS_HOST_RIP,
421 (u_long)vmx_exit_guest_flush_rsb)) != 0)
422 goto done;
423 }
424
425 /* link pointer */
426 if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
427 goto done;
428 done:
429 VMCLEAR(vmcs);
430 return (error);
431 }
432
433 #ifdef BHYVE_SNAPSHOT
/*
 * Read the VMCS field whose raw encoding is 'ident' into '*val'.
 *
 * Unlike vmcs_getreg(), 'ident' is handed to vmread() as is, without any
 * translation or validation.  When 'running' is zero the read is bracketed
 * by VMPTRLD()/VMCLEAR() on 'vmcs'.
 *
 * Returns the result of vmread().
 */
int
vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
{
	int error;

	if (running)
		return (vmread(ident, val));

	VMPTRLD(vmcs);
	error = vmread(ident, val);
	VMCLEAR(vmcs);

	return (error);
}
449
/*
 * Write 'val' to the VMCS field whose raw encoding is 'ident'.
 *
 * Unlike vmcs_setreg(), 'ident' is handed to vmwrite() as is and 'val' is
 * not passed through vmcs_fix_regval().  When 'running' is zero the write is
 * bracketed by VMPTRLD()/VMCLEAR() on 'vmcs'.
 *
 * Returns the result of vmwrite().
 */
int
vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	int error;

	if (running)
		return (vmwrite(ident, val));

	VMPTRLD(vmcs);
	error = vmwrite(ident, val);
	VMCLEAR(vmcs);

	return (error);
}
465
466 int
vmcs_snapshot_reg(struct vmcs * vmcs,int running,int ident,struct vm_snapshot_meta * meta)467 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
468 struct vm_snapshot_meta *meta)
469 {
470 int ret;
471 uint64_t val;
472
473 if (meta->op == VM_SNAPSHOT_SAVE) {
474 ret = vmcs_getreg(vmcs, running, ident, &val);
475 if (ret != 0)
476 goto done;
477
478 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
479 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
480 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
481
482 ret = vmcs_setreg(vmcs, running, ident, val);
483 if (ret != 0)
484 goto done;
485 } else {
486 ret = EINVAL;
487 goto done;
488 }
489
490 done:
491 return (ret);
492 }
493
494 int
vmcs_snapshot_desc(struct vmcs * vmcs,int running,int seg,struct vm_snapshot_meta * meta)495 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
496 struct vm_snapshot_meta *meta)
497 {
498 int ret;
499 struct seg_desc desc;
500
501 if (meta->op == VM_SNAPSHOT_SAVE) {
502 ret = vmcs_getdesc(vmcs, running, seg, &desc);
503 if (ret != 0)
504 goto done;
505
506 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
507 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
508 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
509 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
510 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
511 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
512 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
513
514 ret = vmcs_setdesc(vmcs, running, seg, &desc);
515 if (ret != 0)
516 goto done;
517 } else {
518 ret = EINVAL;
519 goto done;
520 }
521
522 done:
523 return (ret);
524 }
525
526 int
vmcs_snapshot_any(struct vmcs * vmcs,int running,int ident,struct vm_snapshot_meta * meta)527 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
528 struct vm_snapshot_meta *meta)
529 {
530 int ret;
531 uint64_t val;
532
533 if (meta->op == VM_SNAPSHOT_SAVE) {
534 ret = vmcs_getany(vmcs, running, ident, &val);
535 if (ret != 0)
536 goto done;
537
538 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
539 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
540 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
541
542 ret = vmcs_setany(vmcs, running, ident, val);
543 if (ret != 0)
544 goto done;
545 } else {
546 ret = EINVAL;
547 goto done;
548 }
549
550 done:
551 return (ret);
552 }
553 #endif
554
555 #ifdef DDB
556 extern int vmxon_enabled[];
557
DB_SHOW_COMMAND(vmcs,db_show_vmcs)558 DB_SHOW_COMMAND(vmcs, db_show_vmcs)
559 {
560 uint64_t cur_vmcs, val;
561 uint32_t exit;
562
563 if (!vmxon_enabled[curcpu]) {
564 db_printf("VMX not enabled\n");
565 return;
566 }
567
568 if (have_addr) {
569 db_printf("Only current VMCS supported\n");
570 return;
571 }
572
573 vmptrst(&cur_vmcs);
574 if (cur_vmcs == VMCS_INITIAL) {
575 db_printf("No current VM context\n");
576 return;
577 }
578 db_printf("VMCS: %jx\n", cur_vmcs);
579 db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
580 db_printf("Activity: ");
581 val = vmcs_read(VMCS_GUEST_ACTIVITY);
582 switch (val) {
583 case 0:
584 db_printf("Active");
585 break;
586 case 1:
587 db_printf("HLT");
588 break;
589 case 2:
590 db_printf("Shutdown");
591 break;
592 case 3:
593 db_printf("Wait for SIPI");
594 break;
595 default:
596 db_printf("Unknown: %#lx", val);
597 }
598 db_printf("\n");
599 exit = vmcs_read(VMCS_EXIT_REASON);
600 if (exit & 0x80000000)
601 db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
602 else
603 db_printf("Exit Reason: %u\n", exit & 0xffff);
604 db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
605 db_printf("Guest Linear Address: %#lx\n",
606 vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
607 switch (exit & 0x8000ffff) {
608 case EXIT_REASON_EXCEPTION:
609 case EXIT_REASON_EXT_INTR:
610 val = vmcs_read(VMCS_EXIT_INTR_INFO);
611 db_printf("Interrupt Type: ");
612 switch (val >> 8 & 0x7) {
613 case 0:
614 db_printf("external");
615 break;
616 case 2:
617 db_printf("NMI");
618 break;
619 case 3:
620 db_printf("HW exception");
621 break;
622 case 4:
623 db_printf("SW exception");
624 break;
625 default:
626 db_printf("?? %lu", val >> 8 & 0x7);
627 break;
628 }
629 db_printf(" Vector: %lu", val & 0xff);
630 if (val & 0x800)
631 db_printf(" Error Code: %lx",
632 vmcs_read(VMCS_EXIT_INTR_ERRCODE));
633 db_printf("\n");
634 break;
635 case EXIT_REASON_EPT_FAULT:
636 case EXIT_REASON_EPT_MISCONFIG:
637 db_printf("Guest Physical Address: %#lx\n",
638 vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
639 break;
640 }
641 db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
642 }
643 #endif
644