/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2023 Oxide Computer Company
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <strings.h>
#include <assert.h>
#include <errno.h>

#include <sys/types.h>
#include <sys/segments.h>
#include <sys/psw.h>
#include <sys/controlregs.h>
#include <sys/sysmacros.h>
#include <sys/varargs.h>
#include <sys/debug.h>
#include <sys/mman.h>

#include <sys/vmm.h>
#include <sys/vmm_dev.h>
#include <vmmapi.h>

#include "in_guest.h"

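/* Page-table entry bits used when constructing the identity map below */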
#define	PT_VALID	0x01
#define	PT_WRITABLE	0x02
#define	PT_WRITETHRU	0x08
#define	PT_NOCACHE	0x10
#define	PT_PAGESIZE	0x80

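/*
 * Segment access-rights bits, in the layout consumed by vm_set_desc() for
 * guest segment descriptors.
 */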
#define	SEG_ACCESS_TYPE_MASK	0x1f
#define	SEG_ACCESS_DPL_MASK	0x60
#define	SEG_ACCESS_P		(1 << 7)
#define	SEG_ACCESS_AVL		(1 << 12)
#define	SEG_ACCESS_L		(1 << 13)
#define	SEG_ACCESS_D		(1 << 14)
#define	SEG_ACCESS_G		(1 << 15)
#define	SEG_ACCESS_UNUSABLE	(1 << 16)


/*
 * Keep the test name and VM context around so the consumer is not required to
 * pass either of them back to us for subsequent test-related operations once
 * initialization has been performed.
 *
 * The test code is not designed to be reentrant at this point.
 */
static struct vmctx *test_vmctx = NULL;
static const char *test_name = NULL;

static uint64_t test_msg_addr = 0;

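/*
 * Allocate a small bootrom segment for the instance and map it read/execute
 * at MEM_LOC_ROM.
 */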
static int
setup_rom(struct vmctx *ctx)
{
	const size_t seg_sz = 0x1000;
	const uintptr_t seg_addr = MEM_LOC_ROM;
	const int fd = vm_get_device_fd(ctx);
	int err;

	struct vm_memseg memseg = {
		.segid = VM_BOOTROM,
		.len = 0x1000,
	};
	(void) strlcpy(memseg.name, "testrom", sizeof (memseg.name));
	err = ioctl(fd, VM_ALLOC_MEMSEG, &memseg);
	if (err != 0) {
		return (err);
	}
	err = vm_mmap_memseg(ctx, seg_addr, VM_BOOTROM, 0, seg_sz,
	    PROT_READ | PROT_EXEC);
	return (err);
}

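/*
 * Build page tables which identity-map the bottom 4GiB of guest-physical
 * address space with 2MiB pages.
 */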
static void
populate_identity_table(struct vmctx *ctx)
{
	uint64_t gpa, pte_loc;

	/* Set up 2MiB PTEs for everything up through 0xffffffff */
	for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M;
	    gpa < 0x100000000;
	    pte_loc += PAGE_SIZE) {
		uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE);

		for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) {
			*ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE;
			/* Make traditional MMIO space uncachable */
			if (gpa >= 0xc0000000) {
				*ptep |= PT_WRITETHRU | PT_NOCACHE;
			}
		}
	}
	assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G);

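	/* Link the 2MiB tables into the PDPT, then the PDPT into the PML4 */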
	uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE);
	pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE;
	pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE;
	pdep[2] =
	    (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE;
	pdep[3] =
	    (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE;

	pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE);
	pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE;
}

static void
populate_desc_tables(struct vmctx *ctx)
{

}

void
test_cleanup(bool is_failure)
{
	if (test_vmctx != NULL) {
		bool keep_on_fail = false;

		const char *keep_var;
		if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) {
			if (strlen(keep_var) != 0 &&
			    strcmp(keep_var, "0") != 0) {
				keep_on_fail = true;
			}
		}

		/*
		 * Destroy the instance unless the test failed and it was
		 * requested that we keep it around.
		 */
		if (!is_failure || !keep_on_fail) {
			vm_destroy(test_vmctx);
		}
		test_name = NULL;
		test_vmctx = NULL;
	}
}

static void
fail_finish(void)
{
	assert(test_name != NULL);
	(void) printf("FAIL %s\n", test_name);

	test_cleanup(true);
	exit(EXIT_FAILURE);
}

void
test_fail(void)
{
	fail_finish();
}

void
test_fail_errno(int err, const char *msg)
{
	const char *err_str = strerror(err);

	(void) fprintf(stderr, "%s: %s\n", msg, err_str);
	fail_finish();
}

void
test_fail_msg(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);

	fail_finish();
}

void
test_fail_vmexit(const struct vm_exit *vexit)
{
	const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n";

	switch (vexit->exitcode) {
	case VM_EXITCODE_INOUT:
		(void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip);
		(void) fprintf(stderr,
		    "\teax: %08x\n"
		    "\tport: %04x\n"
		    "\tbytes: %u\n"
		    "\tflags: %x\n",
		    vexit->u.inout.eax,
		    vexit->u.inout.port,
		    vexit->u.inout.bytes,
		    vexit->u.inout.flags);
		break;
	case VM_EXITCODE_RDMSR:
		(void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip);
		(void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code);
		break;
	case VM_EXITCODE_WRMSR:
		(void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip);
		(void) fprintf(stderr,
		    "\tcode: %08x\n"
		    "\twval: %016lx\n",
		    vexit->u.msr.code, vexit->u.msr.wval);
		break;
	case VM_EXITCODE_MMIO:
		(void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip);
		(void) fprintf(stderr,
		    "\tbytes: %u\n"
		    "\ttype: %s\n"
		    "\tgpa: %lx\n"
		    "\tdata: %016lx\n",
		    vexit->u.mmio.bytes,
		    vexit->u.mmio.read == 0 ? "write" : "read",
		    vexit->u.mmio.gpa,
		    vexit->u.mmio.data);
		break;
	case VM_EXITCODE_VMX:
		(void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip);
		(void) fprintf(stderr,
		    "\tstatus: %x\n"
		    "\treason: %x\n"
		    "\tqualification: %lx\n"
		    "\tinst_type: %x\n"
		    "\tinst_error: %x\n",
		    vexit->u.vmx.status,
		    vexit->u.vmx.exit_reason,
		    vexit->u.vmx.exit_qualification,
		    vexit->u.vmx.inst_type,
		    vexit->u.vmx.inst_error);
		break;
	case VM_EXITCODE_SVM:
		(void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip);
		break;
	case VM_EXITCODE_INST_EMUL:
		(void) fprintf(stderr, hdr_fmt, "instruction emulation",
		    vexit->rip);
		const uint_t len = vexit->u.inst_emul.num_valid > 0 ?
		    vexit->u.inst_emul.num_valid : 15;
		(void) fprintf(stderr, "\tinstruction bytes: [");
		for (uint_t i = 0; i < len; i++) {
			(void) fprintf(stderr, "%s%02x",
			    i == 0 ? "" : ", ",
			    vexit->u.inst_emul.inst[i]);
		}
		(void) fprintf(stderr, "]\n");
		break;
	case VM_EXITCODE_SUSPENDED:
		(void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip);
		switch (vexit->u.suspended.how) {
		case VM_SUSPEND_RESET:
			(void) fprintf(stderr, "\thow: reset");
			break;
		case VM_SUSPEND_POWEROFF:
			(void) fprintf(stderr, "\thow: poweroff");
			break;
		case VM_SUSPEND_HALT:
			(void) fprintf(stderr, "\thow: halt");
			break;
		case VM_SUSPEND_TRIPLEFAULT:
			(void) fprintf(stderr, "\thow: triple-fault");
			break;
		default:
			(void) fprintf(stderr, "\thow: unknown - %d",
			    vexit->u.suspended.how);
			break;
		}
		break;
	default:
		(void) fprintf(stderr, "Unexpected code %d exit:\n"
		    "\t%%rip: %lx\n", vexit->exitcode, vexit->rip);
		break;
	}
	fail_finish();
}

void
test_pass(void)
{
	assert(test_name != NULL);
	(void) printf("PASS %s\n", test_name);
	test_cleanup(false);
	exit(EXIT_SUCCESS);
}

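/*
 * Fetch the NUL-terminated message most recently reported by the payload via
 * the IOP_TEST_MSG port, or NULL if no (valid) message is available.
 */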
const char *
test_msg_get(struct vmctx *ctx)
{
	/* Disregard if the message address has not been set */
	const uint64_t msg_addr = test_msg_addr;
	if (msg_addr == 0) {
		return (NULL);
	}

	/*
	 * We want to try to map up to one page after the specified message
	 * address, keeping in mind the end of lowmem.  (The payload, and
	 * thus the message, is assumed to be in lowmem at this time.)
	 */
	const uint64_t lowmem_end = vm_get_lowmem_size(ctx);
	const uint64_t msg_map_end = MIN(msg_addr + PAGE_SIZE, lowmem_end);

	if (msg_map_end >= lowmem_end || msg_map_end <= msg_addr) {
		return (NULL);
	}
	const uint64_t max_msg_len = msg_map_end - msg_addr;

	/*
	 * Get the mapping to that guest memory.  This assumes that the
	 * payload has provided a guest-physical address to us.
	 */
	const char *result = vm_map_gpa(ctx, msg_addr, max_msg_len);
	if (result == NULL) {
		return (NULL);
	}

	/* Demand a NUL-terminated string shorter than the map limit */
	if (strnlen(result, max_msg_len) >= max_msg_len) {
		return (NULL);
	}

	return (result);
}

void
test_msg_print(struct vmctx *ctx)
{
	const char *payload_msg = test_msg_get(ctx);

	if (payload_msg != NULL) {
		(void) fprintf(stderr, "MSG: %s\n", payload_msg);
	}
}

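/*
 * Copy the test payload (provided via the external payload_data and
 * payload_size symbols) into guest memory at MEM_LOC_PAYLOAD.
 */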
static int
load_payload(struct vmctx *ctx)
{
	extern uint8_t payload_data;
	extern uint32_t payload_size;

	const uint32_t len = payload_size;
	const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD);

	if (len > cap) {
		test_fail_msg("Payload size %u > capacity %u\n", len, cap);
	}

	const size_t map_len = P2ROUNDUP(len, PAGE_SIZE);
	void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len);
	bcopy(&payload_data, outp, len);

	return (0);
}

struct vmctx *
test_initialize(const char *tname)
{
	return (test_initialize_flags(tname, 0));
}

struct vmctx *
test_initialize_flags(const char *tname, uint64_t create_flags)
{
	char vm_name[VM_MAX_NAMELEN];
	int err;
	struct vmctx *ctx;

	assert(test_vmctx == NULL);
	assert(test_name == NULL);

	test_name = strdup(tname);
	(void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d",
	    test_name, getpid());

	err = vm_create(vm_name, create_flags);
	if (err != 0) {
		test_fail_errno(err, "Could not create VM");
	}

	ctx = vm_open(vm_name);
	if (ctx == NULL) {
		test_fail_errno(errno, "Could not open VM");
	}
	test_vmctx = ctx;

	err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL);
	if (err != 0) {
		test_fail_errno(err, "Could not set up VM memory");
	}

	err = setup_rom(ctx);
	if (err != 0) {
		test_fail_errno(err, "Could not set up VM ROM segment");
	}

	populate_identity_table(ctx);
	populate_desc_tables(ctx);

	err = load_payload(ctx);
	if (err != 0) {
		test_fail_errno(err, "Could not load payload");
	}

	return (ctx);
}

void
test_reinitialize(struct vmctx *ctx, uint64_t flags)
{
	int err;

	if ((err = vm_reinit(ctx, flags)) != 0) {
		test_fail_errno(err, "Could not reinit VM");
	}

	/* Reload tables and payload in case they were altered */

	populate_identity_table(ctx);
	populate_desc_tables(ctx);

	err = load_payload(ctx);
	if (err != 0) {
		test_fail_errno(err, "Could not load payload");
	}
}

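/*
 * Configure a vCPU to begin execution in 64-bit (long) mode at the given
 * %rip and %rsp, using flat segment descriptors and the identity-mapped page
 * tables established at initialization.
 */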
int
test_setup_vcpu(struct vcpu *vcpu, uint64_t rip, uint64_t rsp)
{
	int err;

	err = vm_activate_cpu(vcpu);
	if (err != 0 && err != EBUSY) {
		return (err);
	}

	/*
	 * Granularity bit important here for VMX validity:
	 * "If any bit in the limit field in the range 31:20 is 1, G must be 1"
	 */
	err = vm_set_desc(vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX,
	    SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	err = vm_set_desc(vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX,
	    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L |
	    SEG_ACCESS_D | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	err = vm_set_desc(vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX,
	    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	/*
	 * While SVM will happily run with an otherwise unusable TR, VMX
	 * includes it among its entry checks.
	 */
	err = vm_set_desc(vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff,
	    SDT_SYSTSSBSY | SEG_ACCESS_P);
	if (err != 0) {
		return (err);
	}
	err = vm_set_desc(vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0);
	if (err != 0) {
		return (err);
	}
	err = vm_set_desc(vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0);
	if (err != 0) {
		return (err);
	}

	/* Mark unused segments as explicitly unusable (for VMX) */
	const int unusable_segs[] = {
		VM_REG_GUEST_ES,
		VM_REG_GUEST_FS,
		VM_REG_GUEST_GS,
		VM_REG_GUEST_LDTR,
	};
	for (uint_t i = 0; i < ARRAY_SIZE(unusable_segs); i++) {
		err = vm_set_desc(vcpu, unusable_segs[i], 0, 0,
		    SEG_ACCESS_UNUSABLE);
		if (err != 0) {
			return (err);
		}
	}

	/* Place CPU directly in long mode */
	const int regnums[] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER,
		VM_REG_GUEST_RFLAGS,
		VM_REG_GUEST_RIP,
		VM_REG_GUEST_RSP,
		VM_REG_GUEST_CS,
		VM_REG_GUEST_SS,
		VM_REG_GUEST_DS,
		VM_REG_GUEST_TR,
	};
	uint64_t regvals[] = {
		CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS |
		    CR0_MP | CR0_PE,
		MEM_LOC_PAGE_TABLE_512G,
		CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE,
		AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE,
		/* start with interrupts disabled */
		PS_MB1,
		rip,
		rsp,
		(GDT_KCODE << 3),
		(GDT_KDATA << 3),
		(GDT_KDATA << 3),
		(GDT_KTSS << 3),
	};
	assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals));

	err = vm_set_register_set(vcpu, ARRAY_SIZE(regnums), regnums,
	    regvals);
	if (err != 0) {
		return (err);
	}

	err = vm_set_run_state(vcpu, VRS_RUN, 0);
	if (err != 0) {
		return (err);
	}

	return (0);
}

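/*
 * Classify a VM exit, handling the test-protocol accesses to IOP_TEST_RESULT
 * and IOP_TEST_MSG along the way.
 */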
static enum vm_exit_kind
which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit)
{
	const struct vm_inout *inout = &vexit->u.inout;

	switch (vexit->exitcode) {
	case VM_EXITCODE_BOGUS:
		bzero(ventry, sizeof (*ventry));
		return (VEK_REENTR);
	case VM_EXITCODE_INOUT:
		if (inout->port == IOP_TEST_RESULT &&
		    (inout->flags & INOUT_IN) == 0) {
			if (inout->eax == TEST_RESULT_PASS) {
				return (VEK_TEST_PASS);
			} else {
				return (VEK_TEST_FAIL);
			}
		}
		if (inout->port == IOP_TEST_MSG &&
		    (inout->flags & INOUT_IN) == 0 &&
		    inout->bytes == 4) {
			test_msg_addr = inout->eax;
			ventry_fulfill_inout(vexit, ventry, 0);
			return (VEK_TEST_MSG);
		}
		break;
	default:
		break;
	}
	return (VEK_UNHANDLED);
}

enum vm_exit_kind
test_run_vcpu(struct vcpu *vcpu, struct vm_entry *ventry, struct vm_exit *vexit)
{
	int err;

	err = vm_run(vcpu, ventry, vexit);
	if (err != 0) {
		test_fail_errno(err, "Failure during vcpu entry");
	}

	return (which_exit_kind(ventry, vexit));
}

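/*
 * As a rough sketch, a test consuming this harness might drive a vCPU along
 * these lines.  (MEM_LOC_STACK and the vm_vcpu_open() usage below are
 * assumptions about the surrounding test environment, not requirements
 * imposed here.)
 *
 *	struct vmctx *ctx = test_initialize("example");
 *	struct vcpu *vcpu = vm_vcpu_open(ctx, 0);
 *	struct vm_entry ventry = { 0 };
 *	struct vm_exit vexit = { 0 };
 *
 *	int err = test_setup_vcpu(vcpu, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
 *	if (err != 0) {
 *		test_fail_errno(err, "Could not set up vcpu");
 *	}
 *
 *	for (;;) {
 *		switch (test_run_vcpu(vcpu, &ventry, &vexit)) {
 *		case VEK_REENTR:
 *			break;
 *		case VEK_TEST_PASS:
 *			test_pass();
 *			break;
 *		case VEK_TEST_FAIL:
 *			test_fail();
 *			break;
 *		case VEK_TEST_MSG:
 *			test_msg_print(ctx);
 *			break;
 *		default:
 *			test_fail_vmexit(&vexit);
 *			break;
 *		}
 *	}
 */
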
void
ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry,
    uint32_t data)
{
	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT);

	ventry->cmd = VEC_FULFILL_INOUT;
	bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout));
	if ((ventry->u.inout.flags & INOUT_IN) != 0) {
		ventry->u.inout.eax = data;
	}
}

void
ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry,
    uint64_t data)
{
	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO);

	ventry->cmd = VEC_FULFILL_MMIO;
	bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio));
	if (ventry->u.mmio.read != 0) {
		ventry->u.mmio.data = data;
	}
}

bool
vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port,
    uint_t len, uint32_t *valp)
{
	if (vexit->exitcode != VM_EXITCODE_INOUT) {
		return (false);
	}

	const uint_t flag = is_read ? INOUT_IN : 0;
	if (vexit->u.inout.port != port ||
	    vexit->u.inout.bytes != len ||
	    (vexit->u.inout.flags & INOUT_IN) != flag) {
		return (false);
	}

	if (!is_read && valp != NULL) {
		*valp = vexit->u.inout.eax;
	}
	return (true);
}

bool
vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr,
    uint_t len, uint64_t *valp)
{
	if (vexit->exitcode != VM_EXITCODE_MMIO) {
		return (false);
	}

	if (vexit->u.mmio.gpa != addr ||
	    vexit->u.mmio.bytes != len ||
	    (vexit->u.mmio.read != 0) != is_read) {
		return (false);
	}

	if (!is_read && valp != NULL) {
		*valp = vexit->u.mmio.data;
	}
	return (true);
}