1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2022 Oxide Computer Company 14 */ 15 16 #include <stdio.h> 17 #include <unistd.h> 18 #include <stdlib.h> 19 #include <strings.h> 20 #include <assert.h> 21 #include <errno.h> 22 23 #include <sys/types.h> 24 #include <sys/segments.h> 25 #include <sys/psw.h> 26 #include <sys/controlregs.h> 27 #include <sys/sysmacros.h> 28 #include <sys/varargs.h> 29 #include <sys/debug.h> 30 31 #include <sys/vmm.h> 32 #include <sys/vmm_dev.h> 33 #include <vmmapi.h> 34 35 #include "in_guest.h" 36 37 38 #define PT_VALID 0x01 39 #define PT_WRITABLE 0x02 40 #define PT_WRITETHRU 0x08 41 #define PT_NOCACHE 0x10 42 #define PT_PAGESIZE 0x80 43 44 #define SEG_ACCESS_TYPE_MASK 0x1f 45 #define SEG_ACCESS_DPL_MASK 0x60 46 #define SEG_ACCESS_P (1 << 7) 47 #define SEG_ACCESS_AVL (1 << 12) 48 #define SEG_ACCESS_L (1 << 13) 49 #define SEG_ACCESS_D (1 << 14) 50 #define SEG_ACCESS_G (1 << 15) 51 #define SEG_ACCESS_UNUSABLE (1 << 16) 52 53 54 /* 55 * Keep the test name and VM context around so the consumer is not required to 56 * pass either of them to us for subsequent test-related operations after the 57 * initialization has been performed. 58 * 59 * The test code is not designed to be reentrant at this point. 60 */ 61 static struct vmctx *test_vmctx = NULL; 62 static const char *test_name = NULL; 63 64 static void 65 populate_identity_table(struct vmctx *ctx) 66 { 67 uint64_t gpa, pte_loc; 68 69 /* Set up 2MiB PTEs for everything up through 0xffffffff */ 70 for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M; 71 gpa < 0x100000000; 72 pte_loc += PAGE_SIZE) { 73 uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE); 74 75 for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) { 76 *ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE; 77 /* Make traditional MMIO space uncachable */ 78 if (gpa >= 0xc0000000) { 79 *ptep |= PT_WRITETHRU | PT_NOCACHE; 80 } 81 } 82 } 83 assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G); 84 85 uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE); 86 pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE; 87 pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE; 88 pdep[2] = 89 (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; 90 pdep[3] = 91 (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; 92 93 pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE); 94 pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE; 95 } 96 97 static void 98 populate_desc_tables(struct vmctx *ctx) 99 { 100 101 } 102 103 static void 104 test_cleanup(bool is_failure) 105 { 106 if (test_vmctx != NULL) { 107 bool keep_on_fail = false; 108 109 const char *keep_var; 110 if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) { 111 if (strlen(keep_var) != 0 && 112 strcmp(keep_var, "0") != 0) { 113 keep_on_fail = true; 114 } 115 } 116 117 /* 118 * Destroy the instance unless the test failed and it was 119 * requested that we keep it around. 120 */ 121 if (!is_failure || !keep_on_fail) { 122 vm_destroy(test_vmctx); 123 } 124 test_vmctx = NULL; 125 } 126 } 127 128 static void fail_finish(void) 129 { 130 assert(test_name != NULL); 131 (void) printf("FAIL %s\n", test_name); 132 133 test_cleanup(true); 134 exit(EXIT_FAILURE); 135 } 136 137 void 138 test_fail_errno(int err, const char *msg) 139 { 140 const char *err_str = strerror(err); 141 142 (void) fprintf(stderr, "%s: %s\n", msg, err_str); 143 fail_finish(); 144 } 145 146 void 147 test_fail_msg(const char *fmt, ...) 148 { 149 va_list ap; 150 151 va_start(ap, fmt); 152 (void) vfprintf(stderr, fmt, ap); 153 154 fail_finish(); 155 } 156 157 void 158 test_fail_vmexit(const struct vm_exit *vexit) 159 { 160 const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n"; 161 162 switch (vexit->exitcode) { 163 case VM_EXITCODE_INOUT: 164 (void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip); 165 (void) fprintf(stderr, 166 "\teax: %08x\n" 167 "\tport: %04x\n" 168 "\tbytes: %u\n" 169 "\tflags: %x\n", 170 vexit->u.inout.eax, 171 vexit->u.inout.port, 172 vexit->u.inout.bytes, 173 vexit->u.inout.flags); 174 break; 175 case VM_EXITCODE_RDMSR: 176 (void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip); 177 (void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code); 178 break; 179 case VM_EXITCODE_WRMSR: 180 (void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip); 181 (void) fprintf(stderr, 182 "\tcode: %08x\n" 183 "\twval: %016lx\n", 184 vexit->u.msr.code, vexit->u.msr.wval); 185 break; 186 case VM_EXITCODE_MMIO: 187 (void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip); 188 (void) fprintf(stderr, 189 "\tbytes: %u\n" 190 "\ttype: %s\n" 191 "\tgpa: %x\n" 192 "\tdata: %016x\n", 193 vexit->u.mmio.bytes, 194 vexit->u.mmio.read == 0 ? "write" : "read", 195 vexit->u.mmio.gpa, 196 vexit->u.mmio.data); 197 break; 198 case VM_EXITCODE_VMX: 199 (void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip); 200 (void) fprintf(stderr, 201 "\tstatus: %x\n" 202 "\treason: %x\n" 203 "\tqualification: %lx\n" 204 "\tinst_type: %x\n" 205 "\tinst_error: %x\n", 206 vexit->u.vmx.status, 207 vexit->u.vmx.exit_reason, 208 vexit->u.vmx.exit_qualification, 209 vexit->u.vmx.inst_type, 210 vexit->u.vmx.inst_error); 211 break; 212 case VM_EXITCODE_SVM: 213 (void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip); 214 break; 215 case VM_EXITCODE_INST_EMUL: 216 (void) fprintf(stderr, hdr_fmt, "instruction emulation", 217 vexit->rip); 218 const uint_t len = vexit->u.inst_emul.num_valid > 0 ? 219 vexit->u.inst_emul.num_valid : 15; 220 (void) fprintf(stderr, "\tinstruction bytes: ["); 221 for (uint_t i = 0; i < len; i++) { 222 (void) fprintf(stderr, "%s%02x", 223 i == 0 ? "" : ", ", 224 vexit->u.inst_emul.inst[i]); 225 } 226 (void) fprintf(stderr, "]\n"); 227 break; 228 case VM_EXITCODE_SUSPENDED: 229 (void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip); 230 switch (vexit->u.suspended.how) { 231 case VM_SUSPEND_RESET: 232 (void) fprintf(stderr, "\thow: reset"); 233 break; 234 case VM_SUSPEND_POWEROFF: 235 (void) fprintf(stderr, "\thow: poweroff"); 236 break; 237 case VM_SUSPEND_HALT: 238 (void) fprintf(stderr, "\thow: halt"); 239 break; 240 case VM_SUSPEND_TRIPLEFAULT: 241 (void) fprintf(stderr, "\thow: triple-fault"); 242 break; 243 default: 244 (void) fprintf(stderr, "\thow: unknown - %d", 245 vexit->u.suspended.how); 246 break; 247 } 248 break; 249 default: 250 (void) fprintf(stderr, "Unexpected code %d exit:\n" 251 "\t%%rip: %lx\n", vexit->exitcode, vexit->rip); 252 break; 253 } 254 fail_finish(); 255 } 256 257 void 258 test_pass(void) 259 { 260 assert(test_name != NULL); 261 (void) printf("PASS %s\n", test_name); 262 test_cleanup(false); 263 exit(EXIT_SUCCESS); 264 } 265 266 static int 267 load_payload(struct vmctx *ctx) 268 { 269 extern uint8_t payload_data; 270 extern uint32_t payload_size; 271 272 const uint32_t len = payload_size; 273 const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD); 274 275 if (len > cap) { 276 test_fail_msg("Payload size %u > capacity %u\n", len, cap); 277 } 278 279 const size_t map_len = P2ROUNDUP(len, PAGE_SIZE); 280 void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len); 281 bcopy(&payload_data, outp, len); 282 283 return (0); 284 } 285 286 struct vmctx * 287 test_initialize(const char *tname) 288 { 289 char vm_name[VM_MAX_NAMELEN]; 290 int err; 291 struct vmctx *ctx; 292 293 assert(test_vmctx == NULL); 294 assert(test_name == NULL); 295 296 test_name = strdup(tname); 297 (void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d", 298 test_name, getpid()); 299 300 err = vm_create(vm_name, 0); 301 if (err != 0) { 302 test_fail_errno(err, "Could not create VM"); 303 } 304 305 ctx = vm_open(vm_name); 306 if (ctx == NULL) { 307 test_fail_errno(errno, "Could not open VM"); 308 } 309 test_vmctx = ctx; 310 311 err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL); 312 if (err != 0) { 313 test_fail_errno(err, "Could not set up VM memory"); 314 } 315 316 populate_identity_table(ctx); 317 populate_desc_tables(ctx); 318 319 err = load_payload(ctx); 320 if (err != 0) { 321 test_fail_errno(err, "Could not load payload"); 322 } 323 324 return (ctx); 325 } 326 327 int 328 test_setup_vcpu(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t rsp) 329 { 330 int err; 331 332 err = vm_activate_cpu(ctx, vcpu); 333 if (err != 0 && err != EBUSY) { 334 return (err); 335 } 336 337 /* 338 * Granularity bit important here for VMX validity: 339 * "If any bit in the limit field in the range 31:20 is 1, G must be 1" 340 */ 341 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX, 342 SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G); 343 if (err != 0) { 344 return (err); 345 } 346 347 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX, 348 SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L | 349 SEG_ACCESS_D | SEG_ACCESS_G); 350 if (err != 0) { 351 return (err); 352 } 353 354 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX, 355 SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G); 356 if (err != 0) { 357 return (err); 358 } 359 360 /* 361 * While SVM will happilly run with an otherwise unusable TR, VMX 362 * includes it among its entry checks. 363 */ 364 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff, 365 SDT_SYSTSSBSY | SEG_ACCESS_P); 366 if (err != 0) { 367 return (err); 368 } 369 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0); 370 if (err != 0) { 371 return (err); 372 } 373 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0); 374 if (err != 0) { 375 return (err); 376 } 377 378 /* Mark unused segments as explicitly unusable (for VMX) */ 379 const int unsable_segs[] = { 380 VM_REG_GUEST_ES, 381 VM_REG_GUEST_FS, 382 VM_REG_GUEST_GS, 383 VM_REG_GUEST_LDTR, 384 }; 385 for (uint_t i = 0; i < ARRAY_SIZE(unsable_segs); i++) { 386 err = vm_set_desc(ctx, vcpu, unsable_segs[i], 0, 0, 387 SEG_ACCESS_UNUSABLE); 388 if (err != 0) { 389 return (err); 390 } 391 } 392 393 /* Place CPU directly in long mode */ 394 const int regnums[] = { 395 VM_REG_GUEST_CR0, 396 VM_REG_GUEST_CR3, 397 VM_REG_GUEST_CR4, 398 VM_REG_GUEST_EFER, 399 VM_REG_GUEST_RFLAGS, 400 VM_REG_GUEST_RIP, 401 VM_REG_GUEST_RSP, 402 VM_REG_GUEST_CS, 403 VM_REG_GUEST_SS, 404 VM_REG_GUEST_DS, 405 VM_REG_GUEST_TR, 406 }; 407 uint64_t regvals[] = { 408 CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS | 409 CR0_MP | CR0_PE, 410 MEM_LOC_PAGE_TABLE_512G, 411 CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE, 412 AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE, 413 /* start with interrupts disabled */ 414 PS_MB1, 415 rip, 416 rsp, 417 (GDT_KCODE << 3), 418 (GDT_KDATA << 3), 419 (GDT_KDATA << 3), 420 (GDT_KTSS << 3), 421 }; 422 assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals)); 423 424 err = vm_set_register_set(ctx, vcpu, ARRAY_SIZE(regnums), regnums, 425 regvals); 426 if (err != 0) { 427 return (err); 428 } 429 430 err = vm_set_run_state(ctx, vcpu, VRS_RUN, 0); 431 if (err != 0) { 432 return (err); 433 } 434 435 return (0); 436 } 437 438 static enum vm_exit_kind 439 which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit) 440 { 441 const struct vm_inout *inout = &vexit->u.inout; 442 443 switch (vexit->exitcode) { 444 case VM_EXITCODE_BOGUS: 445 case VM_EXITCODE_REQIDLE: 446 bzero(ventry, sizeof (ventry)); 447 return (VEK_REENTR); 448 case VM_EXITCODE_INOUT: 449 if (inout->port == IOP_TEST_RESULT && 450 (inout->flags & INOUT_IN) == 0) { 451 if (inout->eax == TEST_RESULT_PASS) { 452 return (VEK_TEST_PASS); 453 } else { 454 return (VEK_TEST_FAIL); 455 } 456 } 457 break; 458 default: 459 break; 460 } 461 return (VEK_UNHANDLED); 462 } 463 464 enum vm_exit_kind 465 test_run_vcpu(struct vmctx *ctx, int vcpu, struct vm_entry *ventry, 466 struct vm_exit *vexit) 467 { 468 int err; 469 470 err = vm_run(ctx, vcpu, ventry, vexit); 471 if (err != 0) { 472 test_fail_errno(err, "Failure during vcpu entry"); 473 } 474 475 return (which_exit_kind(ventry, vexit)); 476 } 477 478 void 479 ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry, 480 uint32_t data) 481 { 482 VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT); 483 484 ventry->cmd = VEC_FULFILL_INOUT; 485 bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout)); 486 if ((ventry->u.inout.flags & INOUT_IN) != 0) { 487 ventry->u.inout.eax = data; 488 } 489 } 490 491 void 492 ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry, 493 uint64_t data) 494 { 495 VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO); 496 497 ventry->cmd = VEC_FULFILL_MMIO; 498 bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio)); 499 if (ventry->u.mmio.read != 0) { 500 ventry->u.mmio.data = data; 501 } 502 } 503 504 bool 505 vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port, 506 uint_t len, uint32_t *valp) 507 { 508 if (vexit->exitcode != VM_EXITCODE_INOUT) { 509 return (false); 510 } 511 512 const uint_t flag = is_read ? INOUT_IN : 0; 513 if (vexit->u.inout.port != port || 514 vexit->u.inout.bytes != len || 515 (vexit->u.inout.flags & INOUT_IN) != flag) { 516 return (false); 517 } 518 519 if (!is_read && valp != NULL) { 520 *valp = vexit->u.inout.eax; 521 } 522 return (true); 523 } 524 525 bool 526 vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr, 527 uint_t len, uint64_t *valp) 528 { 529 if (vexit->exitcode != VM_EXITCODE_MMIO) { 530 return (false); 531 } 532 533 if (vexit->u.mmio.gpa != addr || 534 vexit->u.mmio.bytes != len || 535 (vexit->u.mmio.read != 0) != is_read) { 536 return (false); 537 } 538 539 if (!is_read && valp != NULL) { 540 *valp = vexit->u.mmio.data; 541 } 542 return (true); 543 } 544