/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 */

/*
 * Library for native code to access bhyve VMs, without the need to use
 * FreeBSD compat headers
 */

#include <sys/param.h>
#include <sys/list.h>
#include <sys/stddef.h>
#include <sys/mman.h>
#include <sys/kdi_regs.h>
#include <sys/sysmacros.h>
#include <sys/controlregs.h>
#include <sys/note.h>
#include <sys/debug.h>
#include <errno.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include <libvmm.h>

typedef struct vmm_memseg vmm_memseg_t;

#define	VMM_MEMSEG_DEVMEM	0x1

struct vmm_memseg {
	list_node_t vms_list;
	int vms_segid;
	int vms_prot;
	int vms_flags;
	uintptr_t vms_gpa;
	off_t vms_segoff;
	size_t vms_seglen;
	size_t vms_maplen;
	char vms_name[64];
};

struct vmm {
	struct vmctx *vmm_ctx;
	list_t vmm_memlist;
	char *vmm_mem;
	size_t vmm_memsize;
	size_t vmm_ncpu;
};


/*
 * This code relies on two assumptions:
 * - CPUs are never removed from the "active set", not even when suspended.
 *   A CPU being active just means that it has been used by the guest OS.
 * - The CPU numbering is consecutive.
 */
static void
vmm_update_ncpu(vmm_t *vmm)
{
	cpuset_t cpuset;

	assert(vm_active_cpus(vmm->vmm_ctx, &cpuset) == 0);

	for (vmm->vmm_ncpu = 0;
	    CPU_ISSET(vmm->vmm_ncpu, &cpuset) == 1;
	    vmm->vmm_ncpu++)
		;
}

vmm_t *
vmm_open_vm(const char *name)
{
	vmm_t *vmm = NULL;

	vmm = malloc(sizeof (vmm_t));
	if (vmm == NULL)
		return (NULL);

	bzero(vmm, sizeof (vmm_t));
	vmm->vmm_mem = MAP_FAILED;

	list_create(&vmm->vmm_memlist, sizeof (vmm_memseg_t),
	    offsetof(vmm_memseg_t, vms_list));

	vmm->vmm_ctx = vm_open(name);
	if (vmm->vmm_ctx == NULL) {
		free(vmm);
		return (NULL);
	}

	vmm_update_ncpu(vmm);

	/*
	 * If we open a VM that has just been created we may see a state
	 * where it has no CPUs configured yet. We'll just wait for 10ms
	 * and retry until we get a non-zero CPU count.
	 */
	if (vmm->vmm_ncpu == 0) {
		do {
			(void) usleep(10000);
			vmm_update_ncpu(vmm);
		} while (vmm->vmm_ncpu == 0);
	}

	return (vmm);
}
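
/*
 * A minimal usage sketch of the lifecycle around these calls (editorial,
 * not part of the library); "mykvm" is a hypothetical VM name and the
 * guest-physical address 0x1000 is arbitrary:
 *
 *	vmm_t *vmm;
 *	uint32_t word;
 *
 *	if ((vmm = vmm_open_vm("mykvm")) == NULL)
 *		return (-1);
 *	if (vmm_map(vmm, B_FALSE) == 0 &&
 *	    vmm_pread(vmm, &word, sizeof (word), 0x1000) == sizeof (word))
 *		(void) printf("guest word at 0x1000: %x\n", word);
 *	vmm_unmap(vmm);
 *	vmm_close_vm(vmm);
 */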

void
vmm_close_vm(vmm_t *vmm)
{
	vmm_unmap(vmm);

	list_destroy(&vmm->vmm_memlist);

	if (vmm->vmm_ctx != NULL)
		vm_close(vmm->vmm_ctx);

	free(vmm);
}

static vmm_memseg_t *
vmm_get_memseg(vmm_t *vmm, uintptr_t gpa)
{
	vmm_memseg_t ms, *ret;
	int error, flags;

	bzero(&ms, sizeof (vmm_memseg_t));
	ms.vms_gpa = gpa;
	error = vm_mmap_getnext(vmm->vmm_ctx, &ms.vms_gpa, &ms.vms_segid,
	    &ms.vms_segoff, &ms.vms_maplen, &ms.vms_prot, &flags);
	if (error)
		return (NULL);

	error = vm_get_memseg(vmm->vmm_ctx, ms.vms_segid, &ms.vms_seglen,
	    ms.vms_name, sizeof (ms.vms_name));
	if (error)
		return (NULL);

	/*
	 * Regular memory segments don't have a name, but devmem segments do.
	 * We can use that information to set the DEVMEM flag if necessary.
	 */
	ms.vms_flags = ms.vms_name[0] != '\0' ? VMM_MEMSEG_DEVMEM : 0;

	ret = malloc(sizeof (vmm_memseg_t));
	if (ret == NULL)
		return (NULL);

	*ret = ms;

	return (ret);
}

int
vmm_map(vmm_t *vmm, boolean_t writable)
{
	uintptr_t last_gpa = 0;
	vmm_memseg_t *ms;
	int prot_write = writable ? PROT_WRITE : 0;

	if (vmm->vmm_mem != MAP_FAILED) {
		errno = EINVAL;
		return (-1);
	}

	assert(list_is_empty(&vmm->vmm_memlist));

	for (;;) {
		ms = vmm_get_memseg(vmm, last_gpa);

		if (ms == NULL)
			break;

		last_gpa = ms->vms_gpa + ms->vms_maplen;
		list_insert_tail(&vmm->vmm_memlist, ms);
	}

	vmm->vmm_mem = mmap(NULL, last_gpa, PROT_NONE,
	    MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0);

	if (vmm->vmm_mem == MAP_FAILED)
		goto fail;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL;
	    ms = list_next(&vmm->vmm_memlist, ms)) {
		off_t mapoff;

		if ((ms->vms_flags & VMM_MEMSEG_DEVMEM) == 0) {
			/*
			 * sysmem segments will be located at an offset
			 * equivalent to their GPA.
			 */
			mapoff = ms->vms_gpa;
		} else {
			/*
			 * devmem segments are located in a special region away
			 * from the normal GPA space.
			 */
			if (vm_get_devmem_offset(vmm->vmm_ctx, ms->vms_segid,
			    &mapoff) != 0) {
				goto fail;
			}
		}

		/*
		 * While 'mapoff' points to the front of the segment, the
		 * actual mapping may be at some offset beyond that.
		 */
		VERIFY(ms->vms_segoff >= 0);
		mapoff += ms->vms_segoff;

		vmm->vmm_memsize += ms->vms_maplen;

		if (mmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen,
		    PROT_READ | prot_write, MAP_SHARED | MAP_FIXED,
		    vm_get_device_fd(vmm->vmm_ctx), mapoff) == MAP_FAILED)
			goto fail;
	}

	return (0);

fail:
	vmm_unmap(vmm);

	return (-1);
}
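
/*
 * Editorial note, derived from the mapping loop above: after a successful
 * vmm_map(), every segment is mapped into this process at
 *
 *	vmm_mem + vms_gpa
 *
 * so a guest-physical address within a mapped segment is reachable locally
 * at vmm_mem plus that address. This is the invariant that vmm_pread() and
 * vmm_pwrite() below depend on.
 */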

void
vmm_unmap(vmm_t *vmm)
{
	while (!list_is_empty(&vmm->vmm_memlist)) {
		vmm_memseg_t *ms = list_remove_head(&vmm->vmm_memlist);

		if (vmm->vmm_mem != MAP_FAILED) {
			(void) munmap(vmm->vmm_mem + ms->vms_gpa,
			    ms->vms_maplen);
		}

		free(ms);
	}

	if (vmm->vmm_mem != MAP_FAILED)
		(void) munmap(vmm->vmm_mem, vmm->vmm_memsize);

	vmm->vmm_mem = MAP_FAILED;
	vmm->vmm_memsize = 0;
}

ssize_t
vmm_pread(vmm_t *vmm, void *buf, size_t len, uintptr_t addr)
{
	ssize_t count = 0;
	vmm_memseg_t *ms;
	ssize_t res = len;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL && len != 0;
	    ms = list_next(&vmm->vmm_memlist, ms)) {

		if (addr >= ms->vms_gpa &&
		    addr < ms->vms_gpa + ms->vms_maplen) {
			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);

			if (res < 0)
				res = 0;

			bcopy(vmm->vmm_mem + addr, buf, len - res);
			count += len - res;
			addr += len - res;
			/* Advance the output buffer along with the address. */
			buf += len - res;
			len = res;
		}
	}

	if (res)
		errno = EFAULT;
	else
		errno = 0;

	return (count);
}

ssize_t
vmm_pwrite(vmm_t *vmm, const void *buf, size_t len, uintptr_t addr)
{
	ssize_t count = 0;
	vmm_memseg_t *ms;
	ssize_t res = len;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL;
	    ms = list_next(&vmm->vmm_memlist, ms)) {
		if (addr >= ms->vms_gpa &&
		    addr < ms->vms_gpa + ms->vms_maplen) {
			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);

			if (res < 0)
				res = 0;

			bcopy(buf, vmm->vmm_mem + addr, len - res);
			count += len - res;
			addr += len - res;
			/* Advance the input buffer along with the address. */
			buf += len - res;
			len = res;
		}
	}

	if (res)
		errno = EFAULT;
	else
		errno = 0;

	return (count);
}

size_t
vmm_ncpu(vmm_t *vmm)
{
	return (vmm->vmm_ncpu);
}

size_t
vmm_memsize(vmm_t *vmm)
{
	return (vmm->vmm_memsize);
}

int
vmm_cont(vmm_t *vmm)
{
	return (vm_resume_cpu(vmm->vmm_ctx, -1));
}

int
vmm_step(vmm_t *vmm, int vcpu)
{
	cpuset_t cpuset;
	int ret;

	if (vcpu >= vmm->vmm_ncpu) {
		errno = EINVAL;
		return (-1);
	}

	ret = vm_set_capability(vmm->vmm_ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
	if (ret != 0)
		return (-1);

	assert(vm_resume_cpu(vmm->vmm_ctx, vcpu) == 0);

	do {
		(void) vm_debug_cpus(vmm->vmm_ctx, &cpuset);
	} while (!CPU_ISSET(vcpu, &cpuset));

	(void) vm_set_capability(vmm->vmm_ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);

	return (ret);
}

int
vmm_stop(vmm_t *vmm)
{
	int ret = vm_suspend_cpu(vmm->vmm_ctx, -1);

	if (ret == 0)
		vmm_update_ncpu(vmm);

	return (ret);
}
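
/*
 * Example (illustrative sketch, not part of the library): a debugger-style
 * caller might combine the control functions above with the register
 * accessors below, e.g. to single-step vCPU 0 and inspect its new %rip:
 *
 *	uint64_t rip;
 *
 *	if (vmm_stop(vmm) == 0 &&
 *	    vmm_step(vmm, 0) == 0 &&
 *	    vmm_getreg(vmm, 0, KDIREG_RIP, &rip) == 0)
 *		(void) printf("vcpu0 rip: %lx\n", rip);
 *	(void) vmm_cont(vmm);
 */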

/*
 * Mapping of KDI-defined registers to vmmapi-defined registers.
 * Registers not known to vmmapi use VM_REG_LAST, which is invalid and
 * causes an error in vm_{get,set}_register_set().
 *
 * This array must be kept in sync with the definitions in kdi_regs.h.
 */
static int vmm_kdi_regmap[] = {
	VM_REG_LAST,		/* KDIREG_SAVFP */
	VM_REG_LAST,		/* KDIREG_SAVPC */
	VM_REG_GUEST_RDI,	/* KDIREG_RDI */
	VM_REG_GUEST_RSI,	/* KDIREG_RSI */
	VM_REG_GUEST_RDX,	/* KDIREG_RDX */
	VM_REG_GUEST_RCX,	/* KDIREG_RCX */
	VM_REG_GUEST_R8,	/* KDIREG_R8 */
	VM_REG_GUEST_R9,	/* KDIREG_R9 */
	VM_REG_GUEST_RAX,	/* KDIREG_RAX */
	VM_REG_GUEST_RBX,	/* KDIREG_RBX */
	VM_REG_GUEST_RBP,	/* KDIREG_RBP */
	VM_REG_GUEST_R10,	/* KDIREG_R10 */
	VM_REG_GUEST_R11,	/* KDIREG_R11 */
	VM_REG_GUEST_R12,	/* KDIREG_R12 */
	VM_REG_GUEST_R13,	/* KDIREG_R13 */
	VM_REG_GUEST_R14,	/* KDIREG_R14 */
	VM_REG_GUEST_R15,	/* KDIREG_R15 */
	VM_REG_LAST,		/* KDIREG_FSBASE */
	VM_REG_LAST,		/* KDIREG_GSBASE */
	VM_REG_LAST,		/* KDIREG_KGSBASE */
	VM_REG_GUEST_CR2,	/* KDIREG_CR2 */
	VM_REG_GUEST_CR3,	/* KDIREG_CR3 */
	VM_REG_GUEST_DS,	/* KDIREG_DS */
	VM_REG_GUEST_ES,	/* KDIREG_ES */
	VM_REG_GUEST_FS,	/* KDIREG_FS */
	VM_REG_GUEST_GS,	/* KDIREG_GS */
	VM_REG_LAST,		/* KDIREG_TRAPNO */
	VM_REG_LAST,		/* KDIREG_ERR */
	VM_REG_GUEST_RIP,	/* KDIREG_RIP */
	VM_REG_GUEST_CS,	/* KDIREG_CS */
	VM_REG_GUEST_RFLAGS,	/* KDIREG_RFLAGS */
	VM_REG_GUEST_RSP,	/* KDIREG_RSP */
	VM_REG_GUEST_SS		/* KDIREG_SS */
};
CTASSERT(ARRAY_SIZE(vmm_kdi_regmap) == KDIREG_NGREG);

/*
 * Mapping of libvmm-defined registers to vmmapi-defined registers.
 *
 * This array must be kept in sync with the definitions in libvmm.h
 */
static int vmm_sys_regmap[] = {
	VM_REG_GUEST_CR0,	/* VMM_REG_CR0 */
	VM_REG_GUEST_CR2,	/* VMM_REG_CR2 */
	VM_REG_GUEST_CR3,	/* VMM_REG_CR3 */
	VM_REG_GUEST_CR4,	/* VMM_REG_CR4 */
	VM_REG_GUEST_DR0,	/* VMM_REG_DR0 */
	VM_REG_GUEST_DR1,	/* VMM_REG_DR1 */
	VM_REG_GUEST_DR2,	/* VMM_REG_DR2 */
	VM_REG_GUEST_DR3,	/* VMM_REG_DR3 */
	VM_REG_GUEST_DR6,	/* VMM_REG_DR6 */
	VM_REG_GUEST_DR7,	/* VMM_REG_DR7 */
	VM_REG_GUEST_EFER,	/* VMM_REG_EFER */
	VM_REG_GUEST_PDPTE0,	/* VMM_REG_PDPTE0 */
	VM_REG_GUEST_PDPTE1,	/* VMM_REG_PDPTE1 */
	VM_REG_GUEST_PDPTE2,	/* VMM_REG_PDPTE2 */
	VM_REG_GUEST_PDPTE3,	/* VMM_REG_PDPTE3 */
	VM_REG_GUEST_INTR_SHADOW, /* VMM_REG_INTR_SHADOW */
};
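
/*
 * Editorial note: callers address registers in two namespaces, both accepted
 * by vmm_getreg()/vmm_setreg() below. KDI indices (0 .. KDIREG_NGREG - 1) go
 * through vmm_kdi_regmap[], while the libvmm system registers start at
 * VMM_REG_OFFSET and go through vmm_sys_regmap[]. For example, both of these
 * read control registers of vCPU 0:
 *
 *	uint64_t cr0, cr3;
 *
 *	(void) vmm_getreg(vmm, 0, KDIREG_CR3, &cr3);
 *	(void) vmm_getreg(vmm, 0, VMM_REG_CR0, &cr0);
 */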

/*
 * Mapping of libvmm-defined descriptors to vmmapi-defined descriptors.
 *
 * This array must be kept in sync with the definitions in libvmm.h
 */
static int vmm_descmap[] = {
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_SS
};

static int
vmm_mapreg(int reg)
{
	errno = 0;

	if (reg < 0)
		goto fail;

	if (reg < KDIREG_NGREG)
		return (vmm_kdi_regmap[reg]);

	if (reg >= VMM_REG_OFFSET &&
	    reg < VMM_REG_OFFSET + ARRAY_SIZE(vmm_sys_regmap))
		return (vmm_sys_regmap[reg - VMM_REG_OFFSET]);

fail:
	errno = EINVAL;
	return (VM_REG_LAST);
}

static int
vmm_mapdesc(int desc)
{
	errno = 0;

	if (desc >= VMM_DESC_OFFSET &&
	    desc < VMM_DESC_OFFSET + ARRAY_SIZE(vmm_descmap))
		return (vmm_descmap[desc - VMM_DESC_OFFSET]);

	errno = EINVAL;
	return (VM_REG_LAST);
}

int
vmm_getreg(vmm_t *vmm, int vcpu, int reg, uint64_t *val)
{
	reg = vmm_mapreg(reg);

	if (reg == VM_REG_LAST)
		return (-1);

	return (vm_get_register(vmm->vmm_ctx, vcpu, reg, val));
}

int
vmm_setreg(vmm_t *vmm, int vcpu, int reg, uint64_t val)
{
	reg = vmm_mapreg(reg);

	if (reg == VM_REG_LAST)
		return (-1);

	return (vm_set_register(vmm->vmm_ctx, vcpu, reg, val));
}

int
vmm_get_regset(vmm_t *vmm, int vcpu, size_t nregs, const int *regnums,
    uint64_t *regvals)
{
	int *vm_regnums;
	int i;
	int ret = -1;

	vm_regnums = malloc(sizeof (int) * nregs);
	if (vm_regnums == NULL)
		return (ret);

	for (i = 0; i != nregs; i++) {
		vm_regnums[i] = vmm_mapreg(regnums[i]);
		if (vm_regnums[i] == VM_REG_LAST)
			goto fail;
	}

	ret = vm_get_register_set(vmm->vmm_ctx, vcpu, nregs, vm_regnums,
	    regvals);

fail:
	free(vm_regnums);
	return (ret);
}

int
vmm_set_regset(vmm_t *vmm, int vcpu, size_t nregs, const int *regnums,
    uint64_t *regvals)
{
	int *vm_regnums;
	int i;
	int ret = -1;

	vm_regnums = malloc(sizeof (int) * nregs);
	if (vm_regnums == NULL)
		return (ret);

	for (i = 0; i != nregs; i++) {
		vm_regnums[i] = vmm_mapreg(regnums[i]);
		if (vm_regnums[i] == VM_REG_LAST)
			goto fail;
	}

	ret = vm_set_register_set(vmm->vmm_ctx, vcpu, nregs, vm_regnums,
	    regvals);

fail:
	free(vm_regnums);
	return (ret);
}

int
vmm_get_desc(vmm_t *vmm, int vcpu, int desc, vmm_desc_t *vd)
{
	desc = vmm_mapdesc(desc);
	if (desc == VM_REG_LAST)
		return (-1);

	return (vm_get_desc(vmm->vmm_ctx, vcpu, desc, &vd->vd_base, &vd->vd_lim,
	    &vd->vd_acc));
}

int
vmm_set_desc(vmm_t *vmm, int vcpu, int desc, vmm_desc_t *vd)
{
	desc = vmm_mapdesc(desc);
	if (desc == VM_REG_LAST)
		return (-1);

	return (vm_set_desc(vmm->vmm_ctx, vcpu, desc, vd->vd_base, vd->vd_lim,
	    vd->vd_acc));
}
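
/*
 * Example (illustrative sketch): fetching several registers in one call via
 * vmm_get_regset(); the register numbers may mix the KDI and VMM_REG_*
 * namespaces since each entry is translated by vmm_mapreg():
 *
 *	static const int regs[] = { KDIREG_RIP, KDIREG_RSP, VMM_REG_CR3 };
 *	uint64_t vals[ARRAY_SIZE(regs)];
 *
 *	if (vmm_get_regset(vmm, 0, ARRAY_SIZE(regs), regs, vals) == 0)
 *		... vals[0] is %rip, vals[1] is %rsp, vals[2] is %cr3 ...
 */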

/*
 * Structure to hold MMU state during address translation.
 * The contents of vmm_mmu_regnum[] must be kept in sync with this.
 */
typedef struct vmm_mmu {
	uint64_t vm_cr0;
	uint64_t vm_cr3;
	uint64_t vm_cr4;
	uint64_t vm_efer;
} vmm_mmu_t;

static const int vmm_mmu_regnum[] = {
	VMM_REG_CR0,
	VMM_REG_CR3,
	VMM_REG_CR4,
	VMM_REG_EFER
};

#define	X86_PTE_P		0x001ULL
#define	X86_PTE_PS		0x080ULL

#define	X86_PTE_PHYSMASK	0x000ffffffffff000ULL
#define	X86_PAGE_SHIFT		12
#define	X86_PAGE_SIZE		(1ULL << X86_PAGE_SHIFT)

#define	X86_SEG_CODE_DATA	(1ULL << 4)
#define	X86_SEG_PRESENT		(1ULL << 7)
#define	X86_SEG_LONG		(1ULL << 13)
#define	X86_SEG_BIG		(1ULL << 14)
#define	X86_SEG_GRANULARITY	(1ULL << 15)
#define	X86_SEG_UNUSABLE	(1ULL << 16)

#define	X86_SEG_USABLE		(X86_SEG_PRESENT | X86_SEG_CODE_DATA)
#define	X86_SEG_USABLE_MASK	(X86_SEG_UNUSABLE | X86_SEG_USABLE)

/*
 * vmm_pte2paddr:
 *
 * Recursively calculate the physical address from a virtual address,
 * starting at the given PTE level using the given PTE.
 */
static int
vmm_pte2paddr(vmm_t *vmm, uint64_t pte, boolean_t ia32, int level,
    uint64_t vaddr, uint64_t *paddr)
{
	int pte_size = ia32 ? sizeof (uint32_t) : sizeof (uint64_t);
	int off_bits = ia32 ? 10 : 9;
	boolean_t hugepage = B_FALSE;
	uint64_t offset;
	uint64_t off_mask, off_shift;

	if (level < 4 && (pte & X86_PTE_P) == 0) {
		errno = EFAULT;
		return (-1);
	}

	off_shift = X86_PAGE_SHIFT + off_bits * level;
	off_mask = (1ULL << off_shift) - 1;

	offset = vaddr & off_mask;

	if ((level == 1 || level == 2) && (pte & X86_PTE_PS) != 0) {
		hugepage = B_TRUE;
	} else {
		if (level > 0) {
			offset >>= off_shift - off_bits;
			offset <<= X86_PAGE_SHIFT - off_bits;
		}
		off_mask = 0xfff;
	}

	*paddr = (pte & X86_PTE_PHYSMASK & ~off_mask) + offset;

	if (level == 0 || hugepage)
		return (0);

	pte = 0;
	if (vmm_pread(vmm, &pte, pte_size, *paddr) != pte_size)
		return (-1);
	return (vmm_pte2paddr(vmm, pte, ia32, level - 1, vaddr, paddr));
}

static vmm_mode_t
vmm_vcpu_mmu_mode(vmm_t *vmm, int vcpu, vmm_mmu_t *mmu)
{
	if ((mmu->vm_cr0 & CR0_PE) == 0)
		return (VMM_MODE_REAL);
	else if ((mmu->vm_cr4 & CR4_PAE) == 0)
		return (VMM_MODE_PROT);
	else if ((mmu->vm_efer & AMD_EFER_LME) == 0)
		return (VMM_MODE_PAE);
	else
		return (VMM_MODE_LONG);
}

vmm_mode_t
vmm_vcpu_mode(vmm_t *vmm, int vcpu)
{
	vmm_mmu_t mmu = { 0 };

	if (vmm_get_regset(vmm, vcpu, ARRAY_SIZE(vmm_mmu_regnum),
	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
		return (VMM_MODE_UNKNOWN);

	return (vmm_vcpu_mmu_mode(vmm, vcpu, &mmu));
}

vmm_isa_t
vmm_vcpu_isa(vmm_t *vmm, int vcpu)
{
	vmm_desc_t cs;

	if (vmm_get_desc(vmm, vcpu, VMM_DESC_CS, &cs) != 0)
		return (VMM_ISA_UNKNOWN);

	switch (cs.vd_acc & (X86_SEG_BIG | X86_SEG_LONG)) {
	case 0x0:		/* 16b code segment */
		return (VMM_ISA_16);
	case X86_SEG_LONG:	/* 64b code segment */
		return (VMM_ISA_64);
	case X86_SEG_BIG:	/* 32b code segment */
		return (VMM_ISA_32);
	}

	return (VMM_ISA_UNKNOWN);
}
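
/*
 * Example (illustrative sketch): a consumer such as a debugger might use the
 * two queries above to pick an instruction decoding mode for vCPU 0:
 *
 *	if (vmm_vcpu_mode(vmm, 0) == VMM_MODE_LONG &&
 *	    vmm_vcpu_isa(vmm, 0) == VMM_ISA_64)
 *		... decode as 64-bit code ...
 *	else if (vmm_vcpu_isa(vmm, 0) == VMM_ISA_32)
 *		... decode as 32-bit code ...
 */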

/*
 * vmm_vtol:
 *
 * Translate a virtual address to a linear address on a certain vCPU,
 * using the specified segment register or descriptor according to the mode.
 */
int
vmm_vtol(vmm_t *vmm, int vcpu, int seg, uint64_t vaddr, uint64_t *laddr)
{
	vmm_desc_t desc;
	uint64_t limit;

	if (vmm_get_desc(vmm, vcpu, seg, &desc) != 0)
		return (-1);

	switch (vmm_vcpu_mode(vmm, vcpu)) {
	case VMM_MODE_REAL:
		if (seg == VMM_DESC_FS || seg == VMM_DESC_GS)
			goto fault;
		/* FALLTHRU */
	case VMM_MODE_PROT:
	case VMM_MODE_PAE:
		if ((desc.vd_acc & X86_SEG_USABLE_MASK) != X86_SEG_USABLE)
			/* unusable, system segment, or not present */
			goto fault;

		limit = desc.vd_lim;
		if (desc.vd_acc & X86_SEG_GRANULARITY)
			limit *= 4096;

		if (vaddr > limit)
			goto fault;
		/* FALLTHRU */
	case VMM_MODE_LONG:
		*laddr = desc.vd_base + vaddr;
		return (0);

	default:
	fault:
		errno = EFAULT;
		return (-1);
	}
}

/*
 * vmm_vtop:
 *
 * Translate a virtual address to a guest physical address on a certain vCPU,
 * according to the mode the vCPU is in.
 */
int
vmm_vtop(vmm_t *vmm, int vcpu, int seg, uint64_t vaddr, uint64_t *paddr)
{
	vmm_mmu_t mmu = { 0 };
	int ret = 0;

	if (vmm_vtol(vmm, vcpu, seg, vaddr, &vaddr) != 0)
		return (-1);

	if (vmm_get_regset(vmm, vcpu, ARRAY_SIZE(vmm_mmu_regnum),
	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
		return (-1);

	if ((mmu.vm_cr0 & CR0_PG) == 0) {
		/* no paging, physical equals virtual */
		*paddr = vaddr;
		return (0);
	}

	switch (vmm_vcpu_mmu_mode(vmm, vcpu, &mmu)) {
	case VMM_MODE_PROT:
		/* protected mode, no PAE: 2-level paging, 32bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_TRUE, 2, vaddr, paddr);
		break;
	case VMM_MODE_PAE:
		/* protected mode with PAE: 3-level paging, 64bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 3, vaddr, paddr);
		break;
	case VMM_MODE_LONG:
		/* long mode: 4-level paging, 64bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 4, vaddr, paddr);
		break;
	default:
		ret = -1;
	}

	return (ret);
}

ssize_t
vmm_vread(vmm_t *vmm, int vcpu, int seg, void *buf, size_t len, uintptr_t addr)
{
	ssize_t res = 0;
	uint64_t paddr;
	size_t plen;
	uint64_t boundary;

	while (len != 0) {
		if (vmm_vtop(vmm, vcpu, seg, addr, &paddr) != 0) {
			errno = EFAULT;
			return (0);
		}

		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
		if (addr + len > boundary)
			plen = boundary - addr;
		else
			plen = len;

		if (vmm_pread(vmm, buf, plen, paddr) != plen)
			return (0);
		len -= plen;
		addr += plen;
		buf += plen;
		res += plen;
	}

	return (res);
}

ssize_t
vmm_vwrite(vmm_t *vmm, int vcpu, int seg, const void *buf, size_t len,
    uintptr_t addr)
{
	ssize_t res = 0;
	uint64_t paddr;
	size_t plen;
	uint64_t boundary;

	while (len != 0) {
		if (vmm_vtop(vmm, vcpu, seg, addr, &paddr) != 0) {
			errno = EFAULT;
			return (0);
		}

		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
		if (addr + len > boundary)
			plen = boundary - addr;
		else
			plen = len;

		if (vmm_pwrite(vmm, buf, plen, paddr) != plen)
			return (0);
		len -= plen;
		addr += plen;
		buf += plen;
		res += plen;
	}

	return (res);
}
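
/*
 * Example (illustrative sketch, not part of the library): reading guest
 * memory through a vCPU's virtual address space, e.g. fetching the
 * instruction bytes at the current %rip of vCPU 0 via the code segment:
 *
 *	uint64_t rip;
 *	uint8_t insn[16];
 *
 *	if (vmm_getreg(vmm, 0, KDIREG_RIP, &rip) == 0 &&
 *	    vmm_vread(vmm, 0, VMM_DESC_CS, insn, sizeof (insn), rip) ==
 *	    sizeof (insn))
 *		... disassemble insn ...
 */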