/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 * Copyright 2023 OmniOS Community Edition (OmniOSce) Association.
 */

/*
 * Library for native code to access bhyve VMs, without the need to use
 * FreeBSD compat headers.
 */

#include <sys/param.h>
#include <sys/list.h>
#include <sys/stddef.h>
#include <sys/mman.h>
#include <sys/kdi_regs.h>
#include <sys/sysmacros.h>
#include <sys/controlregs.h>
#include <sys/note.h>
#include <sys/debug.h>
#include <errno.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include <libvmm.h>

typedef struct vmm_memseg vmm_memseg_t;

#define	VMM_MEMSEG_DEVMEM	0x1

struct vmm_memseg {
	list_node_t	vms_list;
	int		vms_segid;
	int		vms_prot;
	int		vms_flags;
	uintptr_t	vms_gpa;
	off_t		vms_segoff;
	size_t		vms_seglen;
	size_t		vms_maplen;
	char		vms_name[64];
};

struct vmm {
	struct vmctx	*vmm_ctx;
	list_t		vmm_memlist;
	char		*vmm_mem;
	size_t		vmm_memsize;
	size_t		vmm_ncpu;
	struct vcpu	**vmm_vcpu;
};

/*
 * This code relies on two assumptions:
 * - CPUs are never removed from the "active set", not even when suspended.
 *   A CPU being active just means that it has been used by the guest OS.
 * - The CPU numbering is consecutive.
 */
static void
vmm_update_ncpu(vmm_t *vmm)
{
	cpuset_t cpuset;

	VERIFY0(vm_active_cpus(vmm->vmm_ctx, &cpuset));

	for (vmm->vmm_ncpu = 0;
	    CPU_ISSET(vmm->vmm_ncpu, &cpuset) == 1;
	    vmm->vmm_ncpu++)
		;
}

vmm_t *
vmm_open_vm(const char *name)
{
	vmm_t *vmm = NULL;
	int _errno;
	int i;

	vmm = malloc(sizeof (vmm_t));
	if (vmm == NULL)
		return (NULL);

	bzero(vmm, sizeof (vmm_t));
	vmm->vmm_mem = MAP_FAILED;

	list_create(&vmm->vmm_memlist, sizeof (vmm_memseg_t),
	    offsetof(vmm_memseg_t, vms_list));

	vmm->vmm_ctx = vm_open(name);
	if (vmm->vmm_ctx == NULL) {
		list_destroy(&vmm->vmm_memlist);
		free(vmm);
		return (NULL);
	}

	vmm_update_ncpu(vmm);

	/*
	 * If we open a VM that has just been created we may see a state
	 * where it has no CPUs configured yet. We'll just wait for 10ms
	 * and retry until we get a non-zero CPU count.
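	 *
	 * Note that there is no upper bound on the number of retries; we
	 * assume that a VM we were able to open will eventually report at
	 * least one active vCPU.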
	 */
	if (vmm->vmm_ncpu == 0) {
		do {
			(void) usleep(10000);
			vmm_update_ncpu(vmm);
		} while (vmm->vmm_ncpu == 0);
	}

	vmm->vmm_vcpu = calloc(vmm->vmm_ncpu, sizeof (struct vcpu *));
	if (vmm->vmm_vcpu == NULL)
		goto fail;
	for (i = 0; i < vmm->vmm_ncpu; i++) {
		vmm->vmm_vcpu[i] = vm_vcpu_open(vmm->vmm_ctx, i);
		if (vmm->vmm_vcpu[i] == NULL) {
			_errno = errno;
			/* Only close the vCPUs that were opened so far. */
			while (i-- > 0)
				vm_vcpu_close(vmm->vmm_vcpu[i]);
			free(vmm->vmm_vcpu);
			vmm->vmm_vcpu = NULL;
			errno = _errno;
			goto fail;
		}
	}

	return (vmm);

fail:
	_errno = errno;
	vmm_close_vm(vmm);
	errno = _errno;

	return (NULL);
}

void
vmm_close_vm(vmm_t *vmm)
{
	uint_t i;

	vmm_unmap(vmm);

	if (vmm->vmm_vcpu != NULL) {
		for (i = 0; i < vmm->vmm_ncpu; i++)
			vm_vcpu_close(vmm->vmm_vcpu[i]);
		free(vmm->vmm_vcpu);
	}

	list_destroy(&vmm->vmm_memlist);

	if (vmm->vmm_ctx != NULL)
		vm_close(vmm->vmm_ctx);

	free(vmm);
}

static vmm_memseg_t *
vmm_get_memseg(vmm_t *vmm, uintptr_t gpa)
{
	vmm_memseg_t ms, *ret;
	int error, flags;

	bzero(&ms, sizeof (vmm_memseg_t));
	ms.vms_gpa = gpa;
	error = vm_mmap_getnext(vmm->vmm_ctx, &ms.vms_gpa, &ms.vms_segid,
	    &ms.vms_segoff, &ms.vms_maplen, &ms.vms_prot, &flags);
	if (error)
		return (NULL);

	error = vm_get_memseg(vmm->vmm_ctx, ms.vms_segid, &ms.vms_seglen,
	    ms.vms_name, sizeof (ms.vms_name));
	if (error)
		return (NULL);

	/*
	 * Regular memory segments don't have a name, but devmem segments do.
	 * We can use that information to set the DEVMEM flag if necessary.
	 */
	ms.vms_flags = ms.vms_name[0] != '\0' ? VMM_MEMSEG_DEVMEM : 0;

	ret = malloc(sizeof (vmm_memseg_t));
	if (ret == NULL)
		return (NULL);

	*ret = ms;

	return (ret);
}

int
vmm_map(vmm_t *vmm, boolean_t writable)
{
	uintptr_t last_gpa = 0;
	vmm_memseg_t *ms;
	int prot_write = writable ? PROT_WRITE : 0;

	if (vmm->vmm_mem != MAP_FAILED) {
		errno = EINVAL;
		return (-1);
	}

	assert(list_is_empty(&vmm->vmm_memlist));

	for (;;) {
		ms = vmm_get_memseg(vmm, last_gpa);

		if (ms == NULL)
			break;

		last_gpa = ms->vms_gpa + ms->vms_maplen;
		list_insert_tail(&vmm->vmm_memlist, ms);
	}

	vmm->vmm_mem = mmap(NULL, last_gpa, PROT_NONE,
	    MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0);

	if (vmm->vmm_mem == MAP_FAILED)
		goto fail;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL;
	    ms = list_next(&vmm->vmm_memlist, ms)) {
		off_t mapoff;

		if ((ms->vms_flags & VMM_MEMSEG_DEVMEM) == 0) {
			/*
			 * sysmem segments will be located at an offset
			 * equivalent to their GPA.
			 */
			mapoff = ms->vms_gpa;
		} else {
			/*
			 * devmem segments are located in a special region away
			 * from the normal GPA space.
			 */
			if (vm_get_devmem_offset(vmm->vmm_ctx, ms->vms_segid,
			    &mapoff) != 0) {
				goto fail;
			}
		}

		/*
		 * While 'mapoff' points to the front of the segment, the
		 * actual mapping may be at some offset beyond that.
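		 * That offset is the 'vms_segoff' value reported for this
		 * mapping by vm_mmap_getnext() in vmm_get_memseg().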
		 */
		VERIFY(ms->vms_segoff >= 0);
		mapoff += ms->vms_segoff;

		vmm->vmm_memsize += ms->vms_maplen;

		if (mmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen,
		    PROT_READ | prot_write, MAP_SHARED | MAP_FIXED,
		    vm_get_device_fd(vmm->vmm_ctx), mapoff) == MAP_FAILED)
			goto fail;
	}

	return (0);

fail:
	vmm_unmap(vmm);

	return (-1);
}

void
vmm_unmap(vmm_t *vmm)
{
	while (!list_is_empty(&vmm->vmm_memlist)) {
		vmm_memseg_t *ms = list_remove_head(&vmm->vmm_memlist);

		if (vmm->vmm_mem != MAP_FAILED) {
			(void) munmap(vmm->vmm_mem + ms->vms_gpa,
			    ms->vms_maplen);
		}

		free(ms);
	}

	if (vmm->vmm_mem != MAP_FAILED)
		(void) munmap(vmm->vmm_mem, vmm->vmm_memsize);

	vmm->vmm_mem = MAP_FAILED;
	vmm->vmm_memsize = 0;
}

ssize_t
vmm_pread(vmm_t *vmm, void *buf, size_t len, uintptr_t addr)
{
	ssize_t count = 0;
	vmm_memseg_t *ms;
	ssize_t res = len;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL && len != 0;
	    ms = list_next(&vmm->vmm_memlist, ms)) {

		if (addr >= ms->vms_gpa &&
		    addr < ms->vms_gpa + ms->vms_maplen) {
			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);

			if (res < 0)
				res = 0;

			bcopy(vmm->vmm_mem + addr, buf, len - res);
			count += len - res;
			addr += len - res;
			len = res;
		}
	}

	if (res)
		errno = EFAULT;
	else
		errno = 0;

	return (count);
}

ssize_t
vmm_pwrite(vmm_t *vmm, const void *buf, size_t len, uintptr_t addr)
{
	ssize_t count = 0;
	vmm_memseg_t *ms;
	ssize_t res = len;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL;
	    ms = list_next(&vmm->vmm_memlist, ms)) {
		if (addr >= ms->vms_gpa &&
		    addr < ms->vms_gpa + ms->vms_maplen) {
			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);

			if (res < 0)
				res = 0;

			bcopy(buf, vmm->vmm_mem + addr, len - res);
			count += len - res;
			addr += len - res;
			len = res;
		}
	}

	if (res)
		errno = EFAULT;
	else
		errno = 0;

	return (count);
}

size_t
vmm_ncpu(vmm_t *vmm)
{
	return (vmm->vmm_ncpu);
}

size_t
vmm_memsize(vmm_t *vmm)
{
	return (vmm->vmm_memsize);
}

int
vmm_cont(vmm_t *vmm)
{
	return (vm_resume_all_cpus(vmm->vmm_ctx));
}

int
vmm_step(vmm_t *vmm, int vcpuid)
{
	cpuset_t cpuset;
	int ret;

	if (vcpuid >= vmm->vmm_ncpu) {
		errno = EINVAL;
		return (-1);
	}

	ret = vm_set_capability(vmm->vmm_vcpu[vcpuid], VM_CAP_MTRAP_EXIT, 1);
	if (ret != 0)
		return (-1);

	VERIFY0(vm_resume_cpu(vmm->vmm_vcpu[vcpuid]));

	do {
		(void) vm_debug_cpus(vmm->vmm_ctx, &cpuset);
	} while (!CPU_ISSET(vcpuid, &cpuset));

	(void) vm_set_capability(vmm->vmm_vcpu[vcpuid], VM_CAP_MTRAP_EXIT, 0);

	return (ret);
}

int
vmm_stop(vmm_t *vmm)
{
	int ret = vm_suspend_all_cpus(vmm->vmm_ctx);

	if (ret == 0)
		vmm_update_ncpu(vmm);

	return (ret);
}

/*
 * Mapping of KDI-defined registers to vmmapi-defined registers.
 * Registers not known to vmmapi use VM_REG_LAST, which is invalid and
 * causes an error in vm_{get,set}_register_set().
 *
 * This array must be kept in sync with the definitions in kdi_regs.h.
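 *
 * For example, KDIREG_RIP maps to VM_REG_GUEST_RIP, while registers that
 * bhyve does not expose (such as KDIREG_SAVFP or KDIREG_TRAPNO) map to
 * VM_REG_LAST and cause vmm_mapreg() to fail with EINVAL.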
 */
static int vmm_kdi_regmap[] = {
	VM_REG_LAST,		/* KDIREG_SAVFP */
	VM_REG_LAST,		/* KDIREG_SAVPC */
	VM_REG_GUEST_RDI,	/* KDIREG_RDI */
	VM_REG_GUEST_RSI,	/* KDIREG_RSI */
	VM_REG_GUEST_RDX,	/* KDIREG_RDX */
	VM_REG_GUEST_RCX,	/* KDIREG_RCX */
	VM_REG_GUEST_R8,	/* KDIREG_R8 */
	VM_REG_GUEST_R9,	/* KDIREG_R9 */
	VM_REG_GUEST_RAX,	/* KDIREG_RAX */
	VM_REG_GUEST_RBX,	/* KDIREG_RBX */
	VM_REG_GUEST_RBP,	/* KDIREG_RBP */
	VM_REG_GUEST_R10,	/* KDIREG_R10 */
	VM_REG_GUEST_R11,	/* KDIREG_R11 */
	VM_REG_GUEST_R12,	/* KDIREG_R12 */
	VM_REG_GUEST_R13,	/* KDIREG_R13 */
	VM_REG_GUEST_R14,	/* KDIREG_R14 */
	VM_REG_GUEST_R15,	/* KDIREG_R15 */
	VM_REG_LAST,		/* KDIREG_FSBASE */
	VM_REG_LAST,		/* KDIREG_GSBASE */
	VM_REG_LAST,		/* KDIREG_KGSBASE */
	VM_REG_GUEST_CR2,	/* KDIREG_CR2 */
	VM_REG_GUEST_CR3,	/* KDIREG_CR3 */
	VM_REG_GUEST_DS,	/* KDIREG_DS */
	VM_REG_GUEST_ES,	/* KDIREG_ES */
	VM_REG_GUEST_FS,	/* KDIREG_FS */
	VM_REG_GUEST_GS,	/* KDIREG_GS */
	VM_REG_LAST,		/* KDIREG_TRAPNO */
	VM_REG_LAST,		/* KDIREG_ERR */
	VM_REG_GUEST_RIP,	/* KDIREG_RIP */
	VM_REG_GUEST_CS,	/* KDIREG_CS */
	VM_REG_GUEST_RFLAGS,	/* KDIREG_RFLAGS */
	VM_REG_GUEST_RSP,	/* KDIREG_RSP */
	VM_REG_GUEST_SS		/* KDIREG_SS */
};
CTASSERT(ARRAY_SIZE(vmm_kdi_regmap) == KDIREG_NGREG);

/*
 * Mapping of libvmm-defined registers to vmmapi-defined registers.
 *
 * This array must be kept in sync with the definitions in libvmm.h
 */
static int vmm_sys_regmap[] = {
	VM_REG_GUEST_CR0,	/* VMM_REG_CR0 */
	VM_REG_GUEST_CR2,	/* VMM_REG_CR2 */
	VM_REG_GUEST_CR3,	/* VMM_REG_CR3 */
	VM_REG_GUEST_CR4,	/* VMM_REG_CR4 */
	VM_REG_GUEST_DR0,	/* VMM_REG_DR0 */
	VM_REG_GUEST_DR1,	/* VMM_REG_DR1 */
	VM_REG_GUEST_DR2,	/* VMM_REG_DR2 */
	VM_REG_GUEST_DR3,	/* VMM_REG_DR3 */
	VM_REG_GUEST_DR6,	/* VMM_REG_DR6 */
	VM_REG_GUEST_DR7,	/* VMM_REG_DR7 */
	VM_REG_GUEST_EFER,	/* VMM_REG_EFER */
	VM_REG_GUEST_PDPTE0,	/* VMM_REG_PDPTE0 */
	VM_REG_GUEST_PDPTE1,	/* VMM_REG_PDPTE1 */
	VM_REG_GUEST_PDPTE2,	/* VMM_REG_PDPTE2 */
	VM_REG_GUEST_PDPTE3,	/* VMM_REG_PDPTE3 */
	VM_REG_GUEST_INTR_SHADOW, /* VMM_REG_INTR_SHADOW */
};

/*
 * Mapping of libvmm-defined descriptors to vmmapi-defined descriptors.
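 * The array is indexed by VMM_DESC_* minus VMM_DESC_OFFSET; see
 * vmm_mapdesc() below.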
 *
 * This array must be kept in sync with the definitions in libvmm.h
 */
static int vmm_descmap[] = {
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_SS
};

static int
vmm_mapreg(int reg)
{
	errno = 0;

	if (reg < 0)
		goto fail;

	if (reg < KDIREG_NGREG)
		return (vmm_kdi_regmap[reg]);

	if (reg >= VMM_REG_OFFSET &&
	    reg < VMM_REG_OFFSET + ARRAY_SIZE(vmm_sys_regmap))
		return (vmm_sys_regmap[reg - VMM_REG_OFFSET]);

fail:
	errno = EINVAL;
	return (VM_REG_LAST);
}

static int
vmm_mapdesc(int desc)
{
	errno = 0;

	if (desc >= VMM_DESC_OFFSET &&
	    desc < VMM_DESC_OFFSET + ARRAY_SIZE(vmm_descmap))
		return (vmm_descmap[desc - VMM_DESC_OFFSET]);

	errno = EINVAL;
	return (VM_REG_LAST);
}

int
vmm_getreg(vmm_t *vmm, int vcpuid, int reg, uint64_t *val)
{
	reg = vmm_mapreg(reg);

	if (reg == VM_REG_LAST)
		return (-1);

	return (vm_get_register(vmm->vmm_vcpu[vcpuid], reg, val));
}

int
vmm_setreg(vmm_t *vmm, int vcpuid, int reg, uint64_t val)
{
	reg = vmm_mapreg(reg);

	if (reg == VM_REG_LAST)
		return (-1);

	return (vm_set_register(vmm->vmm_vcpu[vcpuid], reg, val));
}

int
vmm_get_regset(vmm_t *vmm, int vcpuid, size_t nregs, const int *regnums,
    uint64_t *regvals)
{
	int *vm_regnums;
	int i;
	int ret = -1;

	vm_regnums = malloc(sizeof (int) * nregs);
	if (vm_regnums == NULL)
		return (ret);

	for (i = 0; i != nregs; i++) {
		vm_regnums[i] = vmm_mapreg(regnums[i]);
		if (vm_regnums[i] == VM_REG_LAST)
			goto fail;
	}

	ret = vm_get_register_set(vmm->vmm_vcpu[vcpuid], nregs, vm_regnums,
	    regvals);

fail:
	free(vm_regnums);
	return (ret);
}

int
vmm_set_regset(vmm_t *vmm, int vcpuid, size_t nregs, const int *regnums,
    uint64_t *regvals)
{
	int *vm_regnums;
	int i;
	int ret = -1;

	vm_regnums = malloc(sizeof (int) * nregs);
	if (vm_regnums == NULL)
		return (ret);

	for (i = 0; i != nregs; i++) {
		vm_regnums[i] = vmm_mapreg(regnums[i]);
		if (vm_regnums[i] == VM_REG_LAST)
			goto fail;
	}

	ret = vm_set_register_set(vmm->vmm_vcpu[vcpuid], nregs, vm_regnums,
	    regvals);

fail:
	free(vm_regnums);
	return (ret);
}

int
vmm_get_desc(vmm_t *vmm, int vcpuid, int desc, vmm_desc_t *vd)
{
	desc = vmm_mapdesc(desc);
	if (desc == VM_REG_LAST)
		return (-1);

	return (vm_get_desc(vmm->vmm_vcpu[vcpuid], desc, &vd->vd_base,
	    &vd->vd_lim, &vd->vd_acc));
}

int
vmm_set_desc(vmm_t *vmm, int vcpuid, int desc, vmm_desc_t *vd)
{
	desc = vmm_mapdesc(desc);
	if (desc == VM_REG_LAST)
		return (-1);

	return (vm_set_desc(vmm->vmm_vcpu[vcpuid], desc, vd->vd_base,
	    vd->vd_lim, vd->vd_acc));
}

/*
 * Structure to hold MMU state during address translation.
 * The contents of vmm_mmu_regnum[] must be kept in sync with this.
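 * vmm_get_regset() stores the register values through a flat uint64_t
 * pointer, so the order of vmm_mmu_regnum[] must match the member order
 * of this structure.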
 */
typedef struct vmm_mmu {
	uint64_t vm_cr0;
	uint64_t vm_cr3;
	uint64_t vm_cr4;
	uint64_t vm_efer;
} vmm_mmu_t;

static const int vmm_mmu_regnum[] = {
	VMM_REG_CR0,
	VMM_REG_CR3,
	VMM_REG_CR4,
	VMM_REG_EFER
};

#define	X86_PTE_P		0x001ULL
#define	X86_PTE_PS		0x080ULL

#define	X86_PTE_PHYSMASK	0x000ffffffffff000ULL
#define	X86_PAGE_SHIFT		12
#define	X86_PAGE_SIZE		(1ULL << X86_PAGE_SHIFT)

#define	X86_SEG_CODE_DATA	(1ULL << 4)
#define	X86_SEG_PRESENT		(1ULL << 7)
#define	X86_SEG_LONG		(1ULL << 13)
#define	X86_SEG_BIG		(1ULL << 14)
#define	X86_SEG_GRANULARITY	(1ULL << 15)
#define	X86_SEG_UNUSABLE	(1ULL << 16)

#define	X86_SEG_USABLE		(X86_SEG_PRESENT | X86_SEG_CODE_DATA)
#define	X86_SEG_USABLE_MASK	(X86_SEG_UNUSABLE | X86_SEG_USABLE)

/*
 * vmm_pte2paddr:
 *
 * Recursively calculate the physical address from a virtual address,
 * starting at the given PTE level using the given PTE.
 */
static int
vmm_pte2paddr(vmm_t *vmm, uint64_t pte, boolean_t ia32, int level,
    uint64_t vaddr, uint64_t *paddr)
{
	int pte_size = ia32 ? sizeof (uint32_t) : sizeof (uint64_t);
	int off_bits = ia32 ? 10 : 9;
	boolean_t hugepage = B_FALSE;
	uint64_t offset;
	uint64_t off_mask, off_shift;

	if (level < 4 && (pte & X86_PTE_P) == 0) {
		errno = EFAULT;
		return (-1);
	}

	off_shift = X86_PAGE_SHIFT + off_bits * level;
	off_mask = (1ULL << off_shift) - 1;

	offset = vaddr & off_mask;

	if ((level == 1 || level == 2) && (pte & X86_PTE_PS) != 0) {
		hugepage = B_TRUE;
	} else {
		if (level > 0) {
			offset >>= off_shift - off_bits;
			offset <<= X86_PAGE_SHIFT - off_bits;
		}
		off_mask = 0xfff;
	}

	*paddr = (pte & X86_PTE_PHYSMASK & ~off_mask) + offset;

	if (level == 0 || hugepage)
		return (0);

	pte = 0;
	if (vmm_pread(vmm, &pte, pte_size, *paddr) != pte_size)
		return (-1);
	return (vmm_pte2paddr(vmm, pte, ia32, level - 1, vaddr, paddr));
}

static vmm_mode_t
vmm_vcpu_mmu_mode(vmm_t *vmm, int vcpuid __unused, vmm_mmu_t *mmu)
{
	if ((mmu->vm_cr0 & CR0_PE) == 0)
		return (VMM_MODE_REAL);
	else if ((mmu->vm_cr4 & CR4_PAE) == 0)
		return (VMM_MODE_PROT);
	else if ((mmu->vm_efer & AMD_EFER_LME) == 0)
		return (VMM_MODE_PAE);
	else
		return (VMM_MODE_LONG);
}

vmm_mode_t
vmm_vcpu_mode(vmm_t *vmm, int vcpuid)
{
	vmm_mmu_t mmu = { 0 };

	if (vmm_get_regset(vmm, vcpuid, ARRAY_SIZE(vmm_mmu_regnum),
	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
		return (VMM_MODE_UNKNOWN);

	return (vmm_vcpu_mmu_mode(vmm, vcpuid, &mmu));
}

vmm_isa_t
vmm_vcpu_isa(vmm_t *vmm, int vcpuid)
{
	vmm_desc_t cs;

	if (vmm_get_desc(vmm, vcpuid, VMM_DESC_CS, &cs) != 0)
		return (VMM_ISA_UNKNOWN);

	switch (cs.vd_acc & (X86_SEG_BIG | X86_SEG_LONG)) {
	case 0x0:		/* 16b code segment */
		return (VMM_ISA_16);
	case X86_SEG_LONG:	/* 64b code segment */
		return (VMM_ISA_64);
	case X86_SEG_BIG:	/* 32b code segment */
		return (VMM_ISA_32);
	}

	return (VMM_ISA_UNKNOWN);
}

/*
 * vmm_vtol:
 *
 * Translate a virtual address to a linear address on a certain vCPU,
 * using the specified segment register or descriptor according to the mode.
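 *
 * In real, protected and PAE mode the segment must be usable and the address
 * must lie within the (possibly page-granular) segment limit; in long mode
 * segmentation is flat and the segment base is simply added.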
 *
 */
int
vmm_vtol(vmm_t *vmm, int vcpuid, int seg, uint64_t vaddr, uint64_t *laddr)
{
	vmm_desc_t desc;
	uint64_t limit;

	if (vmm_get_desc(vmm, vcpuid, seg, &desc) != 0)
		return (-1);

	switch (vmm_vcpu_mode(vmm, vcpuid)) {
	case VMM_MODE_REAL:
		if (seg == VMM_DESC_FS || seg == VMM_DESC_GS)
			goto fault;
		/* FALLTHRU */
	case VMM_MODE_PROT:
	case VMM_MODE_PAE:
		if ((desc.vd_acc & X86_SEG_USABLE_MASK) != X86_SEG_USABLE)
			/* unusable, system segment, or not present */
			goto fault;

		limit = desc.vd_lim;
		if (desc.vd_acc & X86_SEG_GRANULARITY)
			limit *= 4096;

		if (vaddr > limit)
			goto fault;
		/* FALLTHRU */
	case VMM_MODE_LONG:
		*laddr = desc.vd_base + vaddr;
		return (0);

	default:
	fault:
		errno = EFAULT;
		return (-1);
	}
}

/*
 * vmm_vtop:
 *
 * Translate a virtual address to a guest physical address on a certain vCPU,
 * according to the mode the vCPU is in.
 */
int
vmm_vtop(vmm_t *vmm, int vcpuid, int seg, uint64_t vaddr, uint64_t *paddr)
{
	vmm_mmu_t mmu = { 0 };
	int ret = 0;

	if (vmm_vtol(vmm, vcpuid, seg, vaddr, &vaddr) != 0)
		return (-1);

	if (vmm_get_regset(vmm, vcpuid, ARRAY_SIZE(vmm_mmu_regnum),
	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
		return (-1);

	if ((mmu.vm_cr0 & CR0_PG) == 0) {
		/* no paging, physical equals virtual */
		*paddr = vaddr;
		return (0);
	}

	switch (vmm_vcpu_mmu_mode(vmm, vcpuid, &mmu)) {
	case VMM_MODE_PROT:
		/* protected mode, no PAE: 2-level paging, 32bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_TRUE, 2, vaddr, paddr);
		break;
	case VMM_MODE_PAE:
		/* protected mode with PAE: 3-level paging, 64bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 3, vaddr, paddr);
		break;
	case VMM_MODE_LONG:
		/* long mode: 4-level paging, 64bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 4, vaddr, paddr);
		break;
	default:
		ret = -1;
	}

	return (ret);
}

ssize_t
vmm_vread(vmm_t *vmm, int vcpuid, int seg, void *buf, size_t len,
    uintptr_t addr)
{
	ssize_t res = 0;
	uint64_t paddr;
	size_t plen;
	uint64_t boundary;

	while (len != 0) {
		if (vmm_vtop(vmm, vcpuid, seg, addr, &paddr) != 0) {
			errno = EFAULT;
			return (0);
		}

		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
		if (addr + len > boundary)
			plen = boundary - addr;
		else
			plen = len;

		if (vmm_pread(vmm, buf, plen, paddr) != plen)
			return (0);
		len -= plen;
		addr += plen;
		buf += plen;
		res += plen;
	}

	return (res);
}

ssize_t
vmm_vwrite(vmm_t *vmm, int vcpuid, int seg, const void *buf, size_t len,
    uintptr_t addr)
{
	ssize_t res = 0;
	uint64_t paddr;
	size_t plen;
	uint64_t boundary;

	while (len != 0) {
		if (vmm_vtop(vmm, vcpuid, seg, addr, &paddr) != 0) {
			errno = EFAULT;
			return (0);
		}

		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
		if (addr + len > boundary)
			plen = boundary - addr;
		else
			plen = len;

		if (vmm_pwrite(vmm, buf, plen, paddr) != plen)
			return (0);
		len -= plen;
		addr += plen;
		buf += plen;
		res += plen;
	}

	return (res);
}

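/*
 * Illustrative usage sketch (comment only, not part of the library): a
 * hypothetical client opens an existing VM by name ("myvm" stands in for a
 * real bhyve VM name), maps guest memory read-only, reads vCPU 0's RIP and
 * a few bytes of guest-physical memory, then tears everything down again.
 * Error handling is abbreviated.
 *
 *	vmm_t *vmm = vmm_open_vm("myvm");
 *	if (vmm == NULL)
 *		err(1, "vmm_open_vm");
 *
 *	if (vmm_map(vmm, B_FALSE) != 0)
 *		err(1, "vmm_map");
 *
 *	uint64_t rip;
 *	if (vmm_getreg(vmm, 0, KDIREG_RIP, &rip) == 0)
 *		(void) printf("vcpu0 rip = %lx\n", rip);
 *
 *	uint8_t buf[16];
 *	(void) vmm_pread(vmm, buf, sizeof (buf), 0x1000);
 *
 *	vmm_unmap(vmm);
 *	vmm_close_vm(vmm);
 */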