/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#ifdef __FreeBSD__
#include <sys/linker.h>
#endif
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/_iovec.h>
#include <sys/cpuset.h>

#include <x86/segments.h>
#include <machine/specialreg.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

#include <libutil.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmmapi.h"

#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)

#ifndef __FreeBSD__
/* shim to no-op for now */
#define	MAP_NOCORE		0
#define	MAP_ALIGNED_SUPER	0

/* Rely on PROT_NONE for guard purposes */
#define	MAP_GUARD	(MAP_PRIVATE | MAP_ANON | MAP_NORESERVE)
#endif

/*
 * Size of the guard region before and after the virtual address space
 * mapping the guest physical memory. This must be a multiple of the
 * superpage size for performance reasons.
 */
#define	VM_MMAP_GUARD_SIZE	(4 * MB)
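
/*
 * Illustrative layout (assuming a guest large enough to need a highmem
 * segment); the exact addresses come from vm_setup_memory() below:
 *
 *	baseaddr - VM_MMAP_GUARD_SIZE	guard region (PROT_NONE)
 *	baseaddr + 0			'lowmem' segment
 *	baseaddr + lowmem		unbacked hole below 4GB (PROT_NONE)
 *	baseaddr + 4GB			'highmem' segment
 *	baseaddr + 4GB + highmem	guard region (PROT_NONE)
 *
 * A stray access just past either end of guest memory faults instead of
 * silently touching unrelated mappings in the host process.
 */
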
#define	PROT_RW		(PROT_READ | PROT_WRITE)
#define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)

struct vmctx {
	int	fd;
	uint32_t lowmem_limit;
	int	memflags;
	size_t	lowmem;
	size_t	highmem;
	char	*baseaddr;
	char	*name;
};

#ifdef __FreeBSD__
#define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
#define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
#else
#define	CREATE(x)	vm_do_ctl(VMM_CREATE_VM, (x))
#define	DESTROY(x)	vm_do_ctl(VMM_DESTROY_VM, (x))

static int
vm_do_ctl(int cmd, const char *name)
{
	int ctl_fd;

	ctl_fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR);
	if (ctl_fd < 0) {
		return (-1);
	}

	if (ioctl(ctl_fd, cmd, name) == -1) {
		int err = errno;

		/* Do not lose ioctl errno through the close(2) */
		(void) close(ctl_fd);
		errno = err;
		return (-1);
	}
	(void) close(ctl_fd);

	return (0);
}
#endif

static int
vm_device_open(const char *name)
{
	int fd, len;
	char *vmfile;

	len = strlen("/dev/vmm/") + strlen(name) + 1;
	vmfile = malloc(len);
	assert(vmfile != NULL);
	snprintf(vmfile, len, "/dev/vmm/%s", name);

	/* Open the device file */
	fd = open(vmfile, O_RDWR, 0);

	free(vmfile);
	return (fd);
}

int
vm_create(const char *name)
{
#ifdef __FreeBSD__
	/* Try to load vmm(4) module before creating a guest. */
	if (modfind("vmm") < 0)
		kldload("vmm");
#endif
	return (CREATE((char *)name));
}

struct vmctx *
vm_open(const char *name)
{
	struct vmctx *vm;

	vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
	assert(vm != NULL);

	vm->fd = -1;
	vm->memflags = 0;
	vm->lowmem_limit = 3 * GB;
	vm->name = (char *)(vm + 1);
	strcpy(vm->name, name);

	if ((vm->fd = vm_device_open(vm->name)) < 0)
		goto err;

	return (vm);
err:
#ifdef __FreeBSD__
	vm_destroy(vm);
#else
	/*
	 * As libvmmapi is used by other programs to query and control bhyve
	 * VMs, destroying a VM just because the open failed isn't useful. We
	 * have to free what we have allocated, though.
	 */
	free(vm);
#endif
	return (NULL);
}

#ifndef __FreeBSD__
void
vm_close(struct vmctx *vm)
{
	assert(vm != NULL);
	assert(vm->fd >= 0);

	(void) close(vm->fd);

	free(vm);
}
#endif

void
vm_destroy(struct vmctx *vm)
{
	assert(vm != NULL);

	if (vm->fd >= 0)
		close(vm->fd);
	DESTROY(vm->name);

	free(vm);
}
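
/*
 * Hedged usage sketch (comment only, not compiled): a typical consumer of
 * this library drives the lifecycle functions above roughly as follows.
 * Error handling is abbreviated and "myguest" is a placeholder name.
 *
 *	struct vmctx *ctx;
 *
 *	if (vm_create("myguest") != 0)
 *		err(1, "vm_create");
 *	if ((ctx = vm_open("myguest")) == NULL)
 *		err(1, "vm_open");
 *	... configure and run the guest with the calls below ...
 *	vm_destroy(ctx);	(or vm_close(ctx) on illumos to detach
 *				 without destroying the VM)
 */
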
int
vm_parse_memsize(const char *optarg, size_t *ret_memsize)
{
	char *endptr;
	size_t optval;
	int error;

	optval = strtoul(optarg, &endptr, 0);
	if (*optarg != '\0' && *endptr == '\0') {
		/*
		 * For the sake of backward compatibility if the memory size
		 * specified on the command line is less than a megabyte then
		 * it is interpreted as being in units of MB.
		 */
		if (optval < MB)
			optval *= MB;
		*ret_memsize = optval;
		error = 0;
	} else
		error = expand_number(optarg, ret_memsize);

	return (error);
}
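
/*
 * Examples of the backward-compatible parsing above (illustrative):
 *	"512"   -> 512 MB	(plain values below one megabyte are scaled
 *				 to megabytes)
 *	"2048M" -> 2 GB		(suffixed values go through expand_number(3))
 *	"4G"    -> 4 GB
 */
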
uint32_t
vm_get_lowmem_limit(struct vmctx *ctx)
{

	return (ctx->lowmem_limit);
}

void
vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
{

	ctx->lowmem_limit = limit;
}

void
vm_set_memflags(struct vmctx *ctx, int flags)
{

	ctx->memflags = flags;
}

int
vm_get_memflags(struct vmctx *ctx)
{

	return (ctx->memflags);
}

/*
 * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
 */
int
vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
    size_t len, int prot)
{
	struct vm_memmap memmap;
	int error, flags;

	memmap.gpa = gpa;
	memmap.segid = segid;
	memmap.segoff = off;
	memmap.len = len;
	memmap.prot = prot;
	memmap.flags = 0;

	if (ctx->memflags & VM_MEM_F_WIRED)
		memmap.flags |= VM_MEMMAP_F_WIRED;

	/*
	 * If this mapping already exists then don't create it again. This
	 * is the common case for SYSMEM mappings created by bhyveload(8).
	 */
	error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
	if (error == 0 && gpa == memmap.gpa) {
		if (segid != memmap.segid || off != memmap.segoff ||
		    prot != memmap.prot || flags != memmap.flags) {
			errno = EEXIST;
			return (-1);
		} else {
			return (0);
		}
	}

	error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
	return (error);
}

int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct vm_memmap memmap;
	int error;

	bzero(&memmap, sizeof(struct vm_memmap));
	memmap.gpa = *gpa;
	error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
	if (error == 0) {
		*gpa = memmap.gpa;
		*segid = memmap.segid;
		*segoff = memmap.segoff;
		*len = memmap.len;
		*prot = memmap.prot;
		*flags = memmap.flags;
	}
	return (error);
}
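
/*
 * Hedged sketch (comment only, not compiled): vm_mmap_getnext() returns the
 * first mapping at or above *gpa, so the whole guest memory map can be
 * walked with something like:
 *
 *	vm_paddr_t gpa = 0;
 *	vm_ooffset_t off;
 *	size_t len;
 *	int segid, prot, flags;
 *
 *	while (vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot,
 *	    &flags) == 0) {
 *		... inspect [gpa, gpa + len) ...
 *		gpa += len;
 *	}
 */
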
/*
 * Return 0 if the segments are identical and non-zero otherwise.
 *
 * This is slightly complicated by the fact that only device memory segments
 * are named.
 */
static int
cmpseg(size_t len, const char *str, size_t len2, const char *str2)
{

	if (len == len2) {
		if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
			return (0);
	}
	return (-1);
}

static int
vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
{
	struct vm_memseg memseg;
	size_t n;
	int error;

	/*
	 * If the memory segment has already been created then just return.
	 * This is the usual case for the SYSMEM segment created by userspace
	 * loaders like bhyveload(8).
	 */
	error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
	    sizeof(memseg.name));
	if (error)
		return (error);

	if (memseg.len != 0) {
		if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
			errno = EINVAL;
			return (-1);
		} else {
			return (0);
		}
	}

	bzero(&memseg, sizeof(struct vm_memseg));
	memseg.segid = segid;
	memseg.len = len;
	if (name != NULL) {
		n = strlcpy(memseg.name, name, sizeof(memseg.name));
		if (n >= sizeof(memseg.name)) {
			errno = ENAMETOOLONG;
			return (-1);
		}
	}

	error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
	return (error);
}

int
vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
    size_t bufsize)
{
	struct vm_memseg memseg;
	size_t n;
	int error;

	memseg.segid = segid;
	error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
	if (error == 0) {
		*lenp = memseg.len;
		n = strlcpy(namebuf, memseg.name, bufsize);
		if (n >= bufsize) {
			errno = ENAMETOOLONG;
			error = -1;
		}
	}
	return (error);
}

static int
#ifdef __FreeBSD__
setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
#else
setup_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len,
    char *base)
#endif
{
	char *ptr;
	int error, flags;

	/* Map 'len' bytes starting at 'gpa' in the guest address space */
#ifdef __FreeBSD__
	error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
#else
	/*
	 * As we use two segments for lowmem/highmem the offset within the
	 * segment is 0 on illumos.
	 */
	error = vm_mmap_memseg(ctx, gpa, segid, 0, len, PROT_ALL);
#endif
	if (error)
		return (error);

	flags = MAP_SHARED | MAP_FIXED;
	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
		flags |= MAP_NOCORE;

	/* mmap into the process address space on the host */
	ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
	if (ptr == MAP_FAILED)
		return (-1);

	return (0);
}

int
vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
{
	size_t objsize, len;
	vm_paddr_t gpa;
	char *baseaddr, *ptr;
	int error;

	assert(vms == VM_MMAP_ALL);

	/*
	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
	 * create another 'highmem' segment above 4GB for the remainder.
	 */
	if (memsize > ctx->lowmem_limit) {
		ctx->lowmem = ctx->lowmem_limit;
		ctx->highmem = memsize - ctx->lowmem_limit;
		objsize = 4*GB + ctx->highmem;
	} else {
		ctx->lowmem = memsize;
		ctx->highmem = 0;
		objsize = ctx->lowmem;
	}

#ifdef __FreeBSD__
	error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
	if (error)
		return (error);
#endif

	/*
	 * Stake out a contiguous region covering the guest physical memory
	 * and the adjoining guard regions.
	 */
	len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
	ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
	if (ptr == MAP_FAILED)
		return (-1);

	baseaddr = ptr + VM_MMAP_GUARD_SIZE;

#ifdef __FreeBSD__
	if (ctx->highmem > 0) {
		gpa = 4*GB;
		len = ctx->highmem;
		error = setup_memory_segment(ctx, gpa, len, baseaddr);
		if (error)
			return (error);
	}

	if (ctx->lowmem > 0) {
		gpa = 0;
		len = ctx->lowmem;
		error = setup_memory_segment(ctx, gpa, len, baseaddr);
		if (error)
			return (error);
	}
#else
	if (ctx->highmem > 0) {
		error = vm_alloc_memseg(ctx, VM_HIGHMEM, ctx->highmem, NULL);
		if (error)
			return (error);
		gpa = 4*GB;
		len = ctx->highmem;
		error = setup_memory_segment(ctx, VM_HIGHMEM, gpa, len, baseaddr);
		if (error)
			return (error);
	}

	if (ctx->lowmem > 0) {
		error = vm_alloc_memseg(ctx, VM_LOWMEM, ctx->lowmem, NULL);
		if (error)
			return (error);
		gpa = 0;
		len = ctx->lowmem;
		error = setup_memory_segment(ctx, VM_LOWMEM, gpa, len, baseaddr);
		if (error)
			return (error);
	}
#endif

	ctx->baseaddr = baseaddr;

	return (0);
}

/*
 * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
 * the lowmem or highmem regions.
 *
 * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
 * The instruction emulation code depends on this behavior.
 */
void *
vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
{

	if (ctx->lowmem > 0) {
		if (gaddr < ctx->lowmem && len <= ctx->lowmem &&
		    gaddr + len <= ctx->lowmem)
			return (ctx->baseaddr + gaddr);
	}

	if (ctx->highmem > 0) {
		if (gaddr >= 4*GB) {
			if (gaddr < 4*GB + ctx->highmem &&
			    len <= ctx->highmem &&
			    gaddr + len <= 4*GB + ctx->highmem)
				return (ctx->baseaddr + gaddr);
		}
	}

	return (NULL);
}
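
/*
 * Illustrative example (assuming the default 3 GB lowmem_limit): for a 6 GB
 * guest, vm_map_gpa(ctx, 0x1000, 4096) returns baseaddr + 0x1000, while a
 * request starting in the PCI hole such as vm_map_gpa(ctx, 0xC0000000, 4096)
 * returns NULL, as does any range that straddles the end of lowmem or
 * highmem.
 */
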
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{

	return (ctx->lowmem);
}

size_t
vm_get_highmem_size(struct vmctx *ctx)
{

	return (ctx->highmem);
}

#ifndef __FreeBSD__
int
vm_get_devmem_offset(struct vmctx *ctx, int segid, off_t *mapoff)
{
	struct vm_devmem_offset vdo;
	int error;

	vdo.segid = segid;
	error = ioctl(ctx->fd, VM_DEVMEM_GETOFFSET, &vdo);
	if (error == 0)
		*mapoff = vdo.offset;

	return (error);
}
#endif

void *
vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
{
#ifdef __FreeBSD__
	char pathname[MAXPATHLEN];
#endif
	size_t len2;
	char *base, *ptr;
	int fd, error, flags;
	off_t mapoff;

	fd = -1;
	ptr = MAP_FAILED;
	if (name == NULL || strlen(name) == 0) {
		errno = EINVAL;
		goto done;
	}

	error = vm_alloc_memseg(ctx, segid, len, name);
	if (error)
		goto done;

#ifdef __FreeBSD__
	strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
	strlcat(pathname, ctx->name, sizeof(pathname));
	strlcat(pathname, ".", sizeof(pathname));
	strlcat(pathname, name, sizeof(pathname));

	fd = open(pathname, O_RDWR);
	if (fd < 0)
		goto done;
#else
	if (vm_get_devmem_offset(ctx, segid, &mapoff) != 0)
		goto done;
#endif

	/*
	 * Stake out a contiguous region covering the device memory and the
	 * adjoining guard regions.
	 */
	len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
	base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1,
	    0);
	if (base == MAP_FAILED)
		goto done;

	flags = MAP_SHARED | MAP_FIXED;
	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
		flags |= MAP_NOCORE;

#ifdef __FreeBSD__
	/* mmap the devmem region in the host address space */
	ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
#else
	/* mmap the devmem region in the host address space */
	ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, ctx->fd,
	    mapoff);
#endif
done:
	if (fd >= 0)
		close(fd);
	return (ptr);
}

int
vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
    uint64_t base, uint32_t limit, uint32_t access)
{
	int error;
	struct vm_seg_desc vmsegdesc;

	bzero(&vmsegdesc, sizeof(vmsegdesc));
	vmsegdesc.cpuid = vcpu;
	vmsegdesc.regnum = reg;
	vmsegdesc.desc.base = base;
	vmsegdesc.desc.limit = limit;
	vmsegdesc.desc.access = access;

	error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
	return (error);
}

int
vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
    uint64_t *base, uint32_t *limit, uint32_t *access)
{
	int error;
	struct vm_seg_desc vmsegdesc;

	bzero(&vmsegdesc, sizeof(vmsegdesc));
	vmsegdesc.cpuid = vcpu;
	vmsegdesc.regnum = reg;

	error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
	if (error == 0) {
		*base = vmsegdesc.desc.base;
		*limit = vmsegdesc.desc.limit;
		*access = vmsegdesc.desc.access;
	}
	return (error);
}

int
vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
{
	int error;

	error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit,
	    &seg_desc->access);
	return (error);
}

int
vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
{
	int error;
	struct vm_register vmreg;

	bzero(&vmreg, sizeof(vmreg));
	vmreg.cpuid = vcpu;
	vmreg.regnum = reg;
	vmreg.regval = val;

	error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
	return (error);
}

int
vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
{
	int error;
	struct vm_register vmreg;

	bzero(&vmreg, sizeof(vmreg));
	vmreg.cpuid = vcpu;
	vmreg.regnum = reg;

	error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
	*ret_val = vmreg.regval;
	return (error);
}

int
vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
    const int *regnums, uint64_t *regvals)
{
	int error;
	struct vm_register_set vmregset;

	bzero(&vmregset, sizeof(vmregset));
	vmregset.cpuid = vcpu;
	vmregset.count = count;
	vmregset.regnums = regnums;
	vmregset.regvals = regvals;

	error = ioctl(ctx->fd, VM_SET_REGISTER_SET, &vmregset);
	return (error);
}

int
vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
    const int *regnums, uint64_t *regvals)
{
	int error;
	struct vm_register_set vmregset;

	bzero(&vmregset, sizeof(vmregset));
	vmregset.cpuid = vcpu;
	vmregset.count = count;
	vmregset.regnums = regnums;
	vmregset.regvals = regvals;

	error = ioctl(ctx->fd, VM_GET_REGISTER_SET, &vmregset);
	return (error);
}
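
/*
 * Hedged usage sketch (comment only, not compiled): reading and writing a
 * single guest register, e.g. to fetch %rip after an exit:
 *
 *	uint64_t rip;
 *
 *	if (vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip) == 0)
 *		printf("vcpu%d rip = 0x%lx\n", vcpu, rip);
 *	(void) vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, 0);
 *
 * vm_set_register_set()/vm_get_register_set() do the same for an array of
 * register numbers in a single ioctl.
 */
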
int
vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
{
	int error;
	struct vm_run vmrun;

	bzero(&vmrun, sizeof(vmrun));
	vmrun.cpuid = vcpu;

	error = ioctl(ctx->fd, VM_RUN, &vmrun);
	bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
	return (error);
}
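
/*
 * Hedged sketch (comment only, not compiled): the usual structure of a vcpu
 * thread is a loop around vm_run(), dispatching on the exit reason.  The
 * handler name below is a placeholder, not part of this library.
 *
 *	struct vm_exit vmexit;
 *
 *	for (;;) {
 *		if (vm_run(ctx, vcpu, &vmexit) != 0)
 *			break;
 *		switch (vmexit.exitcode) {
 *		case VM_EXITCODE_INOUT:
 *			handle_inout(ctx, vcpu, &vmexit);	(placeholder)
 *			break;
 *		...
 *		}
 *	}
 */
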
int
vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
{
	struct vm_suspend vmsuspend;

	bzero(&vmsuspend, sizeof(vmsuspend));
	vmsuspend.how = how;
	return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
}

int
vm_reinit(struct vmctx *ctx)
{

	return (ioctl(ctx->fd, VM_REINIT, 0));
}

int
vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
    uint32_t errcode, int restart_instruction)
{
	struct vm_exception exc;

	exc.cpuid = vcpu;
	exc.vector = vector;
	exc.error_code = errcode;
	exc.error_code_valid = errcode_valid;
	exc.restart_instruction = restart_instruction;

	return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
}

#ifndef __FreeBSD__
void
vm_inject_fault(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
    int errcode)
{
	int error;
	struct vm_exception exc;

	exc.cpuid = vcpu;
	exc.vector = vector;
	exc.error_code = errcode;
	exc.error_code_valid = errcode_valid;
	exc.restart_instruction = 1;
	error = ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc);

	assert(error == 0);
}
#endif /* __FreeBSD__ */

int
vm_apicid2vcpu(struct vmctx *ctx, int apicid)
{
	/*
	 * The apic id associated with the 'vcpu' has the same numerical value
	 * as the 'vcpu' itself.
	 */
	return (apicid);
}

int
vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.cpuid = vcpu;
	vmirq.vector = vector;

	return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
}

int
vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.cpuid = vcpu;
	vmirq.vector = vector;

	return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
}

int
vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
{
	struct vm_lapic_msi vmmsi;

	bzero(&vmmsi, sizeof(vmmsi));
	vmmsi.addr = addr;
	vmmsi.msg = msg;

	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
}

int
vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
}

int
vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
{

	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
}

int
vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu, vm_paddr_t gpa,
    bool write, int size, uint64_t *value)
{
	struct vm_readwrite_kernemu_device irp = {
		.vcpuid = vcpu,
		.access_width = fls(size) - 1,
		.gpa = gpa,
		.value = write ? *value : ~0ul,
	};
	long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV);
	int rc;

	rc = ioctl(ctx->fd, cmd, &irp);
	if (rc == 0 && !write)
		*value = irp.value;
	return (rc);
}

int
vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
}

int
vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
}

int
vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
}

int
vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
    enum vm_intr_trigger trigger)
{
	struct vm_isa_irq_trigger isa_irq_trigger;

	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
	isa_irq_trigger.atpic_irq = atpic_irq;
	isa_irq_trigger.trigger = trigger;

	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
}

int
vm_inject_nmi(struct vmctx *ctx, int vcpu)
{
	struct vm_nmi vmnmi;

	bzero(&vmnmi, sizeof(vmnmi));
	vmnmi.cpuid = vcpu;

	return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
}

static const char *capstrmap[] = {
	[VM_CAP_HALT_EXIT]  = "hlt_exit",
	[VM_CAP_MTRAP_EXIT] = "mtrap_exit",
	[VM_CAP_PAUSE_EXIT] = "pause_exit",
#ifdef __FreeBSD__
	[VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
#endif
	[VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
	[VM_CAP_BPT_EXIT] = "bpt_exit",
};

int
vm_capability_name2type(const char *capname)
{
	int i;

	for (i = 0; i < nitems(capstrmap); i++) {
		if (strcmp(capstrmap[i], capname) == 0)
			return (i);
	}

	return (-1);
}

const char *
vm_capability_type2name(int type)
{
	if (type >= 0 && type < nitems(capstrmap))
		return (capstrmap[type]);

	return (NULL);
}

int
vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
    int *retval)
{
	int error;
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.cpuid = vcpu;
	vmcap.captype = cap;

	error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
	*retval = vmcap.capval;
	return (error);
}

int
vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
{
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.cpuid = vcpu;
	vmcap.captype = cap;
	vmcap.capval = val;

	return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
}
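
/*
 * Hedged usage sketch (comment only, not compiled): toggling a capability,
 * here exit-on-HLT, after first checking whether the host supports it:
 *
 *	int cap;
 *
 *	if (vm_get_capability(ctx, vcpu, VM_CAP_HALT_EXIT, &cap) == 0)
 *		(void) vm_set_capability(ctx, vcpu, VM_CAP_HALT_EXIT, 1);
 *
 * vm_capability_name2type("hlt_exit") maps the command-line spelling back
 * to VM_CAP_HALT_EXIT.
 */
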
#ifdef __FreeBSD__
int
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
}

int
vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
}

int
vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
    vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.bus = bus;
	pptmmio.slot = slot;
	pptmmio.func = func;
	pptmmio.gpa = gpa;
	pptmmio.len = len;
	pptmmio.hpa = hpa;

	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
    uint64_t addr, uint64_t msg, int numvec)
{
	struct vm_pptdev_msi pptmsi;

	bzero(&pptmsi, sizeof(pptmsi));
	pptmsi.vcpu = vcpu;
	pptmsi.bus = bus;
	pptmsi.slot = slot;
	pptmsi.func = func;
	pptmsi.msg = msg;
	pptmsi.addr = addr;
	pptmsi.numvec = numvec;

	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
}

int
vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct vm_pptdev_msix pptmsix;

	bzero(&pptmsix, sizeof(pptmsix));
	pptmsix.vcpu = vcpu;
	pptmsix.bus = bus;
	pptmsix.slot = slot;
	pptmsix.func = func;
	pptmsix.idx = idx;
	pptmsix.msg = msg;
	pptmsix.addr = addr;
	pptmsix.vector_control = vector_control;

	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
}

int
vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func,
    int *msi_limit, int *msix_limit)
{
	struct vm_pptdev_limits pptlimits;
	int error;

	bzero(&pptlimits, sizeof (pptlimits));
	pptlimits.bus = bus;
	pptlimits.slot = slot;
	pptlimits.func = func;

	error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits);

	*msi_limit = pptlimits.msi_limit;
	*msix_limit = pptlimits.msix_limit;

	return (error);
}
#else /* __FreeBSD__ */
int
vm_assign_pptdev(struct vmctx *ctx, int pptfd)
{
	struct vm_pptdev pptdev;

	pptdev.pptfd = pptfd;
	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
}

int
vm_unassign_pptdev(struct vmctx *ctx, int pptfd)
{
	struct vm_pptdev pptdev;

	pptdev.pptfd = pptfd;
	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
}

int
vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len,
    vm_paddr_t hpa)
{
	struct vm_pptdev_mmio pptmmio;

	pptmmio.pptfd = pptfd;
	pptmmio.gpa = gpa;
	pptmmio.len = len;
	pptmmio.hpa = hpa;
	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd, uint64_t addr,
    uint64_t msg, int numvec)
{
	struct vm_pptdev_msi pptmsi;

	pptmsi.vcpu = vcpu;
	pptmsi.pptfd = pptfd;
	pptmsi.msg = msg;
	pptmsi.addr = addr;
	pptmsi.numvec = numvec;
	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
}

int
vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int pptfd, int idx,
    uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct vm_pptdev_msix pptmsix;

	pptmsix.vcpu = vcpu;
	pptmsix.pptfd = pptfd;
	pptmsix.idx = idx;
	pptmsix.msg = msg;
	pptmsix.addr = addr;
	pptmsix.vector_control = vector_control;
	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
}

int
vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit,
    int *msix_limit)
{
	struct vm_pptdev_limits pptlimits;
	int error;

	bzero(&pptlimits, sizeof (pptlimits));
	pptlimits.pptfd = pptfd;
	error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits);

	*msi_limit = pptlimits.msi_limit;
	*msix_limit = pptlimits.msix_limit;
	return (error);
}
#endif /* __FreeBSD__ */

uint64_t *
vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
    int *ret_entries)
{
	int error;

	static struct vm_stats vmstats;

	vmstats.cpuid = vcpu;

	error = ioctl(ctx->fd, VM_STATS_IOC, &vmstats);
	if (error == 0) {
		if (ret_entries)
			*ret_entries = vmstats.num_entries;
		if (ret_tv)
			*ret_tv = vmstats.tv;
		return (vmstats.statbuf);
	} else
		return (NULL);
}

const char *
vm_get_stat_desc(struct vmctx *ctx, int index)
{
	static struct vm_stat_desc statdesc;

	statdesc.index = index;
	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
		return (statdesc.desc);
	else
		return (NULL);
}

int
vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));
	x2apic.cpuid = vcpu;

	error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic);
	*state = x2apic.state;
	return (error);
}

int
vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));
	x2apic.cpuid = vcpu;
	x2apic.state = state;

	error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic);

	return (error);
}
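
/*
 * Hedged sketch (comment only, not compiled): the stats interface above is
 * typically consumed by fetching the buffer once and then resolving each
 * index to its description string:
 *
 *	int i, num;
 *	uint64_t *stats;
 *	struct timeval tv;
 *
 *	stats = vm_get_stats(ctx, vcpu, &tv, &num);
 *	for (i = 0; stats != NULL && i < num; i++)
 *		printf("%s\t%lu\n", vm_get_stat_desc(ctx, i), stats[i]);
 *
 * Note that vm_get_stats() returns a pointer into static storage, so it is
 * not thread-safe and the buffer is overwritten by the next call.
 */
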
/*
 * From Intel Vol 3a:
 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
 */
int
vcpu_reset(struct vmctx *vmctx, int vcpu)
{
	int error;
	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
	uint32_t desc_access, desc_limit;
	uint16_t sel;

	zero = 0;

	rflags = 0x2;
	error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
	if (error)
		goto done;

	rip = 0xfff0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
		goto done;

	cr0 = CR0_NE;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
		goto done;

	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
		goto done;

	cr4 = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
		goto done;

	/*
	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
	 */
	desc_base = 0xffff0000;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0xf000;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
		goto done;

	/*
	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
	 */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
		goto done;

	/* General purpose registers */
	rdx = 0xf00;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
		goto done;

	/* GDTR, IDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	/* TR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0000008b;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
		goto done;

	/* LDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x00000082;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
	    desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
		goto done;

	/* XXX cr2, debug registers */

	error = 0;
done:
	return (error);
}

int
vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
{
	int error, i;
	struct vm_gpa_pte gpapte;

	bzero(&gpapte, sizeof(gpapte));
	gpapte.gpa = gpa;

	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);

	if (error == 0) {
		*num = gpapte.ptenum;
		for (i = 0; i < gpapte.ptenum; i++)
			pte[i] = gpapte.pte[i];
	}

	return (error);
}

int
vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
{
	int error;
	struct vm_hpet_cap cap;

	bzero(&cap, sizeof(struct vm_hpet_cap));
	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
	if (capabilities != NULL)
		*capabilities = cap.capabilities;
	return (error);
}

int
vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.vcpuid = vcpu;
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

int
vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.vcpuid = vcpu;
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = ioctl(ctx->fd, VM_GLA2GPA_NOFAULT, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

#ifndef min
#define	min(a,b)	(((a) < (b)) ? (a) : (b))
#endif

int
vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
    int *fault)
{
	void *va;
	uint64_t gpa;
	int error, i, n, off;

	for (i = 0; i < iovcnt; i++) {
		iov[i].iov_base = 0;
		iov[i].iov_len = 0;
	}

	while (len) {
		assert(iovcnt > 0);
		error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault);
		if (error || *fault)
			return (error);

		off = gpa & PAGE_MASK;
		n = min(len, PAGE_SIZE - off);

		va = vm_map_gpa(ctx, gpa, n);
		if (va == NULL)
			return (EFAULT);

		iov->iov_base = va;
		iov->iov_len = n;
		iov++;
		iovcnt--;

		gla += n;
		len -= n;
	}
	return (0);
}

void
vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
{

	return;
}

void
vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	dst = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		src = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		dst += n;
		len -= n;
	}
}

void
vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
    size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	src = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		dst = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		src += n;
		len -= n;
	}
}
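
/*
 * Hedged usage sketch (comment only, not compiled): instruction emulation
 * copies data to or from guest linear addresses by first resolving them into
 * host iovecs and then performing the copy.  Sizes and names here are
 * placeholders.
 *
 *	struct iovec iov[4];
 *	uint8_t buf[64];
 *	int error, fault;
 *
 *	error = vm_copy_setup(ctx, vcpu, &paging, gla, sizeof(buf),
 *	    PROT_READ, iov, nitems(iov), &fault);
 *	if (error == 0 && !fault)
 *		vm_copyin(ctx, vcpu, iov, buf, sizeof(buf));
 *	vm_copy_teardown(ctx, vcpu, iov, nitems(iov));
 */
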
static int
vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
{
	struct vm_cpuset vm_cpuset;
	int error;

	bzero(&vm_cpuset, sizeof(struct vm_cpuset));
	vm_cpuset.which = which;
	vm_cpuset.cpusetsize = sizeof(cpuset_t);
	vm_cpuset.cpus = cpus;

	error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset);
	return (error);
}

int
vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
}

int
vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
}

int
vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
}

int
vm_activate_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
	return (error);
}

int
vm_suspend_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
	return (error);
}

int
vm_resume_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
	return (error);
}

int
vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	vmii.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
	if (error == 0) {
		*info1 = vmii.info1;
		*info2 = vmii.info2;
	}
	return (error);
}

int
vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	vmii.vcpuid = vcpu;
	vmii.info1 = info1;
	error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
	return (error);
}

int
vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	rtcdata.value = value;
	error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
	return (error);
}

int
vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
	if (error == 0)
		*retval = rtcdata.value;
	return (error);
}

int
vm_rtc_settime(struct vmctx *ctx, time_t secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	rtctime.secs = secs;
	error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
	return (error);
}

int
vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
	if (error == 0)
		*secs = rtctime.secs;
	return (error);
}

int
vm_restart_instruction(void *arg, int vcpu)
{
	struct vmctx *ctx = arg;

	return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
}

int
vm_set_topology(struct vmctx *ctx,
    uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
{
	struct vm_cpu_topology topology;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	topology.sockets = sockets;
	topology.cores = cores;
	topology.threads = threads;
	topology.maxcpus = maxcpus;
	return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
}

int
vm_get_topology(struct vmctx *ctx,
    uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
{
	struct vm_cpu_topology topology;
	int error;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
	if (error == 0) {
		*sockets = topology.sockets;
		*cores = topology.cores;
		*threads = topology.threads;
		*maxcpus = topology.maxcpus;
	}
	return (error);
}
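
/*
 * Illustrative example: a guest meant to appear as 2 sockets x 4 cores x
 * 2 threads (16 vcpus) would be configured with
 *
 *	vm_set_topology(ctx, 2, 4, 2, 0);
 *
 * where 0 leaves maxcpus at the kernel default; vm_get_topology() hands the
 * same values back to callers that need to describe the topology to the
 * guest.
 */
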
int
vm_get_device_fd(struct vmctx *ctx)
{

	return (ctx->fd);
}

#ifndef __FreeBSD__
int
vm_wrlock_cycle(struct vmctx *ctx)
{
	if (ioctl(ctx->fd, VM_WRLOCK_CYCLE, 0) != 0) {
		return (errno);
	}
	return (0);
}
#endif /* __FreeBSD__ */

#ifdef __FreeBSD__
const cap_ioctl_t *
vm_get_ioctls(size_t *len)
{
	cap_ioctl_t *cmds;
	/* keep in sync with machine/vmm_dev.h */
	static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
	    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
	    VM_MMAP_GETNEXT, VM_SET_REGISTER, VM_GET_REGISTER,
	    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
	    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
	    VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
	    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
	    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
	    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
	    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
	    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
	    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
	    VM_PPTDEV_MSIX, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
	    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
	    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
	    VM_GLA2GPA_NOFAULT,
	    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
	    VM_SET_INTINFO, VM_GET_INTINFO,
	    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
	    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };

	if (len == NULL) {
		cmds = malloc(sizeof(vm_ioctl_cmds));
		if (cmds == NULL)
			return (NULL);
		bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds));
		return (cmds);
	}

	*len = nitems(vm_ioctl_cmds);
	return (NULL);
}
#endif /* __FreeBSD__ */