/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/_iovec.h>
#include <sys/cpuset.h>

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

#include <libutil.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmmapi.h"
#include "internal.h"

#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)

#define	VM_LOWMEM_LIMIT	(3 * GB)
#define	VM_HIGHMEM_BASE	(4 * GB)

#ifndef __FreeBSD__
/* shim to no-op for now */
#define	MAP_NOCORE		0
#define	MAP_ALIGNED_SUPER	0

/* Rely on PROT_NONE for guard purposes */
#define	MAP_GUARD	(MAP_PRIVATE | MAP_ANON | MAP_NORESERVE)

#define	_Thread_local	__thread
#endif

/*
 * Size of the guard region before and after the virtual address space
 * mapping the guest physical memory. This must be a multiple of the
 * superpage size for performance reasons.
 */
#define	VM_MMAP_GUARD_SIZE	(4 * MB)

#define	PROT_RW		(PROT_READ | PROT_WRITE)
#define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)

struct vmctx {
	int	fd;
	uint32_t lowmem_limit;
	int	memflags;
	size_t	lowmem;
	size_t	highmem;
	char	*baseaddr;
	char	*name;
};

#ifdef	__FreeBSD__
#define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
#define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
#endif

static int
vm_device_open(const char *name)
{
	int fd, len;
	char *vmfile;

	len = strlen("/dev/vmm/") + strlen(name) + 1;
	vmfile = malloc(len);
	assert(vmfile != NULL);
	snprintf(vmfile, len, "/dev/vmm/%s", name);

	/* Open the device file */
	fd = open(vmfile, O_RDWR, 0);

	free(vmfile);
	return (fd);
}

#ifdef	__FreeBSD__
int
vm_create(const char *name)
{
	/* Try to load vmm(4) module before creating a guest. */
	if (modfind("vmm") < 0)
		kldload("vmm");
	return (CREATE(name));
}
#else
static int
vm_do_ctl(int cmd, void *req)
{
	int ctl_fd;

	ctl_fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR);
	if (ctl_fd < 0) {
		return (-1);
	}

	if (ioctl(ctl_fd, cmd, req) == -1) {
		int err = errno;

		/* Do not lose ioctl errno through the close(2) */
		(void) close(ctl_fd);
		errno = err;
		return (-1);
	}
	(void) close(ctl_fd);

	return (0);
}

int
vm_create(const char *name, uint64_t flags)
{
	struct vm_create_req req;

	(void) strncpy(req.name, name, VM_MAX_NAMELEN);
	req.flags = flags;

	return (vm_do_ctl(VMM_CREATE_VM, &req));
}
#endif

struct vmctx *
vm_open(const char *name)
{
	struct vmctx *vm;
	int saved_errno;

	vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
	assert(vm != NULL);

	vm->fd = -1;
	vm->memflags = 0;
	vm->lowmem_limit = 3 * GB;
	vm->name = (char *)(vm + 1);
	strcpy(vm->name, name);

	if ((vm->fd = vm_device_open(vm->name)) < 0)
		goto err;

	return (vm);
err:
	saved_errno = errno;
	free(vm);
	errno = saved_errno;
	return (NULL);
}

#ifdef	__FreeBSD__
void
vm_close(struct vmctx *vm)
{
	assert(vm != NULL);

	close(vm->fd);
	free(vm);
}

void
vm_destroy(struct vmctx *vm)
{
	assert(vm != NULL);

	if (vm->fd >= 0)
		close(vm->fd);
	DESTROY(vm->name);

	free(vm);
}
#else
void
vm_close(struct vmctx *vm)
{
	assert(vm != NULL);
	assert(vm->fd >= 0);

	(void) close(vm->fd);

	free(vm);
}

void
vm_destroy(struct vmctx *vm)
{
	assert(vm != NULL);

	if (vm->fd >= 0) {
		(void) ioctl(vm->fd, VM_DESTROY_SELF, 0);
		(void) close(vm->fd);
		vm->fd = -1;
	}

	free(vm);
}
#endif

struct vcpu *
vm_vcpu_open(struct vmctx *ctx, int vcpuid)
{
	struct vcpu *vcpu;

	vcpu = malloc(sizeof(*vcpu));
#ifndef __FreeBSD__
	if (vcpu == NULL)
		return (vcpu);
#endif
	vcpu->ctx = ctx;
	vcpu->vcpuid = vcpuid;
	return (vcpu);
}

void
vm_vcpu_close(struct vcpu *vcpu)
{
	free(vcpu);
}

int
vcpu_id(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

struct vmctx *
vcpu_ctx(struct vcpu *vcpu)
{
	return (vcpu->ctx);
}

int
vm_parse_memsize(const char *opt, size_t *ret_memsize)
{
	char *endptr;
	size_t optval;
	int error;

	optval = strtoul(opt, &endptr, 0);
	if (*opt != '\0' && *endptr == '\0') {
		/*
		 * For the sake of backward compatibility if the memory size
		 * specified on the command line is less than a megabyte then
		 * it is interpreted as being in units of MB.
		 */
		if (optval < MB)
			optval *= MB;
		*ret_memsize = optval;
		error = 0;
	} else
		error = expand_number(opt, ret_memsize);

	return (error);
}
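
/*
 * Illustrative sketch (not part of the library): how the two
 * vm_parse_memsize() branches above behave for typical inputs.
 *
 *	size_t sz;
 *	vm_parse_memsize("512", &sz);	// bare number below 1 MB: 512 MB
 *	vm_parse_memsize("2G", &sz);	// suffix form, via expand_number(3)
 */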

uint32_t
vm_get_lowmem_limit(struct vmctx *ctx)
{

	return (ctx->lowmem_limit);
}

void
vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
{

	ctx->lowmem_limit = limit;
}

void
vm_set_memflags(struct vmctx *ctx, int flags)
{

	ctx->memflags = flags;
}

int
vm_get_memflags(struct vmctx *ctx)
{

	return (ctx->memflags);
}

/*
 * Map segment 'segid' starting at 'off' into guest address range
 * [gpa, gpa + len).
 */
int
vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
    size_t len, int prot)
{
	struct vm_memmap memmap;
	int error, flags;

	memmap.gpa = gpa;
	memmap.segid = segid;
	memmap.segoff = off;
	memmap.len = len;
	memmap.prot = prot;
	memmap.flags = 0;

	if (ctx->memflags & VM_MEM_F_WIRED)
		memmap.flags |= VM_MEMMAP_F_WIRED;

	/*
	 * If this mapping already exists then don't create it again. This
	 * is the common case for SYSMEM mappings created by bhyveload(8).
	 */
	error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
	if (error == 0 && gpa == memmap.gpa) {
		if (segid != memmap.segid || off != memmap.segoff ||
		    prot != memmap.prot || flags != memmap.flags) {
			errno = EEXIST;
			return (-1);
		} else {
			return (0);
		}
	}

	error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
	return (error);
}

#ifdef	__FreeBSD__
int
vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
    size_t *lowmem_size, size_t *highmem_size)
{

	*guest_baseaddr = ctx->baseaddr;
	*lowmem_size = ctx->lowmem;
	*highmem_size = ctx->highmem;
	return (0);
}
#endif

int
vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
{
	struct vm_munmap munmap;
	int error;

	munmap.gpa = gpa;
	munmap.len = len;

	error = ioctl(ctx->fd, VM_MUNMAP_MEMSEG, &munmap);
	return (error);
}

int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct vm_memmap memmap;
	int error;

	bzero(&memmap, sizeof(struct vm_memmap));
	memmap.gpa = *gpa;
	error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
	if (error == 0) {
		*gpa = memmap.gpa;
		*segid = memmap.segid;
		*segoff = memmap.segoff;
		*len = memmap.len;
		*prot = memmap.prot;
		*flags = memmap.flags;
	}
	return (error);
}
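
/*
 * Illustrative sketch (not part of the library): walking every memory
 * mapping of a guest with vm_mmap_getnext(). The kernel returns the first
 * mapping at or above the gpa passed in, so advancing the cursor past the
 * end of each mapping visits them all.
 *
 *	vm_paddr_t gpa = 0;
 *	vm_ooffset_t segoff;
 *	size_t maplen;
 *	int segid, prot, flags;
 *
 *	while (vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &maplen,
 *	    &prot, &flags) == 0) {
 *		printf("seg %d at %#lx, %zu bytes\n", segid, gpa, maplen);
 *		gpa += maplen;
 *	}
 */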

/*
 * Return 0 if the segments are identical and non-zero otherwise.
 *
 * This is slightly complicated by the fact that only device memory segments
 * are named.
 */
static int
cmpseg(size_t len, const char *str, size_t len2, const char *str2)
{

	if (len == len2) {
		if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
			return (0);
	}
	return (-1);
}

static int
vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
{
	struct vm_memseg memseg;
	size_t n;
	int error;

	/*
	 * If the memory segment has already been created then just return.
	 * This is the usual case for the SYSMEM segment created by userspace
	 * loaders like bhyveload(8).
	 */
	error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
	    sizeof(memseg.name));
	if (error)
		return (error);

	if (memseg.len != 0) {
		if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
			errno = EINVAL;
			return (-1);
		} else {
			return (0);
		}
	}

	bzero(&memseg, sizeof(struct vm_memseg));
	memseg.segid = segid;
	memseg.len = len;
	if (name != NULL) {
		n = strlcpy(memseg.name, name, sizeof(memseg.name));
		if (n >= sizeof(memseg.name)) {
			errno = ENAMETOOLONG;
			return (-1);
		}
	}

	error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
	return (error);
}

int
vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
    size_t bufsize)
{
	struct vm_memseg memseg;
	size_t n;
	int error;

	memseg.segid = segid;
	error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
	if (error == 0) {
		*lenp = memseg.len;
		n = strlcpy(namebuf, memseg.name, bufsize);
		if (n >= bufsize) {
			errno = ENAMETOOLONG;
			error = -1;
		}
	}
	return (error);
}

static int
#ifdef __FreeBSD__
setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
#else
setup_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len,
    char *base)
#endif
{
	char *ptr;
	int error, flags;

	/* Map 'len' bytes starting at 'gpa' in the guest address space */
#ifdef __FreeBSD__
	error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
#else
	/*
	 * As we use two segments for lowmem/highmem the offset within the
	 * segment is 0 on illumos.
	 */
	error = vm_mmap_memseg(ctx, gpa, segid, 0, len, PROT_ALL);
#endif
	if (error)
		return (error);

	flags = MAP_SHARED | MAP_FIXED;
	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
		flags |= MAP_NOCORE;

	/* mmap into the process address space on the host */
	ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
	if (ptr == MAP_FAILED)
		return (-1);

	return (0);
}
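
/*
 * Illustrative sketch (not part of the library): the guest-physical layout
 * that vm_setup_memory() below builds for a guest larger than the lowmem
 * limit, and where it lands in the host address space.
 *
 *	guest physical			host virtual
 *	[0, lowmem)		<->	[baseaddr, baseaddr + lowmem)
 *	[lowmem, 4GB)			PCI/MMIO hole, not mapped
 *	[4GB, 4GB + highmem)	<->	[baseaddr + 4GB, ...)
 *
 * Guard regions of VM_MMAP_GUARD_SIZE bytes sit immediately before and
 * after the staked-out region so stray accesses fault instead of
 * corrupting neighbouring mappings.
 */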

int
vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
{
	size_t objsize, len;
	vm_paddr_t gpa;
	char *baseaddr, *ptr;
	int error;

	assert(vms == VM_MMAP_ALL);

	/*
	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
	 * create another 'highmem' segment above 4GB for the remainder.
	 */
	if (memsize > ctx->lowmem_limit) {
		ctx->lowmem = ctx->lowmem_limit;
		ctx->highmem = memsize - ctx->lowmem_limit;
		objsize = 4*GB + ctx->highmem;
	} else {
		ctx->lowmem = memsize;
		ctx->highmem = 0;
		objsize = ctx->lowmem;
	}

#ifdef __FreeBSD__
	error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
	if (error)
		return (error);
#endif

	/*
	 * Stake out a contiguous region covering the guest physical memory
	 * and the adjoining guard regions.
	 */
	len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
	ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
	if (ptr == MAP_FAILED)
		return (-1);

	baseaddr = ptr + VM_MMAP_GUARD_SIZE;

#ifdef __FreeBSD__
	if (ctx->highmem > 0) {
		gpa = 4*GB;
		len = ctx->highmem;
		error = setup_memory_segment(ctx, gpa, len, baseaddr);
		if (error)
			return (error);
	}

	if (ctx->lowmem > 0) {
		gpa = 0;
		len = ctx->lowmem;
		error = setup_memory_segment(ctx, gpa, len, baseaddr);
		if (error)
			return (error);
	}
#else
	if (ctx->highmem > 0) {
		error = vm_alloc_memseg(ctx, VM_HIGHMEM, ctx->highmem, NULL);
		if (error)
			return (error);
		gpa = 4*GB;
		len = ctx->highmem;
		error = setup_memory_segment(ctx, VM_HIGHMEM, gpa, len,
		    baseaddr);
		if (error)
			return (error);
	}

	if (ctx->lowmem > 0) {
		error = vm_alloc_memseg(ctx, VM_LOWMEM, ctx->lowmem, NULL);
		if (error)
			return (error);
		gpa = 0;
		len = ctx->lowmem;
		error = setup_memory_segment(ctx, VM_LOWMEM, gpa, len,
		    baseaddr);
		if (error)
			return (error);
	}
#endif

	ctx->baseaddr = baseaddr;

	return (0);
}
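
/*
 * Illustrative sketch (not part of the library): typical setup sequence in
 * a bhyve-style userspace, assuming a freshly created guest; the name
 * "testvm" is hypothetical.
 *
 *	struct vmctx *ctx = vm_open("testvm");
 *	assert(ctx != NULL);
 *	if (vm_setup_memory(ctx, 2 * GB, VM_MMAP_ALL) != 0)
 *		err(1, "vm_setup_memory");
 *	void *va = vm_map_gpa(ctx, 0x1000, 4096);  // host view of guest page
 */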

/*
 * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
 * the lowmem or highmem regions.
 *
 * In particular return NULL if [gaddr, gaddr+len) falls in the guest MMIO
 * region. The instruction emulation code depends on this behavior.
 */
void *
vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
{

	if (ctx->lowmem > 0) {
		if (gaddr < ctx->lowmem && len <= ctx->lowmem &&
		    gaddr + len <= ctx->lowmem)
			return (ctx->baseaddr + gaddr);
	}

	if (ctx->highmem > 0) {
		if (gaddr >= 4*GB) {
			if (gaddr < 4*GB + ctx->highmem &&
			    len <= ctx->highmem &&
			    gaddr + len <= 4*GB + ctx->highmem)
				return (ctx->baseaddr + gaddr);
		}
	}

	return (NULL);
}

#ifdef	__FreeBSD__
vm_paddr_t
vm_rev_map_gpa(struct vmctx *ctx, void *addr)
{
	vm_paddr_t offaddr;

	offaddr = (char *)addr - ctx->baseaddr;

	if (ctx->lowmem > 0)
		if (offaddr <= ctx->lowmem)
			return (offaddr);

	if (ctx->highmem > 0)
		if (offaddr >= 4*GB && offaddr < 4*GB + ctx->highmem)
			return (offaddr);

	return ((vm_paddr_t)-1);
}

const char *
vm_get_name(struct vmctx *ctx)
{

	return (ctx->name);
}
#endif /* __FreeBSD__ */

size_t
vm_get_lowmem_size(struct vmctx *ctx)
{

	return (ctx->lowmem);
}

vm_paddr_t
vm_get_highmem_base(struct vmctx *ctx __unused)
{
	return (VM_HIGHMEM_BASE);
}

size_t
vm_get_highmem_size(struct vmctx *ctx)
{

	return (ctx->highmem);
}

#ifndef __FreeBSD__
int
vm_get_devmem_offset(struct vmctx *ctx, int segid, off_t *mapoff)
{
	struct vm_devmem_offset vdo;
	int error;

	vdo.segid = segid;
	error = ioctl(ctx->fd, VM_DEVMEM_GETOFFSET, &vdo);
	if (error == 0)
		*mapoff = vdo.offset;

	return (error);
}
#endif

void *
vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
{
#ifdef	__FreeBSD__
	char pathname[MAXPATHLEN];
#endif
	size_t len2;
	char *base, *ptr;
	int fd, error, flags;
	off_t mapoff;

	fd = -1;
	ptr = MAP_FAILED;
	if (name == NULL || strlen(name) == 0) {
		errno = EINVAL;
		goto done;
	}

	error = vm_alloc_memseg(ctx, segid, len, name);
	if (error)
		goto done;

#ifdef	__FreeBSD__
	strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
	strlcat(pathname, ctx->name, sizeof(pathname));
	strlcat(pathname, ".", sizeof(pathname));
	strlcat(pathname, name, sizeof(pathname));

	fd = open(pathname, O_RDWR);
	if (fd < 0)
		goto done;
#else
	if (vm_get_devmem_offset(ctx, segid, &mapoff) != 0)
		goto done;
#endif

	/*
	 * Stake out a contiguous region covering the device memory and the
	 * adjoining guard regions.
	 */
	len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
	base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1,
	    0);
	if (base == MAP_FAILED)
		goto done;

	flags = MAP_SHARED | MAP_FIXED;
	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
		flags |= MAP_NOCORE;

#ifdef	__FreeBSD__
	/* mmap the devmem region in the host address space */
	ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
#else
	/* mmap the devmem region in the host address space */
	ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, ctx->fd,
	    mapoff);
#endif
done:
	if (fd >= 0)
		close(fd);
	return (ptr);
}
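
/*
 * Illustrative sketch (not part of the library): allocating a named
 * device-memory segment, e.g. for a framebuffer; the size here is
 * hypothetical.
 *
 *	void *fb = vm_create_devmem(ctx, VM_FRAMEBUFFER, "framebuffer",
 *	    16 * MB);
 *	if (fb == MAP_FAILED)
 *		err(1, "vm_create_devmem");
 */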

#ifdef __FreeBSD__
static int
vcpu_ioctl(struct vcpu *vcpu, u_long cmd, void *arg)
{
	/*
	 * XXX: fragile, handle with care
	 * Assumes that the first field of the ioctl data
	 * is the vcpuid.
	 */
	*(int *)arg = vcpu->vcpuid;
	return (ioctl(vcpu->ctx->fd, cmd, arg));
}
#else
/*
 * Rather than use the fragile function above, we continue to explicitly set
 * the vcpu field in the command struct, and use the following function to
 * wrap the invocations, to continue to minimise the upstream diff.
 */
static int
vcpu_ioctl(struct vcpu *vcpu, u_long cmd, void *arg)
{
	return (ioctl(vcpu->ctx->fd, cmd, arg));
}
#endif

int
vm_set_desc(struct vcpu *vcpu, int reg,
    uint64_t base, uint32_t limit, uint32_t access)
{
	int error;
	struct vm_seg_desc vmsegdesc;

	bzero(&vmsegdesc, sizeof(vmsegdesc));
	vmsegdesc.cpuid = vcpu->vcpuid;
	vmsegdesc.regnum = reg;
	vmsegdesc.desc.base = base;
	vmsegdesc.desc.limit = limit;
	vmsegdesc.desc.access = access;

	error = vcpu_ioctl(vcpu, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
	return (error);
}

int
vm_get_desc(struct vcpu *vcpu, int reg, uint64_t *base, uint32_t *limit,
    uint32_t *access)
{
	int error;
	struct vm_seg_desc vmsegdesc;

	bzero(&vmsegdesc, sizeof(vmsegdesc));
	vmsegdesc.cpuid = vcpu->vcpuid;
	vmsegdesc.regnum = reg;

	error = vcpu_ioctl(vcpu, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
	if (error == 0) {
		*base = vmsegdesc.desc.base;
		*limit = vmsegdesc.desc.limit;
		*access = vmsegdesc.desc.access;
	}
	return (error);
}

int
vm_get_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *seg_desc)
{
	int error;

	error = vm_get_desc(vcpu, reg, &seg_desc->base, &seg_desc->limit,
	    &seg_desc->access);
	return (error);
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;
	struct vm_register vmreg;

	bzero(&vmreg, sizeof(vmreg));
	vmreg.cpuid = vcpu->vcpuid;
	vmreg.regnum = reg;
	vmreg.regval = val;

	error = vcpu_ioctl(vcpu, VM_SET_REGISTER, &vmreg);
	return (error);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *ret_val)
{
	int error;
	struct vm_register vmreg;

	bzero(&vmreg, sizeof(vmreg));
	vmreg.cpuid = vcpu->vcpuid;
	vmreg.regnum = reg;

	error = vcpu_ioctl(vcpu, VM_GET_REGISTER, &vmreg);
	*ret_val = vmreg.regval;
	return (error);
}

int
vm_set_register_set(struct vcpu *vcpu, unsigned int count,
    const int *regnums, uint64_t *regvals)
{
	int error;
	struct vm_register_set vmregset;

	bzero(&vmregset, sizeof(vmregset));
	vmregset.cpuid = vcpu->vcpuid;
	vmregset.count = count;
	vmregset.regnums = regnums;
	vmregset.regvals = regvals;

	error = vcpu_ioctl(vcpu, VM_SET_REGISTER_SET, &vmregset);
	return (error);
}

int
vm_get_register_set(struct vcpu *vcpu, unsigned int count,
    const int *regnums, uint64_t *regvals)
{
	int error;
	struct vm_register_set vmregset;

	bzero(&vmregset, sizeof(vmregset));
	vmregset.cpuid = vcpu->vcpuid;
	vmregset.count = count;
	vmregset.regnums = regnums;
	vmregset.regvals = regvals;

	error = vcpu_ioctl(vcpu, VM_GET_REGISTER_SET, &vmregset);
	return (error);
}
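
/*
 * Illustrative sketch (not part of the library): fetching several registers
 * in one ioctl with vm_get_register_set() instead of looping over
 * vm_get_register().
 *
 *	const int regnums[] = { VM_REG_GUEST_RIP, VM_REG_GUEST_RSP };
 *	uint64_t regvals[nitems(regnums)];
 *
 *	if (vm_get_register_set(vcpu, nitems(regnums), regnums,
 *	    regvals) == 0)
 *		printf("rip=%#lx rsp=%#lx\n", regvals[0], regvals[1]);
 */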

#ifdef __FreeBSD__
int
vm_run(struct vcpu *vcpu, struct vm_exit *vmexit)
{
	int error;
	struct vm_run vmrun;

	bzero(&vmrun, sizeof(vmrun));

	error = vcpu_ioctl(vcpu, VM_RUN, &vmrun);
	bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
	return (error);
}
#else
int
vm_run(struct vcpu *vcpu, const struct vm_entry *vm_entry,
    struct vm_exit *vm_exit)
{
	struct vm_entry entry;

	bcopy(vm_entry, &entry, sizeof (entry));
	entry.cpuid = vcpu->vcpuid;
	entry.exit_data = vm_exit;

	return (vcpu_ioctl(vcpu, VM_RUN, &entry));
}
#endif

int
vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
{
	struct vm_suspend vmsuspend;

	bzero(&vmsuspend, sizeof(vmsuspend));
	vmsuspend.how = how;
#ifndef __FreeBSD__
	/*
	 * The existing userspace does not (currently) inject targeted
	 * triple-fault suspend states, so it does not need to specify source.
	 */
	vmsuspend.source = -1;
#endif /* __FreeBSD__ */
	return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
}

#ifdef __FreeBSD__
int
vm_reinit(struct vmctx *ctx)
{

	return (ioctl(ctx->fd, VM_REINIT, 0));
}
#else
int
vm_reinit(struct vmctx *ctx, uint64_t flags)
{
	struct vm_reinit reinit = {
		.flags = flags
	};

	return (ioctl(ctx->fd, VM_REINIT, &reinit));
}
#endif

int
vm_inject_exception(struct vcpu *vcpu, int vector, int errcode_valid,
    uint32_t errcode, int restart_instruction)
{
	struct vm_exception exc;

	exc.cpuid = vcpu->vcpuid;
	exc.vector = vector;
	exc.error_code = errcode;
	exc.error_code_valid = errcode_valid;
	exc.restart_instruction = restart_instruction;

	return (vcpu_ioctl(vcpu, VM_INJECT_EXCEPTION, &exc));
}

#ifndef __FreeBSD__
void
vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid,
    int errcode)
{
	int error;
	struct vm_exception exc;

	exc.cpuid = vcpu->vcpuid;
	exc.vector = vector;
	exc.error_code = errcode;
	exc.error_code_valid = errcode_valid;
	exc.restart_instruction = 1;
	error = vcpu_ioctl(vcpu, VM_INJECT_EXCEPTION, &exc);

	assert(error == 0);
}
#endif /* __FreeBSD__ */
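
/*
 * Illustrative sketch (not part of the library): the shape of a minimal
 * vcpu loop on illumos, where each vm_run() call consumes a vm_entry and
 * fills in a vm_exit. Exit-code handling is abbreviated.
 *
 *	struct vm_entry entry = { 0 };
 *	struct vm_exit exit;
 *
 *	for (;;) {
 *		if (vm_run(vcpu, &entry, &exit) != 0)
 *			break;
 *		// ... dispatch on exit.exitcode, prepare the next entry ...
 *	}
 */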

int
vm_apicid2vcpu(struct vmctx *ctx __unused, int apicid)
{
	/*
	 * The apic id associated with the 'vcpu' has the same numerical value
	 * as the 'vcpu' itself.
	 */
	return (apicid);
}

int
vm_lapic_irq(struct vcpu *vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.cpuid = vcpu->vcpuid;
	vmirq.vector = vector;

	return (vcpu_ioctl(vcpu, VM_LAPIC_IRQ, &vmirq));
}

int
vm_lapic_local_irq(struct vcpu *vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.cpuid = vcpu->vcpuid;
	vmirq.vector = vector;

	return (vcpu_ioctl(vcpu, VM_LAPIC_LOCAL_IRQ, &vmirq));
}

int
vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
{
	struct vm_lapic_msi vmmsi;

	bzero(&vmmsi, sizeof(vmmsi));
	vmmsi.addr = addr;
	vmmsi.msg = msg;

	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
}
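
/*
 * Illustrative sketch (not part of the library): delivering an MSI with
 * the standard x86 encoding, i.e. an address in the 0xfee00000 window
 * selecting the destination APIC and a data word carrying the vector.
 *
 *	uint64_t addr = 0xfee00000 | (dest_apicid << 12);
 *	uint64_t msg = vector;		// edge-triggered, fixed delivery
 *
 *	vm_lapic_msi(ctx, addr, msg);
 */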

int
vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
}

int
vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
{

	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
}

int
vm_readwrite_kernemu_device(struct vcpu *vcpu, vm_paddr_t gpa,
    bool write, int size, uint64_t *value)
{
	struct vm_readwrite_kernemu_device irp = {
		.vcpuid = vcpu->vcpuid,
		.access_width = fls(size) - 1,
		.gpa = gpa,
		.value = write ? *value : ~0ul,
	};
	long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV);
	int rc;

	rc = vcpu_ioctl(vcpu, cmd, &irp);
	if (rc == 0 && !write)
		*value = irp.value;
	return (rc);
}

int
vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
}

int
vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
}

int
vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
}

int
vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
    enum vm_intr_trigger trigger)
{
	struct vm_isa_irq_trigger isa_irq_trigger;

	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
	isa_irq_trigger.atpic_irq = atpic_irq;
	isa_irq_trigger.trigger = trigger;

	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
}

int
vm_inject_nmi(struct vcpu *vcpu)
{
	struct vm_nmi vmnmi;

	bzero(&vmnmi, sizeof(vmnmi));
	vmnmi.cpuid = vcpu->vcpuid;

	return (vcpu_ioctl(vcpu, VM_INJECT_NMI, &vmnmi));
}

static const char *capstrmap[] = {
	[VM_CAP_HALT_EXIT]  = "hlt_exit",
	[VM_CAP_MTRAP_EXIT] = "mtrap_exit",
	[VM_CAP_PAUSE_EXIT] = "pause_exit",
#ifdef __FreeBSD__
	[VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
#endif
	[VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
	[VM_CAP_BPT_EXIT] = "bpt_exit",
};

int
vm_capability_name2type(const char *capname)
{
	int i;

	for (i = 0; i < (int)nitems(capstrmap); i++) {
		if (strcmp(capstrmap[i], capname) == 0)
			return (i);
	}

	return (-1);
}

const char *
vm_capability_type2name(int type)
{
	if (type >= 0 && type < (int)nitems(capstrmap))
		return (capstrmap[type]);

	return (NULL);
}

int
vm_get_capability(struct vcpu *vcpu, enum vm_cap_type cap,
    int *retval)
{
	int error;
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.cpuid = vcpu->vcpuid;
	vmcap.captype = cap;

	error = vcpu_ioctl(vcpu, VM_GET_CAPABILITY, &vmcap);
	*retval = vmcap.capval;
	return (error);
}

int
vm_set_capability(struct vcpu *vcpu, enum vm_cap_type cap, int val)
{
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.cpuid = vcpu->vcpuid;
	vmcap.captype = cap;
	vmcap.capval = val;

	return (vcpu_ioctl(vcpu, VM_SET_CAPABILITY, &vmcap));
}
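
/*
 * Illustrative sketch (not part of the library): turning on HLT exits for
 * a vcpu and reading the setting back.
 *
 *	int val;
 *
 *	if (vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1) != 0)
 *		err(1, "VM_CAP_HALT_EXIT unsupported");
 *	if (vm_get_capability(vcpu, VM_CAP_HALT_EXIT, &val) == 0)
 *		assert(val == 1);
 */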

#ifdef __FreeBSD__
int
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
}

int
vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
}

int
vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
    vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.bus = bus;
	pptmmio.slot = slot;
	pptmmio.func = func;
	pptmmio.gpa = gpa;
	pptmmio.len = len;
	pptmmio.hpa = hpa;

	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
    vm_paddr_t gpa, size_t len)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.bus = bus;
	pptmmio.slot = slot;
	pptmmio.func = func;
	pptmmio.gpa = gpa;
	pptmmio.len = len;

	return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_setup_pptdev_msi(struct vmctx *ctx, int bus, int slot, int func,
    uint64_t addr, uint64_t msg, int numvec)
{
	struct vm_pptdev_msi pptmsi;

	bzero(&pptmsi, sizeof(pptmsi));
	pptmsi.bus = bus;
	pptmsi.slot = slot;
	pptmsi.func = func;
	pptmsi.msg = msg;
	pptmsi.addr = addr;
	pptmsi.numvec = numvec;

	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
}

int
vm_setup_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func,
    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct vm_pptdev_msix pptmsix;

	bzero(&pptmsix, sizeof(pptmsix));
	pptmsix.bus = bus;
	pptmsix.slot = slot;
	pptmsix.func = func;
	pptmsix.idx = idx;
	pptmsix.msg = msg;
	pptmsix.addr = addr;
	pptmsix.vector_control = vector_control;

	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
}

int
vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev ppt;

	bzero(&ppt, sizeof(ppt));
	ppt.bus = bus;
	ppt.slot = slot;
	ppt.func = func;

	return ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &ppt);
}

#else /* __FreeBSD__ */

int
vm_assign_pptdev(struct vmctx *ctx, int pptfd)
{
	struct vm_pptdev pptdev;

	pptdev.pptfd = pptfd;
	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
}

int
vm_unassign_pptdev(struct vmctx *ctx, int pptfd)
{
	struct vm_pptdev pptdev;

	pptdev.pptfd = pptfd;
	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
}

int
vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len,
    vm_paddr_t hpa)
{
	struct vm_pptdev_mmio pptmmio;

	pptmmio.pptfd = pptfd;
	pptmmio.gpa = gpa;
	pptmmio.len = len;
	pptmmio.hpa = hpa;
	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_unmap_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.pptfd = pptfd;
	pptmmio.gpa = gpa;
	pptmmio.len = len;

	return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_setup_pptdev_msi(struct vmctx *ctx, int pptfd, uint64_t addr,
    uint64_t msg, int numvec)
{
	struct vm_pptdev_msi pptmsi;

	pptmsi.pptfd = pptfd;
	pptmsi.msg = msg;
	pptmsi.addr = addr;
	pptmsi.numvec = numvec;
	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
}

int
vm_setup_pptdev_msix(struct vmctx *ctx, int pptfd, int idx,
    uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct vm_pptdev_msix pptmsix;

	pptmsix.pptfd = pptfd;
	pptmsix.idx = idx;
	pptmsix.msg = msg;
	pptmsix.addr = addr;
	pptmsix.vector_control = vector_control;
	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
}

int
vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit,
    int *msix_limit)
{
	struct vm_pptdev_limits pptlimits;
	int error;

	bzero(&pptlimits, sizeof (pptlimits));
	pptlimits.pptfd = pptfd;
	error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits);

	*msi_limit = pptlimits.msi_limit;
	*msix_limit = pptlimits.msix_limit;
	return (error);
}

int
vm_disable_pptdev_msix(struct vmctx *ctx, int pptfd)
{
	struct vm_pptdev pptdev;

	pptdev.pptfd = pptfd;
	return (ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &pptdev));
}
#endif /* __FreeBSD__ */

uint64_t *
vm_get_stats(struct vcpu *vcpu, struct timeval *ret_tv,
    int *ret_entries)
{
	static _Thread_local uint64_t *stats_buf;
	static _Thread_local uint32_t stats_count;
	uint64_t *new_stats;
	struct vm_stats vmstats;
	uint32_t count, index;
	bool have_stats;

	have_stats = false;
	vmstats.cpuid = vcpu->vcpuid;
	count = 0;
	for (index = 0;; index += nitems(vmstats.statbuf)) {
		vmstats.index = index;
		if (vcpu_ioctl(vcpu, VM_STATS_IOC, &vmstats) != 0)
			break;
		if (stats_count < index + vmstats.num_entries) {
			new_stats = reallocarray(stats_buf,
			    index + vmstats.num_entries, sizeof(uint64_t));
			if (new_stats == NULL) {
				errno = ENOMEM;
				return (NULL);
			}
			stats_count = index + vmstats.num_entries;
			stats_buf = new_stats;
		}
		memcpy(stats_buf + index, vmstats.statbuf,
		    vmstats.num_entries * sizeof(uint64_t));
		count += vmstats.num_entries;
		have_stats = true;

		if (vmstats.num_entries != nitems(vmstats.statbuf))
			break;
	}
	if (have_stats) {
		if (ret_entries)
			*ret_entries = count;
		if (ret_tv)
			*ret_tv = vmstats.tv;
		return (stats_buf);
	} else {
		return (NULL);
	}
}

const char *
vm_get_stat_desc(struct vmctx *ctx, int index)
{
	static struct vm_stat_desc statdesc;

	statdesc.index = index;
	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
		return (statdesc.desc);
	else
		return (NULL);
}
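
/*
 * Illustrative sketch (not part of the library): dumping every stat for a
 * vcpu by pairing vm_get_stats() with vm_get_stat_desc().
 *
 *	struct timeval tv;
 *	int i, nstats;
 *	uint64_t *stats = vm_get_stats(vcpu, &tv, &nstats);
 *
 *	for (i = 0; stats != NULL && i < nstats; i++)
 *		printf("%s: %lu\n", vm_get_stat_desc(ctx, i), stats[i]);
 */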

int
vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));
	x2apic.cpuid = vcpu->vcpuid;

	error = vcpu_ioctl(vcpu, VM_GET_X2APIC_STATE, &x2apic);
	*state = x2apic.state;
	return (error);
}

int
vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));
	x2apic.cpuid = vcpu->vcpuid;
	x2apic.state = state;

	error = vcpu_ioctl(vcpu, VM_SET_X2APIC_STATE, &x2apic);

	return (error);
}

#ifndef __FreeBSD__
int
vcpu_reset(struct vcpu *vcpu)
{
	struct vm_vcpu_reset vvr;

	vvr.vcpuid = vcpu->vcpuid;
	vvr.kind = VRK_RESET;

	return (vcpu_ioctl(vcpu, VM_RESET_CPU, &vvr));
}
#else /* __FreeBSD__ */
/*
 * From Intel Vol 3a:
 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
 */
int
vcpu_reset(struct vcpu *vcpu)
{
	int error;
	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
	uint32_t desc_access, desc_limit;
	uint16_t sel;

	zero = 0;

	rflags = 0x2;
	error = vm_set_register(vcpu, VM_REG_GUEST_RFLAGS, rflags);
	if (error)
		goto done;

	rip = 0xfff0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RIP, rip)) != 0)
		goto done;

	/*
	 * According to Intel's Software Developer's Manual CR0 should be
	 * initialized with CR0_ET | CR0_NW | CR0_CD but that crashes some
	 * guests like Windows.
	 */
	cr0 = CR0_NE;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR2, zero)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR3, zero)) != 0)
		goto done;

	cr4 = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
		goto done;

	/*
	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
	 */
	desc_base = 0xffff0000;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vcpu, VM_REG_GUEST_CS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0xf000;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CS, sel)) != 0)
		goto done;

	/*
	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
	 */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vcpu, VM_REG_GUEST_SS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_DS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_ES,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_FS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_GS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_SS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_ES, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_FS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_GS, sel)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_EFER, zero)) != 0)
		goto done;

	/* General purpose registers */
	rdx = 0xf00;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RAX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RBX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RCX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RSI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RDI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RBP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RSP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R8, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R9, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R10, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R11, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R12, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R13, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R14, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R15, zero)) != 0)
		goto done;

	/* GDTR, IDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0;
	error = vm_set_desc(vcpu, VM_REG_GUEST_GDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_IDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	/* TR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0000008b;
	error = vm_set_desc(vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_TR, sel)) != 0)
		goto done;

	/* LDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x00000082;
	error = vm_set_desc(vcpu, VM_REG_GUEST_LDTR, desc_base,
	    desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DR6,
	    0xffff0ff0)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DR7, 0x400)) !=
	    0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_INTR_SHADOW,
	    zero)) != 0)
		goto done;

	error = 0;
done:
	return (error);
}
#endif /* __FreeBSD__ */

int
vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
{
	int error, i;
	struct vm_gpa_pte gpapte;

	bzero(&gpapte, sizeof(gpapte));
	gpapte.gpa = gpa;

	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);

	if (error == 0) {
		*num = gpapte.ptenum;
		for (i = 0; i < gpapte.ptenum; i++)
			pte[i] = gpapte.pte[i];
	}

	return (error);
}

int
vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
{
	int error;
	struct vm_hpet_cap cap;

	bzero(&cap, sizeof(struct vm_hpet_cap));
	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
	if (capabilities != NULL)
		*capabilities = cap.capabilities;
	return (error);
}

int
vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.vcpuid = vcpu->vcpuid;
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = vcpu_ioctl(vcpu, VM_GLA2GPA, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.vcpuid = vcpu->vcpuid;
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = vcpu_ioctl(vcpu, VM_GLA2GPA_NOFAULT, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

#ifndef min
#define	min(a,b)	(((a) < (b)) ? (a) : (b))
#endif

int
vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
    int *fault)
{
	void *va;
	uint64_t gpa, off;
	int error, i, n;

	for (i = 0; i < iovcnt; i++) {
		iov[i].iov_base = 0;
		iov[i].iov_len = 0;
	}

	while (len) {
		assert(iovcnt > 0);
		error = vm_gla2gpa(vcpu, paging, gla, prot, &gpa, fault);
		if (error || *fault)
			return (error);

		off = gpa & PAGE_MASK;
		n = MIN(len, PAGE_SIZE - off);

		va = vm_map_gpa(vcpu->ctx, gpa, n);
		if (va == NULL)
			return (EFAULT);

		iov->iov_base = va;
		iov->iov_len = n;
		iov++;
		iovcnt--;

		gla += n;
		len -= n;
	}
	return (0);
}
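
/*
 * Illustrative sketch (not part of the library): reading a guest-virtual
 * buffer by building an iovec with vm_copy_setup() and draining it with
 * vm_copyin(). 'paging' must describe the vcpu's current paging mode, as
 * captured at exit time.
 *
 *	struct iovec iov[2];	// enough for a buffer crossing one page
 *	char buf[128];
 *	int fault;
 *
 *	if (vm_copy_setup(vcpu, &paging, gla, sizeof(buf), PROT_READ,
 *	    iov, nitems(iov), &fault) == 0 && !fault)
 *		vm_copyin(iov, buf, sizeof(buf));
 */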

void
vm_copy_teardown(struct iovec *iov __unused, int iovcnt __unused)
{
	/*
	 * Intentionally empty. This is used by the instruction
	 * emulation code shared with the kernel. The in-kernel
	 * version of this is non-empty.
	 */
}

void
vm_copyin(struct iovec *iov, void *vp, size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	dst = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		src = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		dst += n;
		len -= n;
	}
}

void
vm_copyout(const void *vp, struct iovec *iov, size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	src = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		dst = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		src += n;
		len -= n;
	}
}

static int
vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
{
	struct vm_cpuset vm_cpuset;
	int error;

	bzero(&vm_cpuset, sizeof(struct vm_cpuset));
	vm_cpuset.which = which;
	vm_cpuset.cpusetsize = sizeof(cpuset_t);
	vm_cpuset.cpus = cpus;

	error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset);
	return (error);
}

int
vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
}

#ifdef __FreeBSD__
int
vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
}
#endif /* __FreeBSD__ */

int
vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu->vcpuid;
	error = vcpu_ioctl(vcpu, VM_ACTIVATE_CPU, &ac);
	return (error);
}

int
vm_suspend_all_cpus(struct vmctx *ctx)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = -1;
	error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
	return (error);
}

int
vm_suspend_cpu(struct vcpu *vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu->vcpuid;
	error = vcpu_ioctl(vcpu, VM_SUSPEND_CPU, &ac);
	return (error);
}

int
vm_resume_cpu(struct vcpu *vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu->vcpuid;
	error = vcpu_ioctl(vcpu, VM_RESUME_CPU, &ac);
	return (error);
}

int
vm_resume_all_cpus(struct vmctx *ctx)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = -1;
	error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
	return (error);
}
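
/*
 * Illustrative sketch (not part of the library): checking which vcpus have
 * been activated using the cpuset_t filled in by vm_active_cpus().
 *
 *	cpuset_t cpus;
 *	int i;
 *
 *	if (vm_active_cpus(ctx, &cpus) == 0) {
 *		for (i = 0; i < VM_MAXCPU; i++)
 *			if (CPU_ISSET(i, &cpus))
 *				printf("vcpu %d active\n", i);
 *	}
 */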

int
vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	vmii.vcpuid = vcpu->vcpuid;
	error = vcpu_ioctl(vcpu, VM_GET_INTINFO, &vmii);
	if (error == 0) {
		*info1 = vmii.info1;
		*info2 = vmii.info2;
	}
	return (error);
}

int
vm_set_intinfo(struct vcpu *vcpu, uint64_t info1)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	vmii.vcpuid = vcpu->vcpuid;
	vmii.info1 = info1;
	error = vcpu_ioctl(vcpu, VM_SET_INTINFO, &vmii);
	return (error);
}

int
vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	rtcdata.value = value;
	error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
	return (error);
}

int
vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
	if (error == 0)
		*retval = rtcdata.value;
	return (error);
}

#ifdef __FreeBSD__
int
vm_rtc_settime(struct vmctx *ctx, time_t secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	rtctime.secs = secs;
	error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
	return (error);
}

int
vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
	if (error == 0)
		*secs = rtctime.secs;
	return (error);
}
#else /* __FreeBSD__ */

int
vm_rtc_settime(struct vmctx *ctx, const timespec_t *ts)
{
	return (ioctl(ctx->fd, VM_RTC_SETTIME, ts));
}

int
vm_rtc_gettime(struct vmctx *ctx, timespec_t *ts)
{
	return (ioctl(ctx->fd, VM_RTC_GETTIME, ts));
}

#endif /* __FreeBSD__ */

int
vm_restart_instruction(void *ctxp, int vcpu __unused)
{
	struct vmctx *ctx = ctxp;
	int arg;

	return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &arg));
}

int
vm_set_topology(struct vmctx *ctx,
    uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
{
	struct vm_cpu_topology topology;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	topology.sockets = sockets;
	topology.cores = cores;
	topology.threads = threads;
	topology.maxcpus = maxcpus;
	return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
}

int
vm_get_topology(struct vmctx *ctx,
    uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
{
	struct vm_cpu_topology topology;
	int error;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
	if (error == 0) {
		*sockets = topology.sockets;
		*cores = topology.cores;
		*threads = topology.threads;
		*maxcpus = topology.maxcpus;
	}
	return (error);
}
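
/*
 * Illustrative sketch (not part of the library): describing an 8-vcpu
 * guest as one socket of four cores with two threads each. The vcpu count
 * seen by the guest is sockets * cores * threads.
 *
 *	vm_set_topology(ctx, 1, 4, 2, 8);  // sockets, cores, threads, maxcpus
 */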

#ifdef __FreeBSD__
/* Keep in sync with machine/vmm_dev.h. */
static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
    VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER,
    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
    VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
    VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
    VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
    VM_GLA2GPA_NOFAULT,
    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
    VM_SET_INTINFO, VM_GET_INTINFO,
    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY,
    VM_SNAPSHOT_REQ, VM_RESTORE_TIME
};

int
vm_limit_rights(struct vmctx *ctx)
{
	cap_rights_t rights;
	size_t ncmds;

	cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW);
	if (caph_rights_limit(ctx->fd, &rights) != 0)
		return (-1);
	ncmds = nitems(vm_ioctl_cmds);
	if (caph_ioctls_limit(ctx->fd, vm_ioctl_cmds, ncmds) != 0)
		return (-1);
	return (0);
}
#endif
2188 */ 2189 int 2190 vm_get_device_fd(struct vmctx *ctx) 2191 { 2192 2193 return (ctx->fd); 2194 } 2195 2196 #ifndef __FreeBSD__ 2197 int 2198 vm_pmtmr_set_location(struct vmctx *ctx, uint16_t ioport) 2199 { 2200 return (ioctl(ctx->fd, VM_PMTMR_LOCATE, ioport)); 2201 } 2202 2203 int 2204 vm_wrlock_cycle(struct vmctx *ctx) 2205 { 2206 if (ioctl(ctx->fd, VM_WRLOCK_CYCLE, 0) != 0) { 2207 return (errno); 2208 } 2209 return (0); 2210 } 2211 2212 int 2213 vm_get_run_state(struct vcpu *vcpu, enum vcpu_run_state *state, 2214 uint8_t *sipi_vector) 2215 { 2216 struct vm_run_state data; 2217 2218 data.vcpuid = vcpu->vcpuid; 2219 if (vcpu_ioctl(vcpu, VM_GET_RUN_STATE, &data) != 0) { 2220 return (errno); 2221 } 2222 2223 *state = data.state; 2224 *sipi_vector = data.sipi_vector; 2225 return (0); 2226 } 2227 2228 int 2229 vm_set_run_state(struct vcpu *vcpu, enum vcpu_run_state state, 2230 uint8_t sipi_vector) 2231 { 2232 struct vm_run_state data; 2233 2234 data.vcpuid = vcpu->vcpuid; 2235 data.state = state; 2236 data.sipi_vector = sipi_vector; 2237 if (vcpu_ioctl(vcpu, VM_SET_RUN_STATE, &data) != 0) { 2238 return (errno); 2239 } 2240 2241 return (0); 2242 } 2243 2244 int 2245 vm_vcpu_barrier(struct vcpu *vcpu) 2246 { 2247 if (ioctl(vcpu->ctx->fd, VM_VCPU_BARRIER, vcpu->vcpuid) != 0) { 2248 return (errno); 2249 } 2250 2251 return (0); 2252 } 2253 #endif /* __FreeBSD__ */ 2254 2255 #ifdef __FreeBSD__ 2256 const cap_ioctl_t * 2257 vm_get_ioctls(size_t *len) 2258 { 2259 cap_ioctl_t *cmds; 2260 2261 if (len == NULL) { 2262 cmds = malloc(sizeof(vm_ioctl_cmds)); 2263 if (cmds == NULL) 2264 return (NULL); 2265 bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds)); 2266 return (cmds); 2267 } 2268 2269 *len = nitems(vm_ioctl_cmds); 2270 return (NULL); 2271 } 2272 #endif /* __FreeBSD__ */ 2273