1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * tools/testing/selftests/kvm/lib/kvm_util.c 4 * 5 * Copyright (C) 2018, Google LLC. 6 */ 7 8 #define _GNU_SOURCE /* for program_invocation_name */ 9 #include "test_util.h" 10 #include "kvm_util.h" 11 #include "processor.h" 12 13 #include <assert.h> 14 #include <sys/mman.h> 15 #include <sys/types.h> 16 #include <sys/stat.h> 17 #include <unistd.h> 18 #include <linux/kernel.h> 19 20 #define KVM_UTIL_MIN_PFN 2 21 22 static int vcpu_mmap_sz(void); 23 24 int open_path_or_exit(const char *path, int flags) 25 { 26 int fd; 27 28 fd = open(path, flags); 29 __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); 30 31 return fd; 32 } 33 34 /* 35 * Open KVM_DEV_PATH if available, otherwise exit the entire program. 36 * 37 * Input Args: 38 * flags - The flags to pass when opening KVM_DEV_PATH. 39 * 40 * Return: 41 * The opened file descriptor of /dev/kvm. 42 */ 43 static int _open_kvm_dev_path_or_exit(int flags) 44 { 45 return open_path_or_exit(KVM_DEV_PATH, flags); 46 } 47 48 int open_kvm_dev_path_or_exit(void) 49 { 50 return _open_kvm_dev_path_or_exit(O_RDONLY); 51 } 52 53 static bool get_module_param_bool(const char *module_name, const char *param) 54 { 55 const int path_size = 128; 56 char path[path_size]; 57 char value; 58 ssize_t r; 59 int fd; 60 61 r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", 62 module_name, param); 63 TEST_ASSERT(r < path_size, 64 "Failed to construct sysfs path in %d bytes.", path_size); 65 66 fd = open_path_or_exit(path, O_RDONLY); 67 68 r = read(fd, &value, 1); 69 TEST_ASSERT(r == 1, "read(%s) failed", path); 70 71 r = close(fd); 72 TEST_ASSERT(!r, "close(%s) failed", path); 73 74 if (value == 'Y') 75 return true; 76 else if (value == 'N') 77 return false; 78 79 TEST_FAIL("Unrecognized value '%c' for boolean module param", value); 80 } 81 82 bool get_kvm_intel_param_bool(const char *param) 83 { 84 return get_module_param_bool("kvm_intel", param); 85 } 86 87 bool 
get_kvm_amd_param_bool(const char *param) 88 { 89 return get_module_param_bool("kvm_amd", param); 90 } 91 92 /* 93 * Capability 94 * 95 * Input Args: 96 * cap - Capability 97 * 98 * Output Args: None 99 * 100 * Return: 101 * On success, the Value corresponding to the capability (KVM_CAP_*) 102 * specified by the value of cap. On failure a TEST_ASSERT failure 103 * is produced. 104 * 105 * Looks up and returns the value corresponding to the capability 106 * (KVM_CAP_*) given by cap. 107 */ 108 unsigned int kvm_check_cap(long cap) 109 { 110 int ret; 111 int kvm_fd; 112 113 kvm_fd = open_kvm_dev_path_or_exit(); 114 ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap); 115 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); 116 117 close(kvm_fd); 118 119 return (unsigned int)ret; 120 } 121 122 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) 123 { 124 if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) 125 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); 126 else 127 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); 128 vm->dirty_ring_size = ring_size; 129 } 130 131 static void vm_open(struct kvm_vm *vm) 132 { 133 vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR); 134 135 TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT)); 136 137 vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); 138 TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); 139 } 140 141 const char *vm_guest_mode_string(uint32_t i) 142 { 143 static const char * const strings[] = { 144 [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", 145 [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", 146 [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", 147 [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", 148 [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", 149 [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", 150 [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", 151 
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", 152 [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", 153 [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", 154 [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", 155 [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", 156 [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", 157 [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", 158 [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", 159 }; 160 _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, 161 "Missing new mode strings?"); 162 163 TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i); 164 165 return strings[i]; 166 } 167 168 const struct vm_guest_mode_params vm_guest_mode_params[] = { 169 [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, 170 [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, 171 [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, 172 [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, 173 [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, 174 [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, 175 [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, 176 [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, 177 [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, 178 [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, 179 [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, 180 [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, 181 [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, 182 [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, 183 [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, 184 }; 185 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, 186 "Missing new mode params?"); 187 188 struct kvm_vm *____vm_create(enum vm_guest_mode mode) 189 { 190 struct kvm_vm *vm; 191 192 vm = calloc(1, sizeof(*vm)); 193 TEST_ASSERT(vm != NULL, "Insufficient Memory"); 194 195 INIT_LIST_HEAD(&vm->vcpus); 196 vm->regions.gpa_tree = RB_ROOT; 197 vm->regions.hva_tree = RB_ROOT; 198 
hash_init(vm->regions.slot_hash); 199 200 vm->mode = mode; 201 vm->type = 0; 202 203 vm->pa_bits = vm_guest_mode_params[mode].pa_bits; 204 vm->va_bits = vm_guest_mode_params[mode].va_bits; 205 vm->page_size = vm_guest_mode_params[mode].page_size; 206 vm->page_shift = vm_guest_mode_params[mode].page_shift; 207 208 /* Setup mode specific traits. */ 209 switch (vm->mode) { 210 case VM_MODE_P52V48_4K: 211 vm->pgtable_levels = 4; 212 break; 213 case VM_MODE_P52V48_64K: 214 vm->pgtable_levels = 3; 215 break; 216 case VM_MODE_P48V48_4K: 217 vm->pgtable_levels = 4; 218 break; 219 case VM_MODE_P48V48_64K: 220 vm->pgtable_levels = 3; 221 break; 222 case VM_MODE_P40V48_4K: 223 case VM_MODE_P36V48_4K: 224 vm->pgtable_levels = 4; 225 break; 226 case VM_MODE_P40V48_64K: 227 case VM_MODE_P36V48_64K: 228 vm->pgtable_levels = 3; 229 break; 230 case VM_MODE_P48V48_16K: 231 case VM_MODE_P40V48_16K: 232 case VM_MODE_P36V48_16K: 233 vm->pgtable_levels = 4; 234 break; 235 case VM_MODE_P36V47_16K: 236 vm->pgtable_levels = 3; 237 break; 238 case VM_MODE_PXXV48_4K: 239 #ifdef __x86_64__ 240 kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); 241 /* 242 * Ignore KVM support for 5-level paging (vm->va_bits == 57), 243 * it doesn't take effect unless a CR4.LA57 is set, which it 244 * isn't for this VM_MODE. 
245 */ 246 TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, 247 "Linear address width (%d bits) not supported", 248 vm->va_bits); 249 pr_debug("Guest physical address width detected: %d\n", 250 vm->pa_bits); 251 vm->pgtable_levels = 4; 252 vm->va_bits = 48; 253 #else 254 TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); 255 #endif 256 break; 257 case VM_MODE_P47V64_4K: 258 vm->pgtable_levels = 5; 259 break; 260 case VM_MODE_P44V64_4K: 261 vm->pgtable_levels = 5; 262 break; 263 default: 264 TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); 265 } 266 267 #ifdef __aarch64__ 268 if (vm->pa_bits != 40) 269 vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); 270 #endif 271 272 vm_open(vm); 273 274 /* Limit to VA-bit canonical virtual addresses. */ 275 vm->vpages_valid = sparsebit_alloc(); 276 sparsebit_set_num(vm->vpages_valid, 277 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 278 sparsebit_set_num(vm->vpages_valid, 279 (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, 280 (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 281 282 /* Limit physical addresses to PA-bits. */ 283 vm->max_gfn = vm_compute_max_gfn(vm); 284 285 /* Allocate and setup memory for guest. */ 286 vm->vpages_mapped = sparsebit_alloc(); 287 288 return vm; 289 } 290 291 static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, 292 uint32_t nr_runnable_vcpus, 293 uint64_t extra_mem_pages) 294 { 295 uint64_t nr_pages; 296 297 TEST_ASSERT(nr_runnable_vcpus, 298 "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); 299 300 TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), 301 "nr_vcpus = %d too large for host, max-vcpus = %d", 302 nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); 303 304 /* 305 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the 306 * test code and other per-VM assets that will be loaded into memslot0. 307 */ 308 nr_pages = 512; 309 310 /* Account for the per-vCPU stacks on behalf of the test. 
*/ 311 nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS; 312 313 /* 314 * Account for the number of pages needed for the page tables. The 315 * maximum page table size for a memory region will be when the 316 * smallest page size is used. Considering each page contains x page 317 * table descriptors, the total extra size for page tables (for extra 318 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller 319 * than N/x*2. 320 */ 321 nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; 322 323 return vm_adjust_num_guest_pages(mode, nr_pages); 324 } 325 326 struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, 327 uint64_t nr_extra_pages) 328 { 329 uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, 330 nr_extra_pages); 331 struct kvm_vm *vm; 332 int i; 333 334 pr_debug("%s: mode='%s' pages='%ld'\n", __func__, 335 vm_guest_mode_string(mode), nr_pages); 336 337 vm = ____vm_create(mode); 338 339 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); 340 for (i = 0; i < NR_MEM_REGIONS; i++) 341 vm->memslots[i] = 0; 342 343 kvm_vm_elf_load(vm, program_invocation_name); 344 345 #ifdef __x86_64__ 346 vm_create_irqchip(vm); 347 #endif 348 return vm; 349 } 350 351 /* 352 * VM Create with customized parameters 353 * 354 * Input Args: 355 * mode - VM Mode (e.g. VM_MODE_P52V48_4K) 356 * nr_vcpus - VCPU count 357 * extra_mem_pages - Non-slot0 physical memory total size 358 * guest_code - Guest entry point 359 * vcpuids - VCPU IDs 360 * 361 * Output Args: None 362 * 363 * Return: 364 * Pointer to opaque structure that describes the created VM. 365 * 366 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). 367 * extra_mem_pages is only used to calculate the maximum page table size, 368 * no real memory allocation for non-slot0 memory in this function. 
369 */ 370 struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, 371 uint64_t extra_mem_pages, 372 void *guest_code, struct kvm_vcpu *vcpus[]) 373 { 374 struct kvm_vm *vm; 375 int i; 376 377 TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array"); 378 379 vm = __vm_create(mode, nr_vcpus, extra_mem_pages); 380 381 for (i = 0; i < nr_vcpus; ++i) 382 vcpus[i] = vm_vcpu_add(vm, i, guest_code); 383 384 return vm; 385 } 386 387 struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, 388 uint64_t extra_mem_pages, 389 void *guest_code) 390 { 391 struct kvm_vcpu *vcpus[1]; 392 struct kvm_vm *vm; 393 394 vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages, 395 guest_code, vcpus); 396 397 *vcpu = vcpus[0]; 398 return vm; 399 } 400 401 /* 402 * VM Restart 403 * 404 * Input Args: 405 * vm - VM that has been released before 406 * 407 * Output Args: None 408 * 409 * Reopens the file descriptors associated to the VM and reinstates the 410 * global state, such as the irqchip and the memory regions that are mapped 411 * into the guest. 
412 */ 413 void kvm_vm_restart(struct kvm_vm *vmp) 414 { 415 int ctr; 416 struct userspace_mem_region *region; 417 418 vm_open(vmp); 419 if (vmp->has_irqchip) 420 vm_create_irqchip(vmp); 421 422 hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) { 423 int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); 424 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 425 " rc: %i errno: %i\n" 426 " slot: %u flags: 0x%x\n" 427 " guest_phys_addr: 0x%llx size: 0x%llx", 428 ret, errno, region->region.slot, 429 region->region.flags, 430 region->region.guest_phys_addr, 431 region->region.memory_size); 432 } 433 } 434 435 __weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, 436 uint32_t vcpu_id) 437 { 438 return __vm_vcpu_add(vm, vcpu_id); 439 } 440 441 struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) 442 { 443 kvm_vm_restart(vm); 444 445 return vm_vcpu_recreate(vm, 0); 446 } 447 448 /* 449 * Userspace Memory Region Find 450 * 451 * Input Args: 452 * vm - Virtual Machine 453 * start - Starting VM physical address 454 * end - Ending VM physical address, inclusive. 455 * 456 * Output Args: None 457 * 458 * Return: 459 * Pointer to overlapping region, NULL if no such region. 460 * 461 * Searches for a region with any physical memory that overlaps with 462 * any portion of the guest physical addresses from start to end 463 * inclusive. If multiple overlapping regions exist, a pointer to any 464 * of the regions is returned. Null is returned only when no overlapping 465 * region exists. 
466 */ 467 static struct userspace_mem_region * 468 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) 469 { 470 struct rb_node *node; 471 472 for (node = vm->regions.gpa_tree.rb_node; node; ) { 473 struct userspace_mem_region *region = 474 container_of(node, struct userspace_mem_region, gpa_node); 475 uint64_t existing_start = region->region.guest_phys_addr; 476 uint64_t existing_end = region->region.guest_phys_addr 477 + region->region.memory_size - 1; 478 if (start <= existing_end && end >= existing_start) 479 return region; 480 481 if (start < existing_start) 482 node = node->rb_left; 483 else 484 node = node->rb_right; 485 } 486 487 return NULL; 488 } 489 490 /* 491 * KVM Userspace Memory Region Find 492 * 493 * Input Args: 494 * vm - Virtual Machine 495 * start - Starting VM physical address 496 * end - Ending VM physical address, inclusive. 497 * 498 * Output Args: None 499 * 500 * Return: 501 * Pointer to overlapping region, NULL if no such region. 502 * 503 * Public interface to userspace_mem_region_find. Allows tests to look up 504 * the memslot datastructure for a given range of guest physical memory. 505 */ 506 struct kvm_userspace_memory_region * 507 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, 508 uint64_t end) 509 { 510 struct userspace_mem_region *region; 511 512 region = userspace_mem_region_find(vm, start, end); 513 if (!region) 514 return NULL; 515 516 return ®ion->region; 517 } 518 519 __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) 520 { 521 522 } 523 524 /* 525 * VM VCPU Remove 526 * 527 * Input Args: 528 * vcpu - VCPU to remove 529 * 530 * Output Args: None 531 * 532 * Return: None, TEST_ASSERT failures for all error conditions 533 * 534 * Removes a vCPU from a VM and frees its resources. 
535 */ 536 static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) 537 { 538 int ret; 539 540 if (vcpu->dirty_gfns) { 541 ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); 542 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 543 vcpu->dirty_gfns = NULL; 544 } 545 546 ret = munmap(vcpu->run, vcpu_mmap_sz()); 547 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 548 549 ret = close(vcpu->fd); 550 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 551 552 list_del(&vcpu->list); 553 554 vcpu_arch_free(vcpu); 555 free(vcpu); 556 } 557 558 void kvm_vm_release(struct kvm_vm *vmp) 559 { 560 struct kvm_vcpu *vcpu, *tmp; 561 int ret; 562 563 list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) 564 vm_vcpu_rm(vmp, vcpu); 565 566 ret = close(vmp->fd); 567 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 568 569 ret = close(vmp->kvm_fd); 570 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 571 } 572 573 static void __vm_mem_region_delete(struct kvm_vm *vm, 574 struct userspace_mem_region *region, 575 bool unlink) 576 { 577 int ret; 578 579 if (unlink) { 580 rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); 581 rb_erase(®ion->hva_node, &vm->regions.hva_tree); 582 hash_del(®ion->slot_node); 583 } 584 585 region->region.memory_size = 0; 586 vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 587 588 sparsebit_free(®ion->unused_phy_pages); 589 ret = munmap(region->mmap_start, region->mmap_size); 590 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 591 if (region->fd >= 0) { 592 /* There's an extra map when using shared memory. */ 593 ret = munmap(region->mmap_alias, region->mmap_size); 594 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 595 close(region->fd); 596 } 597 598 free(region); 599 } 600 601 /* 602 * Destroys and frees the VM pointed to by vmp. 
603 */ 604 void kvm_vm_free(struct kvm_vm *vmp) 605 { 606 int ctr; 607 struct hlist_node *node; 608 struct userspace_mem_region *region; 609 610 if (vmp == NULL) 611 return; 612 613 /* Free cached stats metadata and close FD */ 614 if (vmp->stats_fd) { 615 free(vmp->stats_desc); 616 close(vmp->stats_fd); 617 } 618 619 /* Free userspace_mem_regions. */ 620 hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) 621 __vm_mem_region_delete(vmp, region, false); 622 623 /* Free sparsebit arrays. */ 624 sparsebit_free(&vmp->vpages_valid); 625 sparsebit_free(&vmp->vpages_mapped); 626 627 kvm_vm_release(vmp); 628 629 /* Free the structure describing the VM. */ 630 free(vmp); 631 } 632 633 int kvm_memfd_alloc(size_t size, bool hugepages) 634 { 635 int memfd_flags = MFD_CLOEXEC; 636 int fd, r; 637 638 if (hugepages) 639 memfd_flags |= MFD_HUGETLB; 640 641 fd = memfd_create("kvm_selftest", memfd_flags); 642 TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); 643 644 r = ftruncate(fd, size); 645 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); 646 647 r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); 648 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); 649 650 return fd; 651 } 652 653 /* 654 * Memory Compare, host virtual to guest virtual 655 * 656 * Input Args: 657 * hva - Starting host virtual address 658 * vm - Virtual Machine 659 * gva - Starting guest virtual address 660 * len - number of bytes to compare 661 * 662 * Output Args: None 663 * 664 * Input/Output Args: None 665 * 666 * Return: 667 * Returns 0 if the bytes starting at hva for a length of len 668 * are equal the guest virtual bytes starting at gva. Returns 669 * a value < 0, if bytes at hva are less than those at gva. 670 * Otherwise a value > 0 is returned. 671 * 672 * Compares the bytes starting at the host virtual address hva, for 673 * a length of len, to the guest bytes starting at the guest virtual 674 * address given by gva. 
675 */ 676 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) 677 { 678 size_t amt; 679 680 /* 681 * Compare a batch of bytes until either a match is found 682 * or all the bytes have been compared. 683 */ 684 for (uintptr_t offset = 0; offset < len; offset += amt) { 685 uintptr_t ptr1 = (uintptr_t)hva + offset; 686 687 /* 688 * Determine host address for guest virtual address 689 * at offset. 690 */ 691 uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); 692 693 /* 694 * Determine amount to compare on this pass. 695 * Don't allow the comparsion to cross a page boundary. 696 */ 697 amt = len - offset; 698 if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) 699 amt = vm->page_size - (ptr1 % vm->page_size); 700 if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) 701 amt = vm->page_size - (ptr2 % vm->page_size); 702 703 assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); 704 assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); 705 706 /* 707 * Perform the comparison. If there is a difference 708 * return that result to the caller, otherwise need 709 * to continue on looking for a mismatch. 710 */ 711 int ret = memcmp((void *)ptr1, (void *)ptr2, amt); 712 if (ret != 0) 713 return ret; 714 } 715 716 /* 717 * No mismatch found. Let the caller know the two memory 718 * areas are equal. 
719 */ 720 return 0; 721 } 722 723 static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, 724 struct userspace_mem_region *region) 725 { 726 struct rb_node **cur, *parent; 727 728 for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) { 729 struct userspace_mem_region *cregion; 730 731 cregion = container_of(*cur, typeof(*cregion), gpa_node); 732 parent = *cur; 733 if (region->region.guest_phys_addr < 734 cregion->region.guest_phys_addr) 735 cur = &(*cur)->rb_left; 736 else { 737 TEST_ASSERT(region->region.guest_phys_addr != 738 cregion->region.guest_phys_addr, 739 "Duplicate GPA in region tree"); 740 741 cur = &(*cur)->rb_right; 742 } 743 } 744 745 rb_link_node(®ion->gpa_node, parent, cur); 746 rb_insert_color(®ion->gpa_node, gpa_tree); 747 } 748 749 static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, 750 struct userspace_mem_region *region) 751 { 752 struct rb_node **cur, *parent; 753 754 for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) { 755 struct userspace_mem_region *cregion; 756 757 cregion = container_of(*cur, typeof(*cregion), hva_node); 758 parent = *cur; 759 if (region->host_mem < cregion->host_mem) 760 cur = &(*cur)->rb_left; 761 else { 762 TEST_ASSERT(region->host_mem != 763 cregion->host_mem, 764 "Duplicate HVA in region tree"); 765 766 cur = &(*cur)->rb_right; 767 } 768 } 769 770 rb_link_node(®ion->hva_node, parent, cur); 771 rb_insert_color(®ion->hva_node, hva_tree); 772 } 773 774 775 int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 776 uint64_t gpa, uint64_t size, void *hva) 777 { 778 struct kvm_userspace_memory_region region = { 779 .slot = slot, 780 .flags = flags, 781 .guest_phys_addr = gpa, 782 .memory_size = size, 783 .userspace_addr = (uintptr_t)hva, 784 }; 785 786 return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion); 787 } 788 789 void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 790 uint64_t gpa, uint64_t size, void *hva) 791 { 792 
int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); 793 794 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)", 795 errno, strerror(errno)); 796 } 797 798 /* 799 * VM Userspace Memory Region Add 800 * 801 * Input Args: 802 * vm - Virtual Machine 803 * src_type - Storage source for this region. 804 * NULL to use anonymous memory. 805 * guest_paddr - Starting guest physical address 806 * slot - KVM region slot 807 * npages - Number of physical pages 808 * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) 809 * 810 * Output Args: None 811 * 812 * Return: None 813 * 814 * Allocates a memory area of the number of pages specified by npages 815 * and maps it to the VM specified by vm, at a starting physical address 816 * given by guest_paddr. The region is created with a KVM region slot 817 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The 818 * region is created with the flags given by flags. 819 */ 820 void vm_userspace_mem_region_add(struct kvm_vm *vm, 821 enum vm_mem_backing_src_type src_type, 822 uint64_t guest_paddr, uint32_t slot, uint64_t npages, 823 uint32_t flags) 824 { 825 int ret; 826 struct userspace_mem_region *region; 827 size_t backing_src_pagesz = get_backing_src_pagesz(src_type); 828 size_t alignment; 829 830 TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, 831 "Number of guest pages is not compatible with the host. 
" 832 "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); 833 834 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " 835 "address not on a page boundary.\n" 836 " guest_paddr: 0x%lx vm->page_size: 0x%x", 837 guest_paddr, vm->page_size); 838 TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) 839 <= vm->max_gfn, "Physical range beyond maximum " 840 "supported physical address,\n" 841 " guest_paddr: 0x%lx npages: 0x%lx\n" 842 " vm->max_gfn: 0x%lx vm->page_size: 0x%x", 843 guest_paddr, npages, vm->max_gfn, vm->page_size); 844 845 /* 846 * Confirm a mem region with an overlapping address doesn't 847 * already exist. 848 */ 849 region = (struct userspace_mem_region *) userspace_mem_region_find( 850 vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); 851 if (region != NULL) 852 TEST_FAIL("overlapping userspace_mem_region already " 853 "exists\n" 854 " requested guest_paddr: 0x%lx npages: 0x%lx " 855 "page_size: 0x%x\n" 856 " existing guest_paddr: 0x%lx size: 0x%lx", 857 guest_paddr, npages, vm->page_size, 858 (uint64_t) region->region.guest_phys_addr, 859 (uint64_t) region->region.memory_size); 860 861 /* Confirm no region with the requested slot already exists. */ 862 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 863 slot) { 864 if (region->region.slot != slot) 865 continue; 866 867 TEST_FAIL("A mem region with the requested slot " 868 "already exists.\n" 869 " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" 870 " existing slot: %u paddr: 0x%lx size: 0x%lx", 871 slot, guest_paddr, npages, 872 region->region.slot, 873 (uint64_t) region->region.guest_phys_addr, 874 (uint64_t) region->region.memory_size); 875 } 876 877 /* Allocate and initialize new mem region structure. 
*/ 878 region = calloc(1, sizeof(*region)); 879 TEST_ASSERT(region != NULL, "Insufficient Memory"); 880 region->mmap_size = npages * vm->page_size; 881 882 #ifdef __s390x__ 883 /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ 884 alignment = 0x100000; 885 #else 886 alignment = 1; 887 #endif 888 889 /* 890 * When using THP mmap is not guaranteed to returned a hugepage aligned 891 * address so we have to pad the mmap. Padding is not needed for HugeTLB 892 * because mmap will always return an address aligned to the HugeTLB 893 * page size. 894 */ 895 if (src_type == VM_MEM_SRC_ANONYMOUS_THP) 896 alignment = max(backing_src_pagesz, alignment); 897 898 ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); 899 900 /* Add enough memory to align up if necessary */ 901 if (alignment > 1) 902 region->mmap_size += alignment; 903 904 region->fd = -1; 905 if (backing_src_is_shared(src_type)) 906 region->fd = kvm_memfd_alloc(region->mmap_size, 907 src_type == VM_MEM_SRC_SHARED_HUGETLB); 908 909 region->mmap_start = mmap(NULL, region->mmap_size, 910 PROT_READ | PROT_WRITE, 911 vm_mem_backing_src_alias(src_type)->flag, 912 region->fd, 0); 913 TEST_ASSERT(region->mmap_start != MAP_FAILED, 914 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 915 916 TEST_ASSERT(!is_backing_src_hugetlb(src_type) || 917 region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), 918 "mmap_start %p is not aligned to HugeTLB page size 0x%lx", 919 region->mmap_start, backing_src_pagesz); 920 921 /* Align host address */ 922 region->host_mem = align_ptr_up(region->mmap_start, alignment); 923 924 /* As needed perform madvise */ 925 if ((src_type == VM_MEM_SRC_ANONYMOUS || 926 src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { 927 ret = madvise(region->host_mem, npages * vm->page_size, 928 src_type == VM_MEM_SRC_ANONYMOUS ? 
MADV_NOHUGEPAGE : MADV_HUGEPAGE); 929 TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", 930 region->host_mem, npages * vm->page_size, 931 vm_mem_backing_src_alias(src_type)->name); 932 } 933 934 region->backing_src_type = src_type; 935 region->unused_phy_pages = sparsebit_alloc(); 936 sparsebit_set_num(region->unused_phy_pages, 937 guest_paddr >> vm->page_shift, npages); 938 region->region.slot = slot; 939 region->region.flags = flags; 940 region->region.guest_phys_addr = guest_paddr; 941 region->region.memory_size = npages * vm->page_size; 942 region->region.userspace_addr = (uintptr_t) region->host_mem; 943 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 944 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 945 " rc: %i errno: %i\n" 946 " slot: %u flags: 0x%x\n" 947 " guest_phys_addr: 0x%lx size: 0x%lx", 948 ret, errno, slot, flags, 949 guest_paddr, (uint64_t) region->region.memory_size); 950 951 /* Add to quick lookup data structures */ 952 vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); 953 vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region); 954 hash_add(vm->regions.slot_hash, ®ion->slot_node, slot); 955 956 /* If shared memory, create an alias. */ 957 if (region->fd >= 0) { 958 region->mmap_alias = mmap(NULL, region->mmap_size, 959 PROT_READ | PROT_WRITE, 960 vm_mem_backing_src_alias(src_type)->flag, 961 region->fd, 0); 962 TEST_ASSERT(region->mmap_alias != MAP_FAILED, 963 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 964 965 /* Align host alias address */ 966 region->host_alias = align_ptr_up(region->mmap_alias, alignment); 967 } 968 } 969 970 /* 971 * Memslot to region 972 * 973 * Input Args: 974 * vm - Virtual Machine 975 * memslot - KVM memory slot ID 976 * 977 * Output Args: None 978 * 979 * Return: 980 * Pointer to memory region structure that describe memory region 981 * using kvm memory slot ID given by memslot. 
TEST_ASSERT failure 982 * on error (e.g. currently no memory region using memslot as a KVM 983 * memory slot ID). 984 */ 985 struct userspace_mem_region * 986 memslot2region(struct kvm_vm *vm, uint32_t memslot) 987 { 988 struct userspace_mem_region *region; 989 990 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 991 memslot) 992 if (region->region.slot == memslot) 993 return region; 994 995 fprintf(stderr, "No mem region with the requested slot found,\n" 996 " requested slot: %u\n", memslot); 997 fputs("---- vm dump ----\n", stderr); 998 vm_dump(stderr, vm, 2); 999 TEST_FAIL("Mem region not found"); 1000 return NULL; 1001 } 1002 1003 /* 1004 * VM Memory Region Flags Set 1005 * 1006 * Input Args: 1007 * vm - Virtual Machine 1008 * flags - Starting guest physical address 1009 * 1010 * Output Args: None 1011 * 1012 * Return: None 1013 * 1014 * Sets the flags of the memory region specified by the value of slot, 1015 * to the values given by flags. 1016 */ 1017 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) 1018 { 1019 int ret; 1020 struct userspace_mem_region *region; 1021 1022 region = memslot2region(vm, slot); 1023 1024 region->region.flags = flags; 1025 1026 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 1027 1028 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 1029 " rc: %i errno: %i slot: %u flags: 0x%x", 1030 ret, errno, slot, flags); 1031 } 1032 1033 /* 1034 * VM Memory Region Move 1035 * 1036 * Input Args: 1037 * vm - Virtual Machine 1038 * slot - Slot of the memory region to move 1039 * new_gpa - Starting guest physical address 1040 * 1041 * Output Args: None 1042 * 1043 * Return: None 1044 * 1045 * Change the gpa of a memory region. 
1046 */ 1047 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) 1048 { 1049 struct userspace_mem_region *region; 1050 int ret; 1051 1052 region = memslot2region(vm, slot); 1053 1054 region->region.guest_phys_addr = new_gpa; 1055 1056 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 1057 1058 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" 1059 "ret: %i errno: %i slot: %u new_gpa: 0x%lx", 1060 ret, errno, slot, new_gpa); 1061 } 1062 1063 /* 1064 * VM Memory Region Delete 1065 * 1066 * Input Args: 1067 * vm - Virtual Machine 1068 * slot - Slot of the memory region to delete 1069 * 1070 * Output Args: None 1071 * 1072 * Return: None 1073 * 1074 * Delete a memory region. 1075 */ 1076 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) 1077 { 1078 __vm_mem_region_delete(vm, memslot2region(vm, slot), true); 1079 } 1080 1081 /* Returns the size of a vCPU's kvm_run structure. */ 1082 static int vcpu_mmap_sz(void) 1083 { 1084 int dev_fd, ret; 1085 1086 dev_fd = open_kvm_dev_path_or_exit(); 1087 1088 ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); 1089 TEST_ASSERT(ret >= sizeof(struct kvm_run), 1090 KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); 1091 1092 close(dev_fd); 1093 1094 return ret; 1095 } 1096 1097 static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) 1098 { 1099 struct kvm_vcpu *vcpu; 1100 1101 list_for_each_entry(vcpu, &vm->vcpus, list) { 1102 if (vcpu->id == vcpu_id) 1103 return true; 1104 } 1105 1106 return false; 1107 } 1108 1109 /* 1110 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id. 1111 * No additional vCPU setup is done. Returns the vCPU. 1112 */ 1113 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) 1114 { 1115 struct kvm_vcpu *vcpu; 1116 1117 /* Confirm a vcpu with the specified id doesn't already exist. */ 1118 TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); 1119 1120 /* Allocate and initialize new vcpu structure. 
*/ 1121 vcpu = calloc(1, sizeof(*vcpu)); 1122 TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); 1123 1124 vcpu->vm = vm; 1125 vcpu->id = vcpu_id; 1126 vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); 1127 TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd)); 1128 1129 TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " 1130 "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", 1131 vcpu_mmap_sz(), sizeof(*vcpu->run)); 1132 vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), 1133 PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); 1134 TEST_ASSERT(vcpu->run != MAP_FAILED, 1135 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 1136 1137 /* Add to linked-list of VCPUs. */ 1138 list_add(&vcpu->list, &vm->vcpus); 1139 1140 return vcpu; 1141 } 1142 1143 /* 1144 * VM Virtual Address Unused Gap 1145 * 1146 * Input Args: 1147 * vm - Virtual Machine 1148 * sz - Size (bytes) 1149 * vaddr_min - Minimum Virtual Address 1150 * 1151 * Output Args: None 1152 * 1153 * Return: 1154 * Lowest virtual address at or below vaddr_min, with at least 1155 * sz unused bytes. TEST_ASSERT failure if no area of at least 1156 * size sz is available. 1157 * 1158 * Within the VM specified by vm, locates the lowest starting virtual 1159 * address >= vaddr_min, that has at least sz unallocated bytes. A 1160 * TEST_ASSERT failure occurs for invalid input or no area of at least 1161 * sz unallocated bytes >= vaddr_min is available. 1162 */ 1163 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, 1164 vm_vaddr_t vaddr_min) 1165 { 1166 uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; 1167 1168 /* Determine lowest permitted virtual page index. */ 1169 uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; 1170 if ((pgidx_start * vm->page_size) < vaddr_min) 1171 goto no_va_found; 1172 1173 /* Loop over section with enough valid virtual page indexes. 
*/ 1174 if (!sparsebit_is_set_num(vm->vpages_valid, 1175 pgidx_start, pages)) 1176 pgidx_start = sparsebit_next_set_num(vm->vpages_valid, 1177 pgidx_start, pages); 1178 do { 1179 /* 1180 * Are there enough unused virtual pages available at 1181 * the currently proposed starting virtual page index. 1182 * If not, adjust proposed starting index to next 1183 * possible. 1184 */ 1185 if (sparsebit_is_clear_num(vm->vpages_mapped, 1186 pgidx_start, pages)) 1187 goto va_found; 1188 pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, 1189 pgidx_start, pages); 1190 if (pgidx_start == 0) 1191 goto no_va_found; 1192 1193 /* 1194 * If needed, adjust proposed starting virtual address, 1195 * to next range of valid virtual addresses. 1196 */ 1197 if (!sparsebit_is_set_num(vm->vpages_valid, 1198 pgidx_start, pages)) { 1199 pgidx_start = sparsebit_next_set_num( 1200 vm->vpages_valid, pgidx_start, pages); 1201 if (pgidx_start == 0) 1202 goto no_va_found; 1203 } 1204 } while (pgidx_start != 0); 1205 1206 no_va_found: 1207 TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); 1208 1209 /* NOT REACHED */ 1210 return -1; 1211 1212 va_found: 1213 TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, 1214 pgidx_start, pages), 1215 "Unexpected, invalid virtual page index range,\n" 1216 " pgidx_start: 0x%lx\n" 1217 " pages: 0x%lx", 1218 pgidx_start, pages); 1219 TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, 1220 pgidx_start, pages), 1221 "Unexpected, pages already mapped,\n" 1222 " pgidx_start: 0x%lx\n" 1223 " pages: 0x%lx", 1224 pgidx_start, pages); 1225 1226 return pgidx_start * vm->page_size; 1227 } 1228 1229 /* 1230 * VM Virtual Address Allocate 1231 * 1232 * Input Args: 1233 * vm - Virtual Machine 1234 * sz - Size in bytes 1235 * vaddr_min - Minimum starting virtual address 1236 * 1237 * Output Args: None 1238 * 1239 * Return: 1240 * Starting guest virtual address 1241 * 1242 * Allocates at least sz bytes within the virtual address space of the vm 1243 * 
given by vm. The allocated bytes are mapped to a virtual address >= 1244 * the address given by vaddr_min. Note that each allocation uses a 1245 * a unique set of pages, with the minimum real allocation being at least 1246 * a page. 1247 */ 1248 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) 1249 { 1250 uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); 1251 1252 virt_pgd_alloc(vm); 1253 vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, 1254 KVM_UTIL_MIN_PFN * vm->page_size, 0); 1255 1256 /* 1257 * Find an unused range of virtual page addresses of at least 1258 * pages in length. 1259 */ 1260 vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); 1261 1262 /* Map the virtual pages. */ 1263 for (vm_vaddr_t vaddr = vaddr_start; pages > 0; 1264 pages--, vaddr += vm->page_size, paddr += vm->page_size) { 1265 1266 virt_pg_map(vm, vaddr, paddr); 1267 1268 sparsebit_set(vm->vpages_mapped, 1269 vaddr >> vm->page_shift); 1270 } 1271 1272 return vaddr_start; 1273 } 1274 1275 /* 1276 * VM Virtual Address Allocate Pages 1277 * 1278 * Input Args: 1279 * vm - Virtual Machine 1280 * 1281 * Output Args: None 1282 * 1283 * Return: 1284 * Starting guest virtual address 1285 * 1286 * Allocates at least N system pages worth of bytes within the virtual address 1287 * space of the vm. 1288 */ 1289 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) 1290 { 1291 return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); 1292 } 1293 1294 /* 1295 * VM Virtual Address Allocate Page 1296 * 1297 * Input Args: 1298 * vm - Virtual Machine 1299 * 1300 * Output Args: None 1301 * 1302 * Return: 1303 * Starting guest virtual address 1304 * 1305 * Allocates at least one system page worth of bytes within the virtual address 1306 * space of the vm. 
1307 */ 1308 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) 1309 { 1310 return vm_vaddr_alloc_pages(vm, 1); 1311 } 1312 1313 /* 1314 * Map a range of VM virtual address to the VM's physical address 1315 * 1316 * Input Args: 1317 * vm - Virtual Machine 1318 * vaddr - Virtuall address to map 1319 * paddr - VM Physical Address 1320 * npages - The number of pages to map 1321 * 1322 * Output Args: None 1323 * 1324 * Return: None 1325 * 1326 * Within the VM given by @vm, creates a virtual translation for 1327 * @npages starting at @vaddr to the page range starting at @paddr. 1328 */ 1329 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 1330 unsigned int npages) 1331 { 1332 size_t page_size = vm->page_size; 1333 size_t size = npages * page_size; 1334 1335 TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); 1336 TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); 1337 1338 while (npages--) { 1339 virt_pg_map(vm, vaddr, paddr); 1340 vaddr += page_size; 1341 paddr += page_size; 1342 } 1343 } 1344 1345 /* 1346 * Address VM Physical to Host Virtual 1347 * 1348 * Input Args: 1349 * vm - Virtual Machine 1350 * gpa - VM physical address 1351 * 1352 * Output Args: None 1353 * 1354 * Return: 1355 * Equivalent host virtual address 1356 * 1357 * Locates the memory region containing the VM physical address given 1358 * by gpa, within the VM given by vm. When found, the host virtual 1359 * address providing the memory to the vm physical address is returned. 1360 * A TEST_ASSERT failure occurs if no region containing gpa exists. 
1361 */ 1362 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) 1363 { 1364 struct userspace_mem_region *region; 1365 1366 region = userspace_mem_region_find(vm, gpa, gpa); 1367 if (!region) { 1368 TEST_FAIL("No vm physical memory at 0x%lx", gpa); 1369 return NULL; 1370 } 1371 1372 return (void *)((uintptr_t)region->host_mem 1373 + (gpa - region->region.guest_phys_addr)); 1374 } 1375 1376 /* 1377 * Address Host Virtual to VM Physical 1378 * 1379 * Input Args: 1380 * vm - Virtual Machine 1381 * hva - Host virtual address 1382 * 1383 * Output Args: None 1384 * 1385 * Return: 1386 * Equivalent VM physical address 1387 * 1388 * Locates the memory region containing the host virtual address given 1389 * by hva, within the VM given by vm. When found, the equivalent 1390 * VM physical address is returned. A TEST_ASSERT failure occurs if no 1391 * region containing hva exists. 1392 */ 1393 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) 1394 { 1395 struct rb_node *node; 1396 1397 for (node = vm->regions.hva_tree.rb_node; node; ) { 1398 struct userspace_mem_region *region = 1399 container_of(node, struct userspace_mem_region, hva_node); 1400 1401 if (hva >= region->host_mem) { 1402 if (hva <= (region->host_mem 1403 + region->region.memory_size - 1)) 1404 return (vm_paddr_t)((uintptr_t) 1405 region->region.guest_phys_addr 1406 + (hva - (uintptr_t)region->host_mem)); 1407 1408 node = node->rb_right; 1409 } else 1410 node = node->rb_left; 1411 } 1412 1413 TEST_FAIL("No mapping to a guest physical address, hva: %p", hva); 1414 return -1; 1415 } 1416 1417 /* 1418 * Address VM physical to Host Virtual *alias*. 1419 * 1420 * Input Args: 1421 * vm - Virtual Machine 1422 * gpa - VM physical address 1423 * 1424 * Output Args: None 1425 * 1426 * Return: 1427 * Equivalent address within the host virtual *alias* area, or NULL 1428 * (without failing the test) if the guest memory is not shared (so 1429 * no alias exists). 
1430 * 1431 * Create a writable, shared virtual=>physical alias for the specific GPA. 1432 * The primary use case is to allow the host selftest to manipulate guest 1433 * memory without mapping said memory in the guest's address space. And, for 1434 * userfaultfd-based demand paging, to do so without triggering userfaults. 1435 */ 1436 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) 1437 { 1438 struct userspace_mem_region *region; 1439 uintptr_t offset; 1440 1441 region = userspace_mem_region_find(vm, gpa, gpa); 1442 if (!region) 1443 return NULL; 1444 1445 if (!region->host_alias) 1446 return NULL; 1447 1448 offset = gpa - region->region.guest_phys_addr; 1449 return (void *) ((uintptr_t) region->host_alias + offset); 1450 } 1451 1452 /* Create an interrupt controller chip for the specified VM. */ 1453 void vm_create_irqchip(struct kvm_vm *vm) 1454 { 1455 vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); 1456 1457 vm->has_irqchip = true; 1458 } 1459 1460 int _vcpu_run(struct kvm_vcpu *vcpu) 1461 { 1462 int rc; 1463 1464 do { 1465 rc = __vcpu_run(vcpu); 1466 } while (rc == -1 && errno == EINTR); 1467 1468 assert_on_unhandled_exception(vcpu); 1469 1470 return rc; 1471 } 1472 1473 /* 1474 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR. 1475 * Assert if the KVM returns an error (other than -EINTR). 1476 */ 1477 void vcpu_run(struct kvm_vcpu *vcpu) 1478 { 1479 int ret = _vcpu_run(vcpu); 1480 1481 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret)); 1482 } 1483 1484 void vcpu_run_complete_io(struct kvm_vcpu *vcpu) 1485 { 1486 int ret; 1487 1488 vcpu->run->immediate_exit = 1; 1489 ret = __vcpu_run(vcpu); 1490 vcpu->run->immediate_exit = 0; 1491 1492 TEST_ASSERT(ret == -1 && errno == EINTR, 1493 "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", 1494 ret, errno); 1495 } 1496 1497 /* 1498 * Get the list of guest registers which are supported for 1499 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. 
Returns a kvm_reg_list pointer, 1500 * it is the caller's responsibility to free the list. 1501 */ 1502 struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) 1503 { 1504 struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; 1505 int ret; 1506 1507 ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®_list_n); 1508 TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); 1509 1510 reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); 1511 reg_list->n = reg_list_n.n; 1512 vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list); 1513 return reg_list; 1514 } 1515 1516 void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) 1517 { 1518 uint32_t page_size = vcpu->vm->page_size; 1519 uint32_t size = vcpu->vm->dirty_ring_size; 1520 1521 TEST_ASSERT(size > 0, "Should enable dirty ring first"); 1522 1523 if (!vcpu->dirty_gfns) { 1524 void *addr; 1525 1526 addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd, 1527 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1528 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private"); 1529 1530 addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd, 1531 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1532 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); 1533 1534 addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 1535 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1536 TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); 1537 1538 vcpu->dirty_gfns = addr; 1539 vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); 1540 } 1541 1542 return vcpu->dirty_gfns; 1543 } 1544 1545 /* 1546 * Device Ioctl 1547 */ 1548 1549 int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) 1550 { 1551 struct kvm_device_attr attribute = { 1552 .group = group, 1553 .attr = attr, 1554 .flags = 0, 1555 }; 1556 1557 return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); 1558 } 1559 1560 int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) 1561 { 1562 struct kvm_create_device create_dev = { 
1563 .type = type, 1564 .flags = KVM_CREATE_DEVICE_TEST, 1565 }; 1566 1567 return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); 1568 } 1569 1570 int __kvm_create_device(struct kvm_vm *vm, uint64_t type) 1571 { 1572 struct kvm_create_device create_dev = { 1573 .type = type, 1574 .fd = -1, 1575 .flags = 0, 1576 }; 1577 int err; 1578 1579 err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); 1580 TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value"); 1581 return err ? : create_dev.fd; 1582 } 1583 1584 int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) 1585 { 1586 struct kvm_device_attr kvmattr = { 1587 .group = group, 1588 .attr = attr, 1589 .flags = 0, 1590 .addr = (uintptr_t)val, 1591 }; 1592 1593 return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); 1594 } 1595 1596 int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) 1597 { 1598 struct kvm_device_attr kvmattr = { 1599 .group = group, 1600 .attr = attr, 1601 .flags = 0, 1602 .addr = (uintptr_t)val, 1603 }; 1604 1605 return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr); 1606 } 1607 1608 /* 1609 * IRQ related functions. 1610 */ 1611 1612 int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1613 { 1614 struct kvm_irq_level irq_level = { 1615 .irq = irq, 1616 .level = level, 1617 }; 1618 1619 return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); 1620 } 1621 1622 void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1623 { 1624 int ret = _kvm_irq_line(vm, irq, level); 1625 1626 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); 1627 } 1628 1629 struct kvm_irq_routing *kvm_gsi_routing_create(void) 1630 { 1631 struct kvm_irq_routing *routing; 1632 size_t size; 1633 1634 size = sizeof(struct kvm_irq_routing); 1635 /* Allocate space for the max number of entries: this wastes 196 KBs. 
*/ 1636 size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry); 1637 routing = calloc(1, size); 1638 assert(routing); 1639 1640 return routing; 1641 } 1642 1643 void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, 1644 uint32_t gsi, uint32_t pin) 1645 { 1646 int i; 1647 1648 assert(routing); 1649 assert(routing->nr < KVM_MAX_IRQ_ROUTES); 1650 1651 i = routing->nr; 1652 routing->entries[i].gsi = gsi; 1653 routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; 1654 routing->entries[i].flags = 0; 1655 routing->entries[i].u.irqchip.irqchip = 0; 1656 routing->entries[i].u.irqchip.pin = pin; 1657 routing->nr++; 1658 } 1659 1660 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1661 { 1662 int ret; 1663 1664 assert(routing); 1665 ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); 1666 free(routing); 1667 1668 return ret; 1669 } 1670 1671 void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1672 { 1673 int ret; 1674 1675 ret = _kvm_gsi_routing_write(vm, routing); 1676 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret)); 1677 } 1678 1679 /* 1680 * VM Dump 1681 * 1682 * Input Args: 1683 * vm - Virtual Machine 1684 * indent - Left margin indent amount 1685 * 1686 * Output Args: 1687 * stream - Output FILE stream 1688 * 1689 * Return: None 1690 * 1691 * Dumps the current state of the VM given by vm, to the FILE stream 1692 * given by stream. 
1693 */ 1694 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 1695 { 1696 int ctr; 1697 struct userspace_mem_region *region; 1698 struct kvm_vcpu *vcpu; 1699 1700 fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); 1701 fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); 1702 fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); 1703 fprintf(stream, "%*sMem Regions:\n", indent, ""); 1704 hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { 1705 fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " 1706 "host_virt: %p\n", indent + 2, "", 1707 (uint64_t) region->region.guest_phys_addr, 1708 (uint64_t) region->region.memory_size, 1709 region->host_mem); 1710 fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); 1711 sparsebit_dump(stream, region->unused_phy_pages, 0); 1712 } 1713 fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); 1714 sparsebit_dump(stream, vm->vpages_mapped, indent + 2); 1715 fprintf(stream, "%*spgd_created: %u\n", indent, "", 1716 vm->pgd_created); 1717 if (vm->pgd_created) { 1718 fprintf(stream, "%*sVirtual Translation Tables:\n", 1719 indent + 2, ""); 1720 virt_dump(stream, vm, indent + 4); 1721 } 1722 fprintf(stream, "%*sVCPUs:\n", indent, ""); 1723 1724 list_for_each_entry(vcpu, &vm->vcpus, list) 1725 vcpu_dump(stream, vcpu, indent + 2); 1726 } 1727 1728 /* Known KVM exit reasons */ 1729 static struct exit_reason { 1730 unsigned int reason; 1731 const char *name; 1732 } exit_reasons_known[] = { 1733 {KVM_EXIT_UNKNOWN, "UNKNOWN"}, 1734 {KVM_EXIT_EXCEPTION, "EXCEPTION"}, 1735 {KVM_EXIT_IO, "IO"}, 1736 {KVM_EXIT_HYPERCALL, "HYPERCALL"}, 1737 {KVM_EXIT_DEBUG, "DEBUG"}, 1738 {KVM_EXIT_HLT, "HLT"}, 1739 {KVM_EXIT_MMIO, "MMIO"}, 1740 {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, 1741 {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, 1742 {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, 1743 {KVM_EXIT_INTR, "INTR"}, 1744 {KVM_EXIT_SET_TPR, "SET_TPR"}, 1745 {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, 1746 {KVM_EXIT_S390_SIEIC, 
"S390_SIEIC"}, 1747 {KVM_EXIT_S390_RESET, "S390_RESET"}, 1748 {KVM_EXIT_DCR, "DCR"}, 1749 {KVM_EXIT_NMI, "NMI"}, 1750 {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, 1751 {KVM_EXIT_OSI, "OSI"}, 1752 {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, 1753 {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, 1754 {KVM_EXIT_X86_RDMSR, "RDMSR"}, 1755 {KVM_EXIT_X86_WRMSR, "WRMSR"}, 1756 {KVM_EXIT_XEN, "XEN"}, 1757 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT 1758 {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, 1759 #endif 1760 }; 1761 1762 /* 1763 * Exit Reason String 1764 * 1765 * Input Args: 1766 * exit_reason - Exit reason 1767 * 1768 * Output Args: None 1769 * 1770 * Return: 1771 * Constant string pointer describing the exit reason. 1772 * 1773 * Locates and returns a constant string that describes the KVM exit 1774 * reason given by exit_reason. If no such string is found, a constant 1775 * string of "Unknown" is returned. 1776 */ 1777 const char *exit_reason_str(unsigned int exit_reason) 1778 { 1779 unsigned int n1; 1780 1781 for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { 1782 if (exit_reason == exit_reasons_known[n1].reason) 1783 return exit_reasons_known[n1].name; 1784 } 1785 1786 return "Unknown"; 1787 } 1788 1789 /* 1790 * Physical Contiguous Page Allocator 1791 * 1792 * Input Args: 1793 * vm - Virtual Machine 1794 * num - number of pages 1795 * paddr_min - Physical address minimum 1796 * memslot - Memory region to allocate page from 1797 * 1798 * Output Args: None 1799 * 1800 * Return: 1801 * Starting physical address 1802 * 1803 * Within the VM specified by vm, locates a range of available physical 1804 * pages at or above paddr_min. If found, the pages are marked as in use 1805 * and their base address is returned. A TEST_ASSERT failure occurs if 1806 * not enough pages are available at or above paddr_min. 
1807 */ 1808 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, 1809 vm_paddr_t paddr_min, uint32_t memslot) 1810 { 1811 struct userspace_mem_region *region; 1812 sparsebit_idx_t pg, base; 1813 1814 TEST_ASSERT(num > 0, "Must allocate at least one page"); 1815 1816 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " 1817 "not divisible by page size.\n" 1818 " paddr_min: 0x%lx page_size: 0x%x", 1819 paddr_min, vm->page_size); 1820 1821 region = memslot2region(vm, memslot); 1822 base = pg = paddr_min >> vm->page_shift; 1823 1824 do { 1825 for (; pg < base + num; ++pg) { 1826 if (!sparsebit_is_set(region->unused_phy_pages, pg)) { 1827 base = pg = sparsebit_next_set(region->unused_phy_pages, pg); 1828 break; 1829 } 1830 } 1831 } while (pg && pg != base + num); 1832 1833 if (pg == 0) { 1834 fprintf(stderr, "No guest physical page available, " 1835 "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", 1836 paddr_min, vm->page_size, memslot); 1837 fputs("---- vm dump ----\n", stderr); 1838 vm_dump(stderr, vm, 2); 1839 abort(); 1840 } 1841 1842 for (pg = base; pg < base + num; ++pg) 1843 sparsebit_clear(region->unused_phy_pages, pg); 1844 1845 return base * vm->page_size; 1846 } 1847 1848 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, 1849 uint32_t memslot) 1850 { 1851 return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); 1852 } 1853 1854 /* Arbitrary minimum physical address used for virtual translation tables. 
*/ 1855 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 1856 1857 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) 1858 { 1859 return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); 1860 } 1861 1862 /* 1863 * Address Guest Virtual to Host Virtual 1864 * 1865 * Input Args: 1866 * vm - Virtual Machine 1867 * gva - VM virtual address 1868 * 1869 * Output Args: None 1870 * 1871 * Return: 1872 * Equivalent host virtual address 1873 */ 1874 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) 1875 { 1876 return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); 1877 } 1878 1879 unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm) 1880 { 1881 return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; 1882 } 1883 1884 static unsigned int vm_calc_num_pages(unsigned int num_pages, 1885 unsigned int page_shift, 1886 unsigned int new_page_shift, 1887 bool ceil) 1888 { 1889 unsigned int n = 1 << (new_page_shift - page_shift); 1890 1891 if (page_shift >= new_page_shift) 1892 return num_pages * (1 << (page_shift - new_page_shift)); 1893 1894 return num_pages / n + !!(ceil && num_pages % n); 1895 } 1896 1897 static inline int getpageshift(void) 1898 { 1899 return __builtin_ffs(getpagesize()) - 1; 1900 } 1901 1902 unsigned int 1903 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) 1904 { 1905 return vm_calc_num_pages(num_guest_pages, 1906 vm_guest_mode_params[mode].page_shift, 1907 getpageshift(), true); 1908 } 1909 1910 unsigned int 1911 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages) 1912 { 1913 return vm_calc_num_pages(num_host_pages, getpageshift(), 1914 vm_guest_mode_params[mode].page_shift, false); 1915 } 1916 1917 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) 1918 { 1919 unsigned int n; 1920 n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size); 1921 return vm_adjust_num_guest_pages(mode, n); 1922 } 1923 1924 /* 1925 * Read binary stats descriptors 1926 * 1927 * Input Args: 1928 * stats_fd 
- the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	ssize_t entry_size = get_stats_descriptor_size(header);
	ssize_t bytes_wanted = header->num_desc * entry_size;
	struct kvm_stats_desc *descs;
	ssize_t bytes_read;

	descs = calloc(header->num_desc, entry_size);
	TEST_ASSERT(descs, "Allocate memory for stats descriptors");

	/* All descriptors are fetched in one read, starting at desc_offset. */
	bytes_read = pread(stats_fd, descs, bytes_wanted, header->desc_offset);
	TEST_ASSERT(bytes_read == bytes_wanted, "Read KVM stats descriptors");

	return descs;
}

/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
1970 */ 1971 void read_stat_data(int stats_fd, struct kvm_stats_header *header, 1972 struct kvm_stats_desc *desc, uint64_t *data, 1973 size_t max_elements) 1974 { 1975 size_t nr_elements = min_t(ssize_t, desc->size, max_elements); 1976 size_t size = nr_elements * sizeof(*data); 1977 ssize_t ret; 1978 1979 TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name); 1980 TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name); 1981 1982 ret = pread(stats_fd, data, size, 1983 header->data_offset + desc->offset); 1984 1985 TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)", 1986 desc->name, errno, strerror(errno)); 1987 TEST_ASSERT(ret == size, 1988 "pread() on stat '%s' read %ld bytes, wanted %lu bytes", 1989 desc->name, size, ret); 1990 } 1991 1992 /* 1993 * Read the data of the named stat 1994 * 1995 * Input Args: 1996 * vm - the VM for which the stat should be read 1997 * stat_name - the name of the stat to read 1998 * max_elements - the maximum number of 8-byte values to read into data 1999 * 2000 * Output Args: 2001 * data - the buffer into which stat data should be read 2002 * 2003 * Read the data values of a specified stat from the binary stats interface. 
2004 */ 2005 void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, 2006 size_t max_elements) 2007 { 2008 struct kvm_stats_desc *desc; 2009 size_t size_desc; 2010 int i; 2011 2012 if (!vm->stats_fd) { 2013 vm->stats_fd = vm_get_stats_fd(vm); 2014 read_stats_header(vm->stats_fd, &vm->stats_header); 2015 vm->stats_desc = read_stats_descriptors(vm->stats_fd, 2016 &vm->stats_header); 2017 } 2018 2019 size_desc = get_stats_descriptor_size(&vm->stats_header); 2020 2021 for (i = 0; i < vm->stats_header.num_desc; ++i) { 2022 desc = (void *)vm->stats_desc + (i * size_desc); 2023 2024 if (strcmp(desc->name, stat_name)) 2025 continue; 2026 2027 read_stat_data(vm->stats_fd, &vm->stats_header, desc, 2028 data, max_elements); 2029 2030 break; 2031 } 2032 } 2033