// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

static int vcpu_mmap_sz(void);

int open_path_or_exit(const char *path, int flags)
{
	int fd;

	fd = open(path, flags);
	__TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);

	return fd;
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	return open_path_or_exit(KVM_DEV_PATH, flags);
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

static bool get_module_param_bool(const char *module_name, const char *param)
{
	const int path_size = 128;
	char path[path_size];
	char value;
	ssize_t r;
	int fd;

	r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
		     module_name, param);
	TEST_ASSERT(r < path_size,
		    "Failed to construct sysfs path in %d bytes.", path_size);

	fd = open_path_or_exit(path, O_RDONLY);

	r = read(fd, &value, 1);
	TEST_ASSERT(r == 1, "read(%s) failed", path);

	r = close(fd);
	TEST_ASSERT(!r, "close(%s) failed", path);

	if (value == 'Y')
		return true;
	else if (value == 'N')
		return false;

	TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}

bool get_kvm_intel_param_bool(const char *param)
{
	return get_module_param_bool("kvm_intel", param);
}

bool get_kvm_amd_param_bool(const char *param)
{
	return get_module_param_bool("kvm_amd", param);
}

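/*
 * Illustrative usage sketch (not part of this library): a test that depends
 * on a boolean module parameter can gate itself on the reported value.  The
 * parameter name below ("ept") is only an example:
 *
 *	TEST_REQUIRE(get_kvm_intel_param_bool("ept"));
 */
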
/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the Value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap.  On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
unsigned int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));

	close(kvm_fd);

	return (unsigned int)ret;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
	else
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));

	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
		[VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
		[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
		[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
		[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
		[VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
		[VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
		[VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
		[VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	[VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
	[VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
	[VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
	[VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
	[VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
	[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
	[VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
	[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
	[VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
	[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
	[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
	[VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
	[VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
	[VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
	[VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");

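/*
 * Illustrative usage sketch (not part of this library): probe a capability
 * before relying on it, e.g. require the dirty ring and then size it for a
 * freshly created VM (the 0x10000 byte ring size is an arbitrary example):
 *
 *	TEST_REQUIRE(kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) ||
 *		     kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL));
 *	vm_enable_dirty_ring(vm, 0x10000);
 */
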
struct kvm_vm *____vm_create(enum vm_guest_mode mode)
{
	struct kvm_vm *vm;

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = mode;
	vm->type = 0;

	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P36V47_16K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
		 * it doesn't take effect unless CR4.LA57 is set, which it
		 * isn't for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	sparsebit_set_num(vm->vpages_valid,
			  0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
			  (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
			  (1ULL << (vm->va_bits - 1)) >> vm->page_shift);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = vm_compute_max_gfn(vm);

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();

	return vm;
}

static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
				     uint32_t nr_runnable_vcpus,
				     uint64_t extra_mem_pages)
{
	uint64_t nr_pages;

	TEST_ASSERT(nr_runnable_vcpus,
		    "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");

	TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	/*
	 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
	 * test code and other per-VM assets that will be loaded into memslot0.
	 */
	nr_pages = 512;

	/* Account for the per-vCPU stacks on behalf of the test. */
	nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;

	/*
	 * Account for the number of pages needed for the page tables.  The
	 * maximum page table size for a memory region will be when the
	 * smallest page size is used.  Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;

	return vm_adjust_num_guest_pages(mode, nr_pages);
}

struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
			   uint64_t nr_extra_pages)
{
	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
						 nr_extra_pages);
	struct kvm_vm *vm;
	int i;

	pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
		 vm_guest_mode_string(mode), nr_pages);

	vm = ____vm_create(mode);

	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
	for (i = 0; i < NR_MEM_REGIONS; i++)
		vm->memslots[i] = 0;

	kvm_vm_elf_load(vm, program_invocation_name);

#ifdef __x86_64__
	vm_create_irqchip(vm);
#endif
	return vm;
}

/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - VCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   guest_code - Guest entry point
 *   vcpus - Array to fill with pointers to the created vCPUs
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * extra_mem_pages is only used to calculate the maximum page table size;
 * no real memory allocation is done for non-slot0 memory in this function.
 */
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(mode, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	return vm;
}

struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
					 uint64_t extra_mem_pages,
					 void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
				    guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}

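/*
 * Illustrative usage sketch (not part of this library), assuming the test
 * provides a guest entry point named guest_code:
 *
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm;
 *
 *	vm = __vm_create_with_one_vcpu(&vcpu, 0, guest_code);
 *	vcpu_run(vcpu);
 *	kvm_vm_free(vm);
 */
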
/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated to the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find.  Allows tests to look up
 * the memslot datastructure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->run, vcpu_mmap_sz());
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	ret = close(vcpu->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
	if (region->fd >= 0) {
		/* There's an extra map when using shared memory. */
		ret = munmap(region->mmap_alias, region->mmap_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		close(region->fd);
	}

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free cached stats metadata and close FD */
	if (vmp->stats_fd) {
		free(vmp->stats_desc);
		close(vmp->stats_fd);
	}

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd, r;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	r = ftruncate(fd, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));

	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	return fd;
}

/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a match is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}


int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}
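
/*
 * Illustrative usage sketch (not part of this library): the double-underscore
 * variant returns the raw ioctl() result, so a test can assert on an expected
 * failure.  Here "backing" stands for some page-aligned host buffer, and the
 * deliberately unaligned GPA of 1 must be rejected with EINVAL:
 *
 *	int r = __vm_set_user_memory_region(vm, 1, 0, 1, getpagesize(), backing);
 *
 *	TEST_ASSERT(r == -1 && errno == EINVAL, "Unaligned GPA not rejected");
 */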

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}

/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region.
 *              NULL to use anonymous memory.
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		    "Number of guest pages is not compatible with the host.  "
		    "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		    "address not on a page boundary.\n"
		    "  guest_paddr: 0x%lx vm->page_size: 0x%x",
		    guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
		    <= vm->max_gfn, "Physical range beyond maximum "
		    "supported physical address,\n"
		    "  guest_paddr: 0x%lx npages: 0x%lx\n"
		    "  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		    guest_paddr, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			  "exists\n"
			  "  requested guest_paddr: 0x%lx npages: 0x%lx "
			  "page_size: 0x%x\n"
			  "  existing guest_paddr: 0x%lx size: 0x%lx",
			  guest_paddr, npages, vm->page_size,
			  (uint64_t) region->region.guest_phys_addr,
			  (uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			  "already exists.\n"
			  "  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			  "  existing slot: %u paddr: 0x%lx size: 0x%lx",
			  slot, guest_paddr, npages,
			  region->region.slot,
			  (uint64_t) region->region.guest_phys_addr,
			  (uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	/*
	 * When using THP mmap is not guaranteed to return a hugepage aligned
	 * address so we have to pad the mmap.  Padding is not needed for HugeTLB
	 * because mmap will always return an address aligned to the HugeTLB
	 * page size.
	 */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->backing_src_type = src_type;
	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
			  guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		    "  rc: %i errno: %i\n"
		    "  slot: %u flags: 0x%x\n"
		    "  guest_phys_addr: 0x%lx size: 0x%lx",
		    ret, errno, slot, flags,
		    guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}

/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describes the memory region
 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - Flags to set (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		    "  rc: %i errno: %i slot: %u flags: 0x%x",
		    ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}

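/*
 * Illustrative usage sketch (not part of this library): back two extra guest
 * pages with anonymous memory in slot 1 at an arbitrary GPA, then toggle
 * dirty logging on that slot:
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *				    0x10000000, 1, 2, 0);
 *	vm_mem_region_set_flags(vm, 1, KVM_MEM_LOG_DIRTY_PAGES);
 *	vm_mem_region_set_flags(vm, 1, 0);
 */
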
/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");

	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		    "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
		    vcpu_mmap_sz(), sizeof(*vcpu->run));
	vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
	TEST_ASSERT(vcpu->run != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);

	return vcpu;
}

/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
				      vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
				  pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
						     pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index?
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
						       pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
					  pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
					 pgidx_start, pages),
		    "Unexpected, invalid virtual page index range,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages),
		    "Unexpected, pages already mapped,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
			    enum kvm_mem_region_type type)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
					      KVM_UTIL_MIN_PFN * vm->page_size,
					      vm->memslots[type]);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
	     pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);

		sparsebit_set(vm->vpages_mapped,
			      vaddr >> vm->page_shift);
	}

	return vaddr_start;
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.  The allocated physical space comes from the TEST_DATA memory region.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
}

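/*
 * Illustrative usage sketch (not part of this library): carve out a page of
 * guest-virtual test data and initialize it from the host side via the
 * GVA->HVA translation helper:
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR);
 *
 *	memset(addr_gva2hva(vm, gva), 0xaa, getpagesize());
 */
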
/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}

vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
{
	return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
}

/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);
		vaddr += page_size;
		paddr += page_size;
	}
}

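/*
 * Illustrative usage sketch (not part of this library): allocate four guest
 * physical pages from memslot 0 and identity-map them at the same guest
 * virtual address:
 *
 *	vm_paddr_t gpa = vm_phy_pages_alloc(vm, 4,
 *					    KVM_UTIL_MIN_PFN * vm->page_size, 0);
 *
 *	virt_map(vm, gpa, gpa, 4);
 */
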
/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm.  When found, the equivalent
 * VM physical address is returned.  A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}

/*
 * Address VM physical to Host Virtual *alias*.
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent address within the host virtual *alias* area, or NULL
 *   (without failing the test) if the guest memory is not shared (so
 *   no alias exists).
 *
 * Create a writable, shared virtual=>physical alias for the specific GPA.
 * The primary use case is to allow the host selftest to manipulate guest
 * memory without mapping said memory in the guest's address space.  And, for
 * userfaultfd-based demand paging, to do so without triggering userfaults.
 */
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;
	uintptr_t offset;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region)
		return NULL;

	if (!region->host_alias)
		return NULL;

	offset = gpa - region->region.guest_phys_addr;
	return (void *) ((uintptr_t) region->host_alias + offset);
}

/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
	vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);

	vm->has_irqchip = true;
}

int _vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	do {
		rc = __vcpu_run(vcpu);
	} while (rc == -1 && errno == EINTR);

	assert_on_unhandled_exception(vcpu);

	return rc;
}

/*
 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
 * Assert if KVM returns an error (other than -EINTR).
 */
void vcpu_run(struct kvm_vcpu *vcpu)
{
	int ret = _vcpu_run(vcpu);

	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}

void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
	int ret;

	vcpu->run->immediate_exit = 1;
	ret = __vcpu_run(vcpu);
	vcpu->run->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}

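/*
 * Illustrative usage sketch (not part of this library): run a vCPU and report
 * an unexpected exit in a readable form:
 *
 *	vcpu_run(vcpu);
 *	TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_IO,
 *		    "Unexpected exit reason: %u (%s)",
 *		    vcpu->run->exit_reason,
 *		    exit_reason_str(vcpu->run->exit_reason));
 */
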
/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.  Returns a kvm_reg_list pointer,
 * it is the caller's responsibility to free the list.
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = vcpu->vm->page_size;
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}

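/*
 * Illustrative usage sketch (not part of this library): probe for a device
 * type before creating it, skipping the test when KVM lacks support.  The
 * device type used here (KVM_DEV_TYPE_VFIO) is only an example:
 *
 *	int dev_fd;
 *
 *	TEST_REQUIRE(!__kvm_test_create_device(vm, KVM_DEV_TYPE_VFIO));
 *	dev_fd = __kvm_create_device(vm, KVM_DEV_TYPE_VFIO);
 *	TEST_ASSERT(dev_fd >= 0, "KVM_CREATE_DEVICE failed: %d", dev_fd);
 */
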
/*
 * IRQ related functions.
 */

int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	struct kvm_irq_level irq_level = {
		.irq = irq,
		.level = level,
	};

	return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}

void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	int ret = _kvm_irq_line(vm, irq, level);

	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}

struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
	struct kvm_irq_routing *routing;
	size_t size;

	size = sizeof(struct kvm_irq_routing);
	/* Allocate space for the max number of entries: this wastes 196 KBs. */
	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
	routing = calloc(1, size);
	assert(routing);

	return routing;
}

void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
				 uint32_t gsi, uint32_t pin)
{
	int i;

	assert(routing);
	assert(routing->nr < KVM_MAX_IRQ_ROUTES);

	i = routing->nr;
	routing->entries[i].gsi = gsi;
	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
	routing->entries[i].flags = 0;
	routing->entries[i].u.irqchip.irqchip = 0;
	routing->entries[i].u.irqchip.pin = pin;
	routing->nr++;
}

int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	assert(routing);
	ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
	free(routing);

	return ret;
}

void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	ret = _kvm_gsi_routing_write(vm, routing);
	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}

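/*
 * Illustrative usage sketch (not part of this library): route GSI 5 to
 * irqchip pin 5 (the numbers are arbitrary); the routing table is freed by
 * the write helper:
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *
 *	kvm_gsi_routing_irqchip_add(routing, 5, 5);
 *	kvm_gsi_routing_write(vm, routing);
 */
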
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct kvm_vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");

	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vcpu, indent + 2);
}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
	{KVM_EXIT_IO, "IO"},
	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
	{KVM_EXIT_DEBUG, "DEBUG"},
	{KVM_EXIT_HLT, "HLT"},
	{KVM_EXIT_MMIO, "MMIO"},
	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
	{KVM_EXIT_INTR, "INTR"},
	{KVM_EXIT_SET_TPR, "SET_TPR"},
	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
	{KVM_EXIT_S390_RESET, "S390_RESET"},
	{KVM_EXIT_DCR, "DCR"},
	{KVM_EXIT_NMI, "NMI"},
	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
	{KVM_EXIT_OSI, "OSI"},
	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
	{KVM_EXIT_X86_RDMSR, "RDMSR"},
	{KVM_EXIT_X86_WRMSR, "WRMSR"},
	{KVM_EXIT_XEN, "XEN"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};

/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason.  If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}

/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min.  If found, the pages are marked as in use
 * and their base address is returned.  A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		    "not divisible by page size.\n"
		    "  paddr_min: 0x%lx page_size: 0x%x",
		    paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	base = pg = paddr_min >> vm->page_shift;

	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

/* Arbitrary minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
				 vm->memslots[MEM_REGION_PT]);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n = 1 << (new_page_shift - page_shift);

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;

	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}

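/*
 * Illustrative example (not part of this library) of the page-count
 * conversions above, assuming a host with 4K pages: with 64K guest pages,
 * one guest page spans sixteen host pages, so
 *
 *	vm_num_host_pages(VM_MODE_P52V48_64K, 1) == 16
 *
 * and, rounding down in the other direction,
 *
 *	vm_num_guest_pages(VM_MODE_P52V48_64K, 17) == 1
 */
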
/*
 * Read binary stats descriptors
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	struct kvm_stats_desc *stats_desc;
	ssize_t desc_size, total_size, ret;

	desc_size = get_stats_descriptor_size(header);
	total_size = header->num_desc * desc_size;

	stats_desc = calloc(header->num_desc, desc_size);
	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");

	ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
	TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");

	return stats_desc;
}

/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
		    struct kvm_stats_desc *desc, uint64_t *data,
		    size_t max_elements)
{
	size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
	size_t size = nr_elements * sizeof(*data);
	ssize_t ret;

	TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
	TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);

	ret = pread(stats_fd, data, size,
		    header->data_offset + desc->offset);

	TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
		    desc->name, errno, strerror(errno));
	TEST_ASSERT(ret == size,
		    "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
		    desc->name, ret, size);
}

/*
 * Read the data of the named stat
 *
 * Input Args:
 *   vm - the VM for which the stat should be read
 *   stat_name - the name of the stat to read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
		   size_t max_elements)
{
	struct kvm_stats_desc *desc;
	size_t size_desc;
	int i;

	if (!vm->stats_fd) {
		vm->stats_fd = vm_get_stats_fd(vm);
		read_stats_header(vm->stats_fd, &vm->stats_header);
		vm->stats_desc = read_stats_descriptors(vm->stats_fd,
							&vm->stats_header);
	}

	size_desc = get_stats_descriptor_size(&vm->stats_header);

	for (i = 0; i < vm->stats_header.num_desc; ++i) {
		desc = (void *)vm->stats_desc + (i * size_desc);

		if (strcmp(desc->name, stat_name))
			continue;

		read_stat_data(vm->stats_fd, &vm->stats_header, desc,
			       data, max_elements);

		break;
	}
}
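
/*
 * Illustrative usage sketch (not part of this library): read a single-element
 * VM stat by name; the stat name shown ("remote_tlb_flush") is only an
 * example and is not available on every architecture:
 *
 *	uint64_t value;
 *
 *	__vm_get_stat(vm, "remote_tlb_flush", &value, 1);
 */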