1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * tools/testing/selftests/kvm/lib/kvm_util.c 4 * 5 * Copyright (C) 2018, Google LLC. 6 */ 7 8 #define _GNU_SOURCE /* for program_invocation_name */ 9 #include "test_util.h" 10 #include "kvm_util.h" 11 #include "processor.h" 12 13 #include <assert.h> 14 #include <sched.h> 15 #include <sys/mman.h> 16 #include <sys/types.h> 17 #include <sys/stat.h> 18 #include <unistd.h> 19 #include <linux/kernel.h> 20 21 #define KVM_UTIL_MIN_PFN 2 22 23 static int vcpu_mmap_sz(void); 24 25 int open_path_or_exit(const char *path, int flags) 26 { 27 int fd; 28 29 fd = open(path, flags); 30 __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); 31 32 return fd; 33 } 34 35 /* 36 * Open KVM_DEV_PATH if available, otherwise exit the entire program. 37 * 38 * Input Args: 39 * flags - The flags to pass when opening KVM_DEV_PATH. 40 * 41 * Return: 42 * The opened file descriptor of /dev/kvm. 43 */ 44 static int _open_kvm_dev_path_or_exit(int flags) 45 { 46 return open_path_or_exit(KVM_DEV_PATH, flags); 47 } 48 49 int open_kvm_dev_path_or_exit(void) 50 { 51 return _open_kvm_dev_path_or_exit(O_RDONLY); 52 } 53 54 static bool get_module_param_bool(const char *module_name, const char *param) 55 { 56 const int path_size = 128; 57 char path[path_size]; 58 char value; 59 ssize_t r; 60 int fd; 61 62 r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", 63 module_name, param); 64 TEST_ASSERT(r < path_size, 65 "Failed to construct sysfs path in %d bytes.", path_size); 66 67 fd = open_path_or_exit(path, O_RDONLY); 68 69 r = read(fd, &value, 1); 70 TEST_ASSERT(r == 1, "read(%s) failed", path); 71 72 r = close(fd); 73 TEST_ASSERT(!r, "close(%s) failed", path); 74 75 if (value == 'Y') 76 return true; 77 else if (value == 'N') 78 return false; 79 80 TEST_FAIL("Unrecognized value '%c' for boolean module param", value); 81 } 82 83 bool get_kvm_intel_param_bool(const char *param) 84 { 85 return get_module_param_bool("kvm_intel", param); 86 } 87 88 bool get_kvm_amd_param_bool(const char *param) 89 { 90 return get_module_param_bool("kvm_amd", param); 91 } 92 93 /* 94 * Capability 95 * 96 * Input Args: 97 * cap - Capability 98 * 99 * Output Args: None 100 * 101 * Return: 102 * On success, the Value corresponding to the capability (KVM_CAP_*) 103 * specified by the value of cap. On failure a TEST_ASSERT failure 104 * is produced. 105 * 106 * Looks up and returns the value corresponding to the capability 107 * (KVM_CAP_*) given by cap. 
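 *
 * Illustrative usage (a sketch, not taken from this file): a test can gate
 * its setup on a capability's value, e.g.
 *
 *	TEST_REQUIRE(kvm_check_cap(KVM_CAP_MAX_VCPUS) >= 3);
 *
 * where the required vCPU count is test-specific.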
108 */ 109 unsigned int kvm_check_cap(long cap) 110 { 111 int ret; 112 int kvm_fd; 113 114 kvm_fd = open_kvm_dev_path_or_exit(); 115 ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap); 116 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); 117 118 close(kvm_fd); 119 120 return (unsigned int)ret; 121 } 122 123 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) 124 { 125 if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) 126 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); 127 else 128 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); 129 vm->dirty_ring_size = ring_size; 130 } 131 132 static void vm_open(struct kvm_vm *vm) 133 { 134 vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR); 135 136 TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT)); 137 138 vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); 139 TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); 140 } 141 142 const char *vm_guest_mode_string(uint32_t i) 143 { 144 static const char * const strings[] = { 145 [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", 146 [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", 147 [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", 148 [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", 149 [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", 150 [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", 151 [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", 152 [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", 153 [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", 154 [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", 155 [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", 156 [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", 157 [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", 158 [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", 159 [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", 160 }; 161 _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, 162 "Missing new mode strings?"); 163 164 TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i); 165 166 return strings[i]; 167 } 168 169 const struct vm_guest_mode_params vm_guest_mode_params[] = { 170 [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, 171 [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, 172 [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, 173 [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, 174 [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, 175 [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, 176 [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, 177 [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, 178 [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, 179 [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, 180 [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, 181 [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, 182 [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, 183 [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, 184 [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, 185 }; 186 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, 187 "Missing new mode params?"); 188 189 struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) 190 { 191 struct kvm_vm *vm; 192 193 pr_debug("%s: mode='%s' pages='%ld'\n", __func__, 194 vm_guest_mode_string(mode), nr_pages); 195 196 vm = calloc(1, sizeof(*vm)); 197 TEST_ASSERT(vm != NULL, "Insufficient Memory"); 198 199 INIT_LIST_HEAD(&vm->vcpus); 200 vm->regions.gpa_tree = 
RB_ROOT; 201 vm->regions.hva_tree = RB_ROOT; 202 hash_init(vm->regions.slot_hash); 203 204 vm->mode = mode; 205 vm->type = 0; 206 207 vm->pa_bits = vm_guest_mode_params[mode].pa_bits; 208 vm->va_bits = vm_guest_mode_params[mode].va_bits; 209 vm->page_size = vm_guest_mode_params[mode].page_size; 210 vm->page_shift = vm_guest_mode_params[mode].page_shift; 211 212 /* Setup mode specific traits. */ 213 switch (vm->mode) { 214 case VM_MODE_P52V48_4K: 215 vm->pgtable_levels = 4; 216 break; 217 case VM_MODE_P52V48_64K: 218 vm->pgtable_levels = 3; 219 break; 220 case VM_MODE_P48V48_4K: 221 vm->pgtable_levels = 4; 222 break; 223 case VM_MODE_P48V48_64K: 224 vm->pgtable_levels = 3; 225 break; 226 case VM_MODE_P40V48_4K: 227 case VM_MODE_P36V48_4K: 228 vm->pgtable_levels = 4; 229 break; 230 case VM_MODE_P40V48_64K: 231 case VM_MODE_P36V48_64K: 232 vm->pgtable_levels = 3; 233 break; 234 case VM_MODE_P48V48_16K: 235 case VM_MODE_P40V48_16K: 236 case VM_MODE_P36V48_16K: 237 vm->pgtable_levels = 4; 238 break; 239 case VM_MODE_P36V47_16K: 240 vm->pgtable_levels = 3; 241 break; 242 case VM_MODE_PXXV48_4K: 243 #ifdef __x86_64__ 244 kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); 245 /* 246 * Ignore KVM support for 5-level paging (vm->va_bits == 57), 247 * it doesn't take effect unless a CR4.LA57 is set, which it 248 * isn't for this VM_MODE. 249 */ 250 TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, 251 "Linear address width (%d bits) not supported", 252 vm->va_bits); 253 pr_debug("Guest physical address width detected: %d\n", 254 vm->pa_bits); 255 vm->pgtable_levels = 4; 256 vm->va_bits = 48; 257 #else 258 TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); 259 #endif 260 break; 261 case VM_MODE_P47V64_4K: 262 vm->pgtable_levels = 5; 263 break; 264 case VM_MODE_P44V64_4K: 265 vm->pgtable_levels = 5; 266 break; 267 default: 268 TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); 269 } 270 271 #ifdef __aarch64__ 272 if (vm->pa_bits != 40) 273 vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); 274 #endif 275 276 vm_open(vm); 277 278 /* Limit to VA-bit canonical virtual addresses. */ 279 vm->vpages_valid = sparsebit_alloc(); 280 sparsebit_set_num(vm->vpages_valid, 281 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 282 sparsebit_set_num(vm->vpages_valid, 283 (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, 284 (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 285 286 /* Limit physical addresses to PA-bits. */ 287 vm->max_gfn = vm_compute_max_gfn(vm); 288 289 /* Allocate and setup memory for guest. */ 290 vm->vpages_mapped = sparsebit_alloc(); 291 if (nr_pages != 0) 292 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 293 0, 0, nr_pages, 0); 294 295 return vm; 296 } 297 298 static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, 299 uint32_t nr_runnable_vcpus, 300 uint64_t extra_mem_pages) 301 { 302 uint64_t nr_pages; 303 304 TEST_ASSERT(nr_runnable_vcpus, 305 "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); 306 307 TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), 308 "nr_vcpus = %d too large for host, max-vcpus = %d", 309 nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); 310 311 /* 312 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the 313 * test code and other per-VM assets that will be loaded into memslot0. 314 */ 315 nr_pages = 512; 316 317 /* Account for the per-vCPU stacks on behalf of the test. 
*/ 318 nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS; 319 320 /* 321 * Account for the number of pages needed for the page tables. The 322 * maximum page table size for a memory region will be when the 323 * smallest page size is used. Considering each page contains x page 324 * table descriptors, the total extra size for page tables (for extra 325 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller 326 * than N/x*2. 327 */ 328 nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; 329 330 return vm_adjust_num_guest_pages(mode, nr_pages); 331 } 332 333 struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, 334 uint64_t nr_extra_pages) 335 { 336 uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, 337 nr_extra_pages); 338 struct kvm_vm *vm; 339 340 vm = ____vm_create(mode, nr_pages); 341 342 kvm_vm_elf_load(vm, program_invocation_name); 343 344 #ifdef __x86_64__ 345 vm_create_irqchip(vm); 346 #endif 347 return vm; 348 } 349 350 /* 351 * VM Create with customized parameters 352 * 353 * Input Args: 354 * mode - VM Mode (e.g. VM_MODE_P52V48_4K) 355 * nr_vcpus - VCPU count 356 * extra_mem_pages - Non-slot0 physical memory total size 357 * guest_code - Guest entry point 358 * vcpuids - VCPU IDs 359 * 360 * Output Args: None 361 * 362 * Return: 363 * Pointer to opaque structure that describes the created VM. 364 * 365 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). 366 * extra_mem_pages is only used to calculate the maximum page table size, 367 * no real memory allocation for non-slot0 memory in this function. 368 */ 369 struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, 370 uint64_t extra_mem_pages, 371 void *guest_code, struct kvm_vcpu *vcpus[]) 372 { 373 struct kvm_vm *vm; 374 int i; 375 376 TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array"); 377 378 vm = __vm_create(mode, nr_vcpus, extra_mem_pages); 379 380 for (i = 0; i < nr_vcpus; ++i) 381 vcpus[i] = vm_vcpu_add(vm, i, guest_code); 382 383 return vm; 384 } 385 386 struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, 387 uint64_t extra_mem_pages, 388 void *guest_code) 389 { 390 struct kvm_vcpu *vcpus[1]; 391 struct kvm_vm *vm; 392 393 vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages, 394 guest_code, vcpus); 395 396 *vcpu = vcpus[0]; 397 return vm; 398 } 399 400 /* 401 * VM Restart 402 * 403 * Input Args: 404 * vm - VM that has been released before 405 * 406 * Output Args: None 407 * 408 * Reopens the file descriptors associated to the VM and reinstates the 409 * global state, such as the irqchip and the memory regions that are mapped 410 * into the guest. 
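 *
 * A minimal sketch of the intended flow (illustrative, not a complete test):
 *
 *	kvm_vm_release(vm);
 *	kvm_vm_restart(vm);
 *	vcpu = vm_vcpu_recreate(vm, 0);
 *
 * vm_recreate_with_one_vcpu() below wraps the last two steps for the common
 * single-vCPU case.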
411 */ 412 void kvm_vm_restart(struct kvm_vm *vmp) 413 { 414 int ctr; 415 struct userspace_mem_region *region; 416 417 vm_open(vmp); 418 if (vmp->has_irqchip) 419 vm_create_irqchip(vmp); 420 421 hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) { 422 int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); 423 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 424 " rc: %i errno: %i\n" 425 " slot: %u flags: 0x%x\n" 426 " guest_phys_addr: 0x%llx size: 0x%llx", 427 ret, errno, region->region.slot, 428 region->region.flags, 429 region->region.guest_phys_addr, 430 region->region.memory_size); 431 } 432 } 433 434 __weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, 435 uint32_t vcpu_id) 436 { 437 return __vm_vcpu_add(vm, vcpu_id); 438 } 439 440 struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) 441 { 442 kvm_vm_restart(vm); 443 444 return vm_vcpu_recreate(vm, 0); 445 } 446 447 void kvm_pin_this_task_to_pcpu(uint32_t pcpu) 448 { 449 cpu_set_t mask; 450 int r; 451 452 CPU_ZERO(&mask); 453 CPU_SET(pcpu, &mask); 454 r = sched_setaffinity(0, sizeof(mask), &mask); 455 TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); 456 } 457 458 static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) 459 { 460 uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); 461 462 TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), 463 "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); 464 return pcpu; 465 } 466 467 void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], 468 int nr_vcpus) 469 { 470 cpu_set_t allowed_mask; 471 char *cpu, *cpu_list; 472 char delim[2] = ","; 473 int i, r; 474 475 cpu_list = strdup(pcpus_string); 476 TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); 477 478 r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); 479 TEST_ASSERT(!r, "sched_getaffinity() failed"); 480 481 cpu = strtok(cpu_list, delim); 482 483 /* 1. Get all pcpus for vcpus. */ 484 for (i = 0; i < nr_vcpus; i++) { 485 TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); 486 vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); 487 cpu = strtok(NULL, delim); 488 } 489 490 /* 2. Check if the main worker needs to be pinned. */ 491 if (cpu) { 492 kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); 493 cpu = strtok(NULL, delim); 494 } 495 496 TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu); 497 free(cpu_list); 498 } 499 500 /* 501 * Userspace Memory Region Find 502 * 503 * Input Args: 504 * vm - Virtual Machine 505 * start - Starting VM physical address 506 * end - Ending VM physical address, inclusive. 507 * 508 * Output Args: None 509 * 510 * Return: 511 * Pointer to overlapping region, NULL if no such region. 512 * 513 * Searches for a region with any physical memory that overlaps with 514 * any portion of the guest physical addresses from start to end 515 * inclusive. If multiple overlapping regions exist, a pointer to any 516 * of the regions is returned. Null is returned only when no overlapping 517 * region exists. 
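 *
 * Overlap is byte-granular; for example (illustrative numbers), a region
 * spanning GPAs [0x10000, 0x1ffff] is found by
 *
 *	region = userspace_mem_region_find(vm, 0x1f000, 0x2ffff);
 *
 * because the two ranges share at least one byte.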
518 */ 519 static struct userspace_mem_region * 520 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) 521 { 522 struct rb_node *node; 523 524 for (node = vm->regions.gpa_tree.rb_node; node; ) { 525 struct userspace_mem_region *region = 526 container_of(node, struct userspace_mem_region, gpa_node); 527 uint64_t existing_start = region->region.guest_phys_addr; 528 uint64_t existing_end = region->region.guest_phys_addr 529 + region->region.memory_size - 1; 530 if (start <= existing_end && end >= existing_start) 531 return region; 532 533 if (start < existing_start) 534 node = node->rb_left; 535 else 536 node = node->rb_right; 537 } 538 539 return NULL; 540 } 541 542 /* 543 * KVM Userspace Memory Region Find 544 * 545 * Input Args: 546 * vm - Virtual Machine 547 * start - Starting VM physical address 548 * end - Ending VM physical address, inclusive. 549 * 550 * Output Args: None 551 * 552 * Return: 553 * Pointer to overlapping region, NULL if no such region. 554 * 555 * Public interface to userspace_mem_region_find. Allows tests to look up 556 * the memslot datastructure for a given range of guest physical memory. 557 */ 558 struct kvm_userspace_memory_region * 559 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, 560 uint64_t end) 561 { 562 struct userspace_mem_region *region; 563 564 region = userspace_mem_region_find(vm, start, end); 565 if (!region) 566 return NULL; 567 568 return ®ion->region; 569 } 570 571 __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) 572 { 573 574 } 575 576 /* 577 * VM VCPU Remove 578 * 579 * Input Args: 580 * vcpu - VCPU to remove 581 * 582 * Output Args: None 583 * 584 * Return: None, TEST_ASSERT failures for all error conditions 585 * 586 * Removes a vCPU from a VM and frees its resources. 587 */ 588 static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) 589 { 590 int ret; 591 592 if (vcpu->dirty_gfns) { 593 ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); 594 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 595 vcpu->dirty_gfns = NULL; 596 } 597 598 ret = munmap(vcpu->run, vcpu_mmap_sz()); 599 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 600 601 ret = close(vcpu->fd); 602 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 603 604 list_del(&vcpu->list); 605 606 vcpu_arch_free(vcpu); 607 free(vcpu); 608 } 609 610 void kvm_vm_release(struct kvm_vm *vmp) 611 { 612 struct kvm_vcpu *vcpu, *tmp; 613 int ret; 614 615 list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) 616 vm_vcpu_rm(vmp, vcpu); 617 618 ret = close(vmp->fd); 619 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 620 621 ret = close(vmp->kvm_fd); 622 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 623 } 624 625 static void __vm_mem_region_delete(struct kvm_vm *vm, 626 struct userspace_mem_region *region, 627 bool unlink) 628 { 629 int ret; 630 631 if (unlink) { 632 rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); 633 rb_erase(®ion->hva_node, &vm->regions.hva_tree); 634 hash_del(®ion->slot_node); 635 } 636 637 region->region.memory_size = 0; 638 vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 639 640 sparsebit_free(®ion->unused_phy_pages); 641 ret = munmap(region->mmap_start, region->mmap_size); 642 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 643 644 free(region); 645 } 646 647 /* 648 * Destroys and frees the VM pointed to by vmp. 
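 *
 * Typical lifecycle sketch (illustrative): every path that creates a VM is
 * expected to end with this call, e.g.
 *
 *	vm = vm_create_barebones();
 *	...
 *	kvm_vm_free(vm);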
649 */ 650 void kvm_vm_free(struct kvm_vm *vmp) 651 { 652 int ctr; 653 struct hlist_node *node; 654 struct userspace_mem_region *region; 655 656 if (vmp == NULL) 657 return; 658 659 /* Free cached stats metadata and close FD */ 660 if (vmp->stats_fd) { 661 free(vmp->stats_desc); 662 close(vmp->stats_fd); 663 } 664 665 /* Free userspace_mem_regions. */ 666 hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) 667 __vm_mem_region_delete(vmp, region, false); 668 669 /* Free sparsebit arrays. */ 670 sparsebit_free(&vmp->vpages_valid); 671 sparsebit_free(&vmp->vpages_mapped); 672 673 kvm_vm_release(vmp); 674 675 /* Free the structure describing the VM. */ 676 free(vmp); 677 } 678 679 int kvm_memfd_alloc(size_t size, bool hugepages) 680 { 681 int memfd_flags = MFD_CLOEXEC; 682 int fd, r; 683 684 if (hugepages) 685 memfd_flags |= MFD_HUGETLB; 686 687 fd = memfd_create("kvm_selftest", memfd_flags); 688 TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); 689 690 r = ftruncate(fd, size); 691 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); 692 693 r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); 694 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); 695 696 return fd; 697 } 698 699 /* 700 * Memory Compare, host virtual to guest virtual 701 * 702 * Input Args: 703 * hva - Starting host virtual address 704 * vm - Virtual Machine 705 * gva - Starting guest virtual address 706 * len - number of bytes to compare 707 * 708 * Output Args: None 709 * 710 * Input/Output Args: None 711 * 712 * Return: 713 * Returns 0 if the bytes starting at hva for a length of len 714 * are equal the guest virtual bytes starting at gva. Returns 715 * a value < 0, if bytes at hva are less than those at gva. 716 * Otherwise a value > 0 is returned. 717 * 718 * Compares the bytes starting at the host virtual address hva, for 719 * a length of len, to the guest bytes starting at the guest virtual 720 * address given by gva. 721 */ 722 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) 723 { 724 size_t amt; 725 726 /* 727 * Compare a batch of bytes until either a match is found 728 * or all the bytes have been compared. 729 */ 730 for (uintptr_t offset = 0; offset < len; offset += amt) { 731 uintptr_t ptr1 = (uintptr_t)hva + offset; 732 733 /* 734 * Determine host address for guest virtual address 735 * at offset. 736 */ 737 uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); 738 739 /* 740 * Determine amount to compare on this pass. 741 * Don't allow the comparsion to cross a page boundary. 742 */ 743 amt = len - offset; 744 if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) 745 amt = vm->page_size - (ptr1 % vm->page_size); 746 if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) 747 amt = vm->page_size - (ptr2 % vm->page_size); 748 749 assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); 750 assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); 751 752 /* 753 * Perform the comparison. If there is a difference 754 * return that result to the caller, otherwise need 755 * to continue on looking for a mismatch. 756 */ 757 int ret = memcmp((void *)ptr1, (void *)ptr2, amt); 758 if (ret != 0) 759 return ret; 760 } 761 762 /* 763 * No mismatch found. Let the caller know the two memory 764 * areas are equal. 
765 */ 766 return 0; 767 } 768 769 static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, 770 struct userspace_mem_region *region) 771 { 772 struct rb_node **cur, *parent; 773 774 for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) { 775 struct userspace_mem_region *cregion; 776 777 cregion = container_of(*cur, typeof(*cregion), gpa_node); 778 parent = *cur; 779 if (region->region.guest_phys_addr < 780 cregion->region.guest_phys_addr) 781 cur = &(*cur)->rb_left; 782 else { 783 TEST_ASSERT(region->region.guest_phys_addr != 784 cregion->region.guest_phys_addr, 785 "Duplicate GPA in region tree"); 786 787 cur = &(*cur)->rb_right; 788 } 789 } 790 791 rb_link_node(®ion->gpa_node, parent, cur); 792 rb_insert_color(®ion->gpa_node, gpa_tree); 793 } 794 795 static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, 796 struct userspace_mem_region *region) 797 { 798 struct rb_node **cur, *parent; 799 800 for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) { 801 struct userspace_mem_region *cregion; 802 803 cregion = container_of(*cur, typeof(*cregion), hva_node); 804 parent = *cur; 805 if (region->host_mem < cregion->host_mem) 806 cur = &(*cur)->rb_left; 807 else { 808 TEST_ASSERT(region->host_mem != 809 cregion->host_mem, 810 "Duplicate HVA in region tree"); 811 812 cur = &(*cur)->rb_right; 813 } 814 } 815 816 rb_link_node(®ion->hva_node, parent, cur); 817 rb_insert_color(®ion->hva_node, hva_tree); 818 } 819 820 821 int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 822 uint64_t gpa, uint64_t size, void *hva) 823 { 824 struct kvm_userspace_memory_region region = { 825 .slot = slot, 826 .flags = flags, 827 .guest_phys_addr = gpa, 828 .memory_size = size, 829 .userspace_addr = (uintptr_t)hva, 830 }; 831 832 return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion); 833 } 834 835 void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 836 uint64_t gpa, uint64_t size, void *hva) 837 { 838 int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); 839 840 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)", 841 errno, strerror(errno)); 842 } 843 844 /* 845 * VM Userspace Memory Region Add 846 * 847 * Input Args: 848 * vm - Virtual Machine 849 * src_type - Storage source for this region. 850 * NULL to use anonymous memory. 851 * guest_paddr - Starting guest physical address 852 * slot - KVM region slot 853 * npages - Number of physical pages 854 * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) 855 * 856 * Output Args: None 857 * 858 * Return: None 859 * 860 * Allocates a memory area of the number of pages specified by npages 861 * and maps it to the VM specified by vm, at a starting physical address 862 * given by guest_paddr. The region is created with a KVM region slot 863 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The 864 * region is created with the flags given by flags. 865 */ 866 void vm_userspace_mem_region_add(struct kvm_vm *vm, 867 enum vm_mem_backing_src_type src_type, 868 uint64_t guest_paddr, uint32_t slot, uint64_t npages, 869 uint32_t flags) 870 { 871 int ret; 872 struct userspace_mem_region *region; 873 size_t backing_src_pagesz = get_backing_src_pagesz(src_type); 874 size_t alignment; 875 876 TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, 877 "Number of guest pages is not compatible with the host. 
" 878 "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); 879 880 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " 881 "address not on a page boundary.\n" 882 " guest_paddr: 0x%lx vm->page_size: 0x%x", 883 guest_paddr, vm->page_size); 884 TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) 885 <= vm->max_gfn, "Physical range beyond maximum " 886 "supported physical address,\n" 887 " guest_paddr: 0x%lx npages: 0x%lx\n" 888 " vm->max_gfn: 0x%lx vm->page_size: 0x%x", 889 guest_paddr, npages, vm->max_gfn, vm->page_size); 890 891 /* 892 * Confirm a mem region with an overlapping address doesn't 893 * already exist. 894 */ 895 region = (struct userspace_mem_region *) userspace_mem_region_find( 896 vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); 897 if (region != NULL) 898 TEST_FAIL("overlapping userspace_mem_region already " 899 "exists\n" 900 " requested guest_paddr: 0x%lx npages: 0x%lx " 901 "page_size: 0x%x\n" 902 " existing guest_paddr: 0x%lx size: 0x%lx", 903 guest_paddr, npages, vm->page_size, 904 (uint64_t) region->region.guest_phys_addr, 905 (uint64_t) region->region.memory_size); 906 907 /* Confirm no region with the requested slot already exists. */ 908 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 909 slot) { 910 if (region->region.slot != slot) 911 continue; 912 913 TEST_FAIL("A mem region with the requested slot " 914 "already exists.\n" 915 " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" 916 " existing slot: %u paddr: 0x%lx size: 0x%lx", 917 slot, guest_paddr, npages, 918 region->region.slot, 919 (uint64_t) region->region.guest_phys_addr, 920 (uint64_t) region->region.memory_size); 921 } 922 923 /* Allocate and initialize new mem region structure. */ 924 region = calloc(1, sizeof(*region)); 925 TEST_ASSERT(region != NULL, "Insufficient Memory"); 926 region->mmap_size = npages * vm->page_size; 927 928 #ifdef __s390x__ 929 /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ 930 alignment = 0x100000; 931 #else 932 alignment = 1; 933 #endif 934 935 /* 936 * When using THP mmap is not guaranteed to returned a hugepage aligned 937 * address so we have to pad the mmap. Padding is not needed for HugeTLB 938 * because mmap will always return an address aligned to the HugeTLB 939 * page size. 
940 */ 941 if (src_type == VM_MEM_SRC_ANONYMOUS_THP) 942 alignment = max(backing_src_pagesz, alignment); 943 944 ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); 945 946 /* Add enough memory to align up if necessary */ 947 if (alignment > 1) 948 region->mmap_size += alignment; 949 950 region->fd = -1; 951 if (backing_src_is_shared(src_type)) 952 region->fd = kvm_memfd_alloc(region->mmap_size, 953 src_type == VM_MEM_SRC_SHARED_HUGETLB); 954 955 region->mmap_start = mmap(NULL, region->mmap_size, 956 PROT_READ | PROT_WRITE, 957 vm_mem_backing_src_alias(src_type)->flag, 958 region->fd, 0); 959 TEST_ASSERT(region->mmap_start != MAP_FAILED, 960 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 961 962 TEST_ASSERT(!is_backing_src_hugetlb(src_type) || 963 region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), 964 "mmap_start %p is not aligned to HugeTLB page size 0x%lx", 965 region->mmap_start, backing_src_pagesz); 966 967 /* Align host address */ 968 region->host_mem = align_ptr_up(region->mmap_start, alignment); 969 970 /* As needed perform madvise */ 971 if ((src_type == VM_MEM_SRC_ANONYMOUS || 972 src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { 973 ret = madvise(region->host_mem, npages * vm->page_size, 974 src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); 975 TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", 976 region->host_mem, npages * vm->page_size, 977 vm_mem_backing_src_alias(src_type)->name); 978 } 979 980 region->unused_phy_pages = sparsebit_alloc(); 981 sparsebit_set_num(region->unused_phy_pages, 982 guest_paddr >> vm->page_shift, npages); 983 region->region.slot = slot; 984 region->region.flags = flags; 985 region->region.guest_phys_addr = guest_paddr; 986 region->region.memory_size = npages * vm->page_size; 987 region->region.userspace_addr = (uintptr_t) region->host_mem; 988 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 989 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 990 " rc: %i errno: %i\n" 991 " slot: %u flags: 0x%x\n" 992 " guest_phys_addr: 0x%lx size: 0x%lx", 993 ret, errno, slot, flags, 994 guest_paddr, (uint64_t) region->region.memory_size); 995 996 /* Add to quick lookup data structures */ 997 vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); 998 vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region); 999 hash_add(vm->regions.slot_hash, ®ion->slot_node, slot); 1000 1001 /* If shared memory, create an alias. */ 1002 if (region->fd >= 0) { 1003 region->mmap_alias = mmap(NULL, region->mmap_size, 1004 PROT_READ | PROT_WRITE, 1005 vm_mem_backing_src_alias(src_type)->flag, 1006 region->fd, 0); 1007 TEST_ASSERT(region->mmap_alias != MAP_FAILED, 1008 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 1009 1010 /* Align host alias address */ 1011 region->host_alias = align_ptr_up(region->mmap_alias, alignment); 1012 } 1013 } 1014 1015 /* 1016 * Memslot to region 1017 * 1018 * Input Args: 1019 * vm - Virtual Machine 1020 * memslot - KVM memory slot ID 1021 * 1022 * Output Args: None 1023 * 1024 * Return: 1025 * Pointer to memory region structure that describe memory region 1026 * using kvm memory slot ID given by memslot. TEST_ASSERT failure 1027 * on error (e.g. currently no memory region using memslot as a KVM 1028 * memory slot ID). 
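 *
 * Illustrative usage (a sketch): helpers that need the backing region for a
 * slot, e.g. to consult its unused_phy_pages sparsebit, do
 *
 *	region = memslot2region(vm, memslot);
 *	base = region->region.guest_phys_addr >> vm->page_shift;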
1029 */ 1030 struct userspace_mem_region * 1031 memslot2region(struct kvm_vm *vm, uint32_t memslot) 1032 { 1033 struct userspace_mem_region *region; 1034 1035 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 1036 memslot) 1037 if (region->region.slot == memslot) 1038 return region; 1039 1040 fprintf(stderr, "No mem region with the requested slot found,\n" 1041 " requested slot: %u\n", memslot); 1042 fputs("---- vm dump ----\n", stderr); 1043 vm_dump(stderr, vm, 2); 1044 TEST_FAIL("Mem region not found"); 1045 return NULL; 1046 } 1047 1048 /* 1049 * VM Memory Region Flags Set 1050 * 1051 * Input Args: 1052 * vm - Virtual Machine 1053 * flags - Starting guest physical address 1054 * 1055 * Output Args: None 1056 * 1057 * Return: None 1058 * 1059 * Sets the flags of the memory region specified by the value of slot, 1060 * to the values given by flags. 1061 */ 1062 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) 1063 { 1064 int ret; 1065 struct userspace_mem_region *region; 1066 1067 region = memslot2region(vm, slot); 1068 1069 region->region.flags = flags; 1070 1071 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 1072 1073 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 1074 " rc: %i errno: %i slot: %u flags: 0x%x", 1075 ret, errno, slot, flags); 1076 } 1077 1078 /* 1079 * VM Memory Region Move 1080 * 1081 * Input Args: 1082 * vm - Virtual Machine 1083 * slot - Slot of the memory region to move 1084 * new_gpa - Starting guest physical address 1085 * 1086 * Output Args: None 1087 * 1088 * Return: None 1089 * 1090 * Change the gpa of a memory region. 1091 */ 1092 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) 1093 { 1094 struct userspace_mem_region *region; 1095 int ret; 1096 1097 region = memslot2region(vm, slot); 1098 1099 region->region.guest_phys_addr = new_gpa; 1100 1101 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 1102 1103 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" 1104 "ret: %i errno: %i slot: %u new_gpa: 0x%lx", 1105 ret, errno, slot, new_gpa); 1106 } 1107 1108 /* 1109 * VM Memory Region Delete 1110 * 1111 * Input Args: 1112 * vm - Virtual Machine 1113 * slot - Slot of the memory region to delete 1114 * 1115 * Output Args: None 1116 * 1117 * Return: None 1118 * 1119 * Delete a memory region. 1120 */ 1121 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) 1122 { 1123 __vm_mem_region_delete(vm, memslot2region(vm, slot), true); 1124 } 1125 1126 /* Returns the size of a vCPU's kvm_run structure. */ 1127 static int vcpu_mmap_sz(void) 1128 { 1129 int dev_fd, ret; 1130 1131 dev_fd = open_kvm_dev_path_or_exit(); 1132 1133 ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); 1134 TEST_ASSERT(ret >= sizeof(struct kvm_run), 1135 KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); 1136 1137 close(dev_fd); 1138 1139 return ret; 1140 } 1141 1142 static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) 1143 { 1144 struct kvm_vcpu *vcpu; 1145 1146 list_for_each_entry(vcpu, &vm->vcpus, list) { 1147 if (vcpu->id == vcpu_id) 1148 return true; 1149 } 1150 1151 return false; 1152 } 1153 1154 /* 1155 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id. 1156 * No additional vCPU setup is done. Returns the vCPU. 1157 */ 1158 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) 1159 { 1160 struct kvm_vcpu *vcpu; 1161 1162 /* Confirm a vcpu with the specified id doesn't already exist. 
*/ 1163 TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); 1164 1165 /* Allocate and initialize new vcpu structure. */ 1166 vcpu = calloc(1, sizeof(*vcpu)); 1167 TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); 1168 1169 vcpu->vm = vm; 1170 vcpu->id = vcpu_id; 1171 vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); 1172 TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd)); 1173 1174 TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " 1175 "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", 1176 vcpu_mmap_sz(), sizeof(*vcpu->run)); 1177 vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), 1178 PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); 1179 TEST_ASSERT(vcpu->run != MAP_FAILED, 1180 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 1181 1182 /* Add to linked-list of VCPUs. */ 1183 list_add(&vcpu->list, &vm->vcpus); 1184 1185 return vcpu; 1186 } 1187 1188 /* 1189 * VM Virtual Address Unused Gap 1190 * 1191 * Input Args: 1192 * vm - Virtual Machine 1193 * sz - Size (bytes) 1194 * vaddr_min - Minimum Virtual Address 1195 * 1196 * Output Args: None 1197 * 1198 * Return: 1199 * Lowest virtual address at or below vaddr_min, with at least 1200 * sz unused bytes. TEST_ASSERT failure if no area of at least 1201 * size sz is available. 1202 * 1203 * Within the VM specified by vm, locates the lowest starting virtual 1204 * address >= vaddr_min, that has at least sz unallocated bytes. A 1205 * TEST_ASSERT failure occurs for invalid input or no area of at least 1206 * sz unallocated bytes >= vaddr_min is available. 1207 */ 1208 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, 1209 vm_vaddr_t vaddr_min) 1210 { 1211 uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; 1212 1213 /* Determine lowest permitted virtual page index. */ 1214 uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; 1215 if ((pgidx_start * vm->page_size) < vaddr_min) 1216 goto no_va_found; 1217 1218 /* Loop over section with enough valid virtual page indexes. */ 1219 if (!sparsebit_is_set_num(vm->vpages_valid, 1220 pgidx_start, pages)) 1221 pgidx_start = sparsebit_next_set_num(vm->vpages_valid, 1222 pgidx_start, pages); 1223 do { 1224 /* 1225 * Are there enough unused virtual pages available at 1226 * the currently proposed starting virtual page index. 1227 * If not, adjust proposed starting index to next 1228 * possible. 1229 */ 1230 if (sparsebit_is_clear_num(vm->vpages_mapped, 1231 pgidx_start, pages)) 1232 goto va_found; 1233 pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, 1234 pgidx_start, pages); 1235 if (pgidx_start == 0) 1236 goto no_va_found; 1237 1238 /* 1239 * If needed, adjust proposed starting virtual address, 1240 * to next range of valid virtual addresses. 
1241 */ 1242 if (!sparsebit_is_set_num(vm->vpages_valid, 1243 pgidx_start, pages)) { 1244 pgidx_start = sparsebit_next_set_num( 1245 vm->vpages_valid, pgidx_start, pages); 1246 if (pgidx_start == 0) 1247 goto no_va_found; 1248 } 1249 } while (pgidx_start != 0); 1250 1251 no_va_found: 1252 TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); 1253 1254 /* NOT REACHED */ 1255 return -1; 1256 1257 va_found: 1258 TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, 1259 pgidx_start, pages), 1260 "Unexpected, invalid virtual page index range,\n" 1261 " pgidx_start: 0x%lx\n" 1262 " pages: 0x%lx", 1263 pgidx_start, pages); 1264 TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, 1265 pgidx_start, pages), 1266 "Unexpected, pages already mapped,\n" 1267 " pgidx_start: 0x%lx\n" 1268 " pages: 0x%lx", 1269 pgidx_start, pages); 1270 1271 return pgidx_start * vm->page_size; 1272 } 1273 1274 /* 1275 * VM Virtual Address Allocate 1276 * 1277 * Input Args: 1278 * vm - Virtual Machine 1279 * sz - Size in bytes 1280 * vaddr_min - Minimum starting virtual address 1281 * 1282 * Output Args: None 1283 * 1284 * Return: 1285 * Starting guest virtual address 1286 * 1287 * Allocates at least sz bytes within the virtual address space of the vm 1288 * given by vm. The allocated bytes are mapped to a virtual address >= 1289 * the address given by vaddr_min. Note that each allocation uses a 1290 * a unique set of pages, with the minimum real allocation being at least 1291 * a page. 1292 */ 1293 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) 1294 { 1295 uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); 1296 1297 virt_pgd_alloc(vm); 1298 vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, 1299 KVM_UTIL_MIN_PFN * vm->page_size, 0); 1300 1301 /* 1302 * Find an unused range of virtual page addresses of at least 1303 * pages in length. 1304 */ 1305 vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); 1306 1307 /* Map the virtual pages. */ 1308 for (vm_vaddr_t vaddr = vaddr_start; pages > 0; 1309 pages--, vaddr += vm->page_size, paddr += vm->page_size) { 1310 1311 virt_pg_map(vm, vaddr, paddr); 1312 1313 sparsebit_set(vm->vpages_mapped, 1314 vaddr >> vm->page_shift); 1315 } 1316 1317 return vaddr_start; 1318 } 1319 1320 /* 1321 * VM Virtual Address Allocate Pages 1322 * 1323 * Input Args: 1324 * vm - Virtual Machine 1325 * 1326 * Output Args: None 1327 * 1328 * Return: 1329 * Starting guest virtual address 1330 * 1331 * Allocates at least N system pages worth of bytes within the virtual address 1332 * space of the vm. 1333 */ 1334 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) 1335 { 1336 return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); 1337 } 1338 1339 /* 1340 * VM Virtual Address Allocate Page 1341 * 1342 * Input Args: 1343 * vm - Virtual Machine 1344 * 1345 * Output Args: None 1346 * 1347 * Return: 1348 * Starting guest virtual address 1349 * 1350 * Allocates at least one system page worth of bytes within the virtual address 1351 * space of the vm. 
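 *
 * Illustrative usage (a sketch): tests commonly use this for small guest
 * buffers, e.g.
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc_page(vm);
 *
 *	memset(addr_gva2hva(vm, gva), 0, vm->page_size);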
1352 */ 1353 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) 1354 { 1355 return vm_vaddr_alloc_pages(vm, 1); 1356 } 1357 1358 /* 1359 * Map a range of VM virtual address to the VM's physical address 1360 * 1361 * Input Args: 1362 * vm - Virtual Machine 1363 * vaddr - Virtuall address to map 1364 * paddr - VM Physical Address 1365 * npages - The number of pages to map 1366 * 1367 * Output Args: None 1368 * 1369 * Return: None 1370 * 1371 * Within the VM given by @vm, creates a virtual translation for 1372 * @npages starting at @vaddr to the page range starting at @paddr. 1373 */ 1374 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 1375 unsigned int npages) 1376 { 1377 size_t page_size = vm->page_size; 1378 size_t size = npages * page_size; 1379 1380 TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); 1381 TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); 1382 1383 while (npages--) { 1384 virt_pg_map(vm, vaddr, paddr); 1385 vaddr += page_size; 1386 paddr += page_size; 1387 } 1388 } 1389 1390 /* 1391 * Address VM Physical to Host Virtual 1392 * 1393 * Input Args: 1394 * vm - Virtual Machine 1395 * gpa - VM physical address 1396 * 1397 * Output Args: None 1398 * 1399 * Return: 1400 * Equivalent host virtual address 1401 * 1402 * Locates the memory region containing the VM physical address given 1403 * by gpa, within the VM given by vm. When found, the host virtual 1404 * address providing the memory to the vm physical address is returned. 1405 * A TEST_ASSERT failure occurs if no region containing gpa exists. 1406 */ 1407 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) 1408 { 1409 struct userspace_mem_region *region; 1410 1411 region = userspace_mem_region_find(vm, gpa, gpa); 1412 if (!region) { 1413 TEST_FAIL("No vm physical memory at 0x%lx", gpa); 1414 return NULL; 1415 } 1416 1417 return (void *)((uintptr_t)region->host_mem 1418 + (gpa - region->region.guest_phys_addr)); 1419 } 1420 1421 /* 1422 * Address Host Virtual to VM Physical 1423 * 1424 * Input Args: 1425 * vm - Virtual Machine 1426 * hva - Host virtual address 1427 * 1428 * Output Args: None 1429 * 1430 * Return: 1431 * Equivalent VM physical address 1432 * 1433 * Locates the memory region containing the host virtual address given 1434 * by hva, within the VM given by vm. When found, the equivalent 1435 * VM physical address is returned. A TEST_ASSERT failure occurs if no 1436 * region containing hva exists. 1437 */ 1438 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) 1439 { 1440 struct rb_node *node; 1441 1442 for (node = vm->regions.hva_tree.rb_node; node; ) { 1443 struct userspace_mem_region *region = 1444 container_of(node, struct userspace_mem_region, hva_node); 1445 1446 if (hva >= region->host_mem) { 1447 if (hva <= (region->host_mem 1448 + region->region.memory_size - 1)) 1449 return (vm_paddr_t)((uintptr_t) 1450 region->region.guest_phys_addr 1451 + (hva - (uintptr_t)region->host_mem)); 1452 1453 node = node->rb_right; 1454 } else 1455 node = node->rb_left; 1456 } 1457 1458 TEST_FAIL("No mapping to a guest physical address, hva: %p", hva); 1459 return -1; 1460 } 1461 1462 /* 1463 * Address VM physical to Host Virtual *alias*. 1464 * 1465 * Input Args: 1466 * vm - Virtual Machine 1467 * gpa - VM physical address 1468 * 1469 * Output Args: None 1470 * 1471 * Return: 1472 * Equivalent address within the host virtual *alias* area, or NULL 1473 * (without failing the test) if the guest memory is not shared (so 1474 * no alias exists). 
1475 * 1476 * Create a writable, shared virtual=>physical alias for the specific GPA. 1477 * The primary use case is to allow the host selftest to manipulate guest 1478 * memory without mapping said memory in the guest's address space. And, for 1479 * userfaultfd-based demand paging, to do so without triggering userfaults. 1480 */ 1481 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) 1482 { 1483 struct userspace_mem_region *region; 1484 uintptr_t offset; 1485 1486 region = userspace_mem_region_find(vm, gpa, gpa); 1487 if (!region) 1488 return NULL; 1489 1490 if (!region->host_alias) 1491 return NULL; 1492 1493 offset = gpa - region->region.guest_phys_addr; 1494 return (void *) ((uintptr_t) region->host_alias + offset); 1495 } 1496 1497 /* Create an interrupt controller chip for the specified VM. */ 1498 void vm_create_irqchip(struct kvm_vm *vm) 1499 { 1500 vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); 1501 1502 vm->has_irqchip = true; 1503 } 1504 1505 int _vcpu_run(struct kvm_vcpu *vcpu) 1506 { 1507 int rc; 1508 1509 do { 1510 rc = __vcpu_run(vcpu); 1511 } while (rc == -1 && errno == EINTR); 1512 1513 assert_on_unhandled_exception(vcpu); 1514 1515 return rc; 1516 } 1517 1518 /* 1519 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR. 1520 * Assert if the KVM returns an error (other than -EINTR). 1521 */ 1522 void vcpu_run(struct kvm_vcpu *vcpu) 1523 { 1524 int ret = _vcpu_run(vcpu); 1525 1526 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret)); 1527 } 1528 1529 void vcpu_run_complete_io(struct kvm_vcpu *vcpu) 1530 { 1531 int ret; 1532 1533 vcpu->run->immediate_exit = 1; 1534 ret = __vcpu_run(vcpu); 1535 vcpu->run->immediate_exit = 0; 1536 1537 TEST_ASSERT(ret == -1 && errno == EINTR, 1538 "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", 1539 ret, errno); 1540 } 1541 1542 /* 1543 * Get the list of guest registers which are supported for 1544 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer, 1545 * it is the caller's responsibility to free the list. 
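 *
 * Illustrative usage (a sketch; use() is a hypothetical stand-in for
 * whatever the test does with each register ID):
 *
 *	struct kvm_reg_list *list = vcpu_get_reg_list(vcpu);
 *
 *	for (i = 0; i < list->n; i++)
 *		use(list->reg[i]);
 *	free(list);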
1546 */ 1547 struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) 1548 { 1549 struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; 1550 int ret; 1551 1552 ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®_list_n); 1553 TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); 1554 1555 reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); 1556 reg_list->n = reg_list_n.n; 1557 vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list); 1558 return reg_list; 1559 } 1560 1561 void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) 1562 { 1563 uint32_t page_size = vcpu->vm->page_size; 1564 uint32_t size = vcpu->vm->dirty_ring_size; 1565 1566 TEST_ASSERT(size > 0, "Should enable dirty ring first"); 1567 1568 if (!vcpu->dirty_gfns) { 1569 void *addr; 1570 1571 addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd, 1572 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1573 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private"); 1574 1575 addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd, 1576 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1577 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); 1578 1579 addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 1580 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1581 TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); 1582 1583 vcpu->dirty_gfns = addr; 1584 vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); 1585 } 1586 1587 return vcpu->dirty_gfns; 1588 } 1589 1590 /* 1591 * Device Ioctl 1592 */ 1593 1594 int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) 1595 { 1596 struct kvm_device_attr attribute = { 1597 .group = group, 1598 .attr = attr, 1599 .flags = 0, 1600 }; 1601 1602 return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); 1603 } 1604 1605 int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) 1606 { 1607 struct kvm_create_device create_dev = { 1608 .type = type, 1609 .flags = KVM_CREATE_DEVICE_TEST, 1610 }; 1611 1612 return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); 1613 } 1614 1615 int __kvm_create_device(struct kvm_vm *vm, uint64_t type) 1616 { 1617 struct kvm_create_device create_dev = { 1618 .type = type, 1619 .fd = -1, 1620 .flags = 0, 1621 }; 1622 int err; 1623 1624 err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); 1625 TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value"); 1626 return err ? : create_dev.fd; 1627 } 1628 1629 int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) 1630 { 1631 struct kvm_device_attr kvmattr = { 1632 .group = group, 1633 .attr = attr, 1634 .flags = 0, 1635 .addr = (uintptr_t)val, 1636 }; 1637 1638 return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); 1639 } 1640 1641 int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) 1642 { 1643 struct kvm_device_attr kvmattr = { 1644 .group = group, 1645 .attr = attr, 1646 .flags = 0, 1647 .addr = (uintptr_t)val, 1648 }; 1649 1650 return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr); 1651 } 1652 1653 /* 1654 * IRQ related functions. 
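 *
 * Illustrative usage of the GSI routing helpers below (a sketch): build a
 * table, add entries, then write it; the write also frees the table:
 *
 *	routing = kvm_gsi_routing_create();
 *	kvm_gsi_routing_irqchip_add(routing, gsi, pin);
 *	kvm_gsi_routing_write(vm, routing);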
1655 */ 1656 1657 int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1658 { 1659 struct kvm_irq_level irq_level = { 1660 .irq = irq, 1661 .level = level, 1662 }; 1663 1664 return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); 1665 } 1666 1667 void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1668 { 1669 int ret = _kvm_irq_line(vm, irq, level); 1670 1671 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); 1672 } 1673 1674 struct kvm_irq_routing *kvm_gsi_routing_create(void) 1675 { 1676 struct kvm_irq_routing *routing; 1677 size_t size; 1678 1679 size = sizeof(struct kvm_irq_routing); 1680 /* Allocate space for the max number of entries: this wastes 196 KBs. */ 1681 size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry); 1682 routing = calloc(1, size); 1683 assert(routing); 1684 1685 return routing; 1686 } 1687 1688 void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, 1689 uint32_t gsi, uint32_t pin) 1690 { 1691 int i; 1692 1693 assert(routing); 1694 assert(routing->nr < KVM_MAX_IRQ_ROUTES); 1695 1696 i = routing->nr; 1697 routing->entries[i].gsi = gsi; 1698 routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; 1699 routing->entries[i].flags = 0; 1700 routing->entries[i].u.irqchip.irqchip = 0; 1701 routing->entries[i].u.irqchip.pin = pin; 1702 routing->nr++; 1703 } 1704 1705 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1706 { 1707 int ret; 1708 1709 assert(routing); 1710 ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); 1711 free(routing); 1712 1713 return ret; 1714 } 1715 1716 void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1717 { 1718 int ret; 1719 1720 ret = _kvm_gsi_routing_write(vm, routing); 1721 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret)); 1722 } 1723 1724 /* 1725 * VM Dump 1726 * 1727 * Input Args: 1728 * vm - Virtual Machine 1729 * indent - Left margin indent amount 1730 * 1731 * Output Args: 1732 * stream - Output FILE stream 1733 * 1734 * Return: None 1735 * 1736 * Dumps the current state of the VM given by vm, to the FILE stream 1737 * given by stream. 
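 *
 * Illustrative usage (a sketch), e.g. to include VM state in a test's
 * failure output:
 *
 *	vm_dump(stderr, vm, 2);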
1738 */ 1739 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 1740 { 1741 int ctr; 1742 struct userspace_mem_region *region; 1743 struct kvm_vcpu *vcpu; 1744 1745 fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); 1746 fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); 1747 fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); 1748 fprintf(stream, "%*sMem Regions:\n", indent, ""); 1749 hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { 1750 fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " 1751 "host_virt: %p\n", indent + 2, "", 1752 (uint64_t) region->region.guest_phys_addr, 1753 (uint64_t) region->region.memory_size, 1754 region->host_mem); 1755 fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); 1756 sparsebit_dump(stream, region->unused_phy_pages, 0); 1757 } 1758 fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); 1759 sparsebit_dump(stream, vm->vpages_mapped, indent + 2); 1760 fprintf(stream, "%*spgd_created: %u\n", indent, "", 1761 vm->pgd_created); 1762 if (vm->pgd_created) { 1763 fprintf(stream, "%*sVirtual Translation Tables:\n", 1764 indent + 2, ""); 1765 virt_dump(stream, vm, indent + 4); 1766 } 1767 fprintf(stream, "%*sVCPUs:\n", indent, ""); 1768 1769 list_for_each_entry(vcpu, &vm->vcpus, list) 1770 vcpu_dump(stream, vcpu, indent + 2); 1771 } 1772 1773 /* Known KVM exit reasons */ 1774 static struct exit_reason { 1775 unsigned int reason; 1776 const char *name; 1777 } exit_reasons_known[] = { 1778 {KVM_EXIT_UNKNOWN, "UNKNOWN"}, 1779 {KVM_EXIT_EXCEPTION, "EXCEPTION"}, 1780 {KVM_EXIT_IO, "IO"}, 1781 {KVM_EXIT_HYPERCALL, "HYPERCALL"}, 1782 {KVM_EXIT_DEBUG, "DEBUG"}, 1783 {KVM_EXIT_HLT, "HLT"}, 1784 {KVM_EXIT_MMIO, "MMIO"}, 1785 {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, 1786 {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, 1787 {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, 1788 {KVM_EXIT_INTR, "INTR"}, 1789 {KVM_EXIT_SET_TPR, "SET_TPR"}, 1790 {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, 1791 {KVM_EXIT_S390_SIEIC, "S390_SIEIC"}, 1792 {KVM_EXIT_S390_RESET, "S390_RESET"}, 1793 {KVM_EXIT_DCR, "DCR"}, 1794 {KVM_EXIT_NMI, "NMI"}, 1795 {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, 1796 {KVM_EXIT_OSI, "OSI"}, 1797 {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, 1798 {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, 1799 {KVM_EXIT_X86_RDMSR, "RDMSR"}, 1800 {KVM_EXIT_X86_WRMSR, "WRMSR"}, 1801 {KVM_EXIT_XEN, "XEN"}, 1802 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT 1803 {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, 1804 #endif 1805 }; 1806 1807 /* 1808 * Exit Reason String 1809 * 1810 * Input Args: 1811 * exit_reason - Exit reason 1812 * 1813 * Output Args: None 1814 * 1815 * Return: 1816 * Constant string pointer describing the exit reason. 1817 * 1818 * Locates and returns a constant string that describes the KVM exit 1819 * reason given by exit_reason. If no such string is found, a constant 1820 * string of "Unknown" is returned. 
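 *
 * Illustrative usage (a sketch), e.g. when reporting an unexpected exit:
 *
 *	TEST_FAIL("Unexpected exit reason: %u (%s)",
 *		  vcpu->run->exit_reason,
 *		  exit_reason_str(vcpu->run->exit_reason));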
1821 */ 1822 const char *exit_reason_str(unsigned int exit_reason) 1823 { 1824 unsigned int n1; 1825 1826 for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { 1827 if (exit_reason == exit_reasons_known[n1].reason) 1828 return exit_reasons_known[n1].name; 1829 } 1830 1831 return "Unknown"; 1832 } 1833 1834 /* 1835 * Physical Contiguous Page Allocator 1836 * 1837 * Input Args: 1838 * vm - Virtual Machine 1839 * num - number of pages 1840 * paddr_min - Physical address minimum 1841 * memslot - Memory region to allocate page from 1842 * 1843 * Output Args: None 1844 * 1845 * Return: 1846 * Starting physical address 1847 * 1848 * Within the VM specified by vm, locates a range of available physical 1849 * pages at or above paddr_min. If found, the pages are marked as in use 1850 * and their base address is returned. A TEST_ASSERT failure occurs if 1851 * not enough pages are available at or above paddr_min. 1852 */ 1853 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, 1854 vm_paddr_t paddr_min, uint32_t memslot) 1855 { 1856 struct userspace_mem_region *region; 1857 sparsebit_idx_t pg, base; 1858 1859 TEST_ASSERT(num > 0, "Must allocate at least one page"); 1860 1861 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " 1862 "not divisible by page size.\n" 1863 " paddr_min: 0x%lx page_size: 0x%x", 1864 paddr_min, vm->page_size); 1865 1866 region = memslot2region(vm, memslot); 1867 base = pg = paddr_min >> vm->page_shift; 1868 1869 do { 1870 for (; pg < base + num; ++pg) { 1871 if (!sparsebit_is_set(region->unused_phy_pages, pg)) { 1872 base = pg = sparsebit_next_set(region->unused_phy_pages, pg); 1873 break; 1874 } 1875 } 1876 } while (pg && pg != base + num); 1877 1878 if (pg == 0) { 1879 fprintf(stderr, "No guest physical page available, " 1880 "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", 1881 paddr_min, vm->page_size, memslot); 1882 fputs("---- vm dump ----\n", stderr); 1883 vm_dump(stderr, vm, 2); 1884 abort(); 1885 } 1886 1887 for (pg = base; pg < base + num; ++pg) 1888 sparsebit_clear(region->unused_phy_pages, pg); 1889 1890 return base * vm->page_size; 1891 } 1892 1893 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, 1894 uint32_t memslot) 1895 { 1896 return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); 1897 } 1898 1899 /* Arbitrary minimum physical address used for virtual translation tables. 
*/ 1900 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 1901 1902 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) 1903 { 1904 return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); 1905 } 1906 1907 /* 1908 * Address Guest Virtual to Host Virtual 1909 * 1910 * Input Args: 1911 * vm - Virtual Machine 1912 * gva - VM virtual address 1913 * 1914 * Output Args: None 1915 * 1916 * Return: 1917 * Equivalent host virtual address 1918 */ 1919 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) 1920 { 1921 return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); 1922 } 1923 1924 unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm) 1925 { 1926 return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; 1927 } 1928 1929 static unsigned int vm_calc_num_pages(unsigned int num_pages, 1930 unsigned int page_shift, 1931 unsigned int new_page_shift, 1932 bool ceil) 1933 { 1934 unsigned int n = 1 << (new_page_shift - page_shift); 1935 1936 if (page_shift >= new_page_shift) 1937 return num_pages * (1 << (page_shift - new_page_shift)); 1938 1939 return num_pages / n + !!(ceil && num_pages % n); 1940 } 1941 1942 static inline int getpageshift(void) 1943 { 1944 return __builtin_ffs(getpagesize()) - 1; 1945 } 1946 1947 unsigned int 1948 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) 1949 { 1950 return vm_calc_num_pages(num_guest_pages, 1951 vm_guest_mode_params[mode].page_shift, 1952 getpageshift(), true); 1953 } 1954 1955 unsigned int 1956 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages) 1957 { 1958 return vm_calc_num_pages(num_host_pages, getpageshift(), 1959 vm_guest_mode_params[mode].page_shift, false); 1960 } 1961 1962 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) 1963 { 1964 unsigned int n; 1965 n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size); 1966 return vm_adjust_num_guest_pages(mode, n); 1967 } 1968 1969 /* 1970 * Read binary stats descriptors 1971 * 1972 * Input Args: 1973 * stats_fd - the file descriptor for the binary stats file from which to read 1974 * header - the binary stats metadata header corresponding to the given FD 1975 * 1976 * Output Args: None 1977 * 1978 * Return: 1979 * A pointer to a newly allocated series of stat descriptors. 1980 * Caller is responsible for freeing the returned kvm_stats_desc. 1981 * 1982 * Read the stats descriptors from the binary stats interface. 
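 *
 * Illustrative usage (a sketch) together with the other stats helpers:
 *
 *	stats_fd = vm_get_stats_fd(vm);
 *	read_stats_header(stats_fd, &header);
 *	stats_desc = read_stats_descriptors(stats_fd, &header);
 *	...
 *	free(stats_desc);
 *	close(stats_fd);
 *
 * __vm_get_stat() below follows the same pattern and caches the results in
 * the VM.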
1983 */ 1984 struct kvm_stats_desc *read_stats_descriptors(int stats_fd, 1985 struct kvm_stats_header *header) 1986 { 1987 struct kvm_stats_desc *stats_desc; 1988 ssize_t desc_size, total_size, ret; 1989 1990 desc_size = get_stats_descriptor_size(header); 1991 total_size = header->num_desc * desc_size; 1992 1993 stats_desc = calloc(header->num_desc, desc_size); 1994 TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors"); 1995 1996 ret = pread(stats_fd, stats_desc, total_size, header->desc_offset); 1997 TEST_ASSERT(ret == total_size, "Read KVM stats descriptors"); 1998 1999 return stats_desc; 2000 } 2001 2002 /* 2003 * Read stat data for a particular stat 2004 * 2005 * Input Args: 2006 * stats_fd - the file descriptor for the binary stats file from which to read 2007 * header - the binary stats metadata header corresponding to the given FD 2008 * desc - the binary stat metadata for the particular stat to be read 2009 * max_elements - the maximum number of 8-byte values to read into data 2010 * 2011 * Output Args: 2012 * data - the buffer into which stat data should be read 2013 * 2014 * Read the data values of a specified stat from the binary stats interface. 2015 */ 2016 void read_stat_data(int stats_fd, struct kvm_stats_header *header, 2017 struct kvm_stats_desc *desc, uint64_t *data, 2018 size_t max_elements) 2019 { 2020 size_t nr_elements = min_t(ssize_t, desc->size, max_elements); 2021 size_t size = nr_elements * sizeof(*data); 2022 ssize_t ret; 2023 2024 TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name); 2025 TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name); 2026 2027 ret = pread(stats_fd, data, size, 2028 header->data_offset + desc->offset); 2029 2030 TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)", 2031 desc->name, errno, strerror(errno)); 2032 TEST_ASSERT(ret == size, 2033 "pread() on stat '%s' read %ld bytes, wanted %lu bytes", 2034 desc->name, size, ret); 2035 } 2036 2037 /* 2038 * Read the data of the named stat 2039 * 2040 * Input Args: 2041 * vm - the VM for which the stat should be read 2042 * stat_name - the name of the stat to read 2043 * max_elements - the maximum number of 8-byte values to read into data 2044 * 2045 * Output Args: 2046 * data - the buffer into which stat data should be read 2047 * 2048 * Read the data values of a specified stat from the binary stats interface. 2049 */ 2050 void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, 2051 size_t max_elements) 2052 { 2053 struct kvm_stats_desc *desc; 2054 size_t size_desc; 2055 int i; 2056 2057 if (!vm->stats_fd) { 2058 vm->stats_fd = vm_get_stats_fd(vm); 2059 read_stats_header(vm->stats_fd, &vm->stats_header); 2060 vm->stats_desc = read_stats_descriptors(vm->stats_fd, 2061 &vm->stats_header); 2062 } 2063 2064 size_desc = get_stats_descriptor_size(&vm->stats_header); 2065 2066 for (i = 0; i < vm->stats_header.num_desc; ++i) { 2067 desc = (void *)vm->stats_desc + (i * size_desc); 2068 2069 if (strcmp(desc->name, stat_name)) 2070 continue; 2071 2072 read_stat_data(vm->stats_fd, &vm->stats_header, desc, 2073 data, max_elements); 2074 2075 break; 2076 } 2077 } 2078
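/*
 * Illustrative usage of __vm_get_stat() (a sketch; the stat name must match
 * one exposed by the kernel for the VM, e.g. "remote_tlb_flush"):
 *
 *	uint64_t val;
 *
 *	__vm_get_stat(vm, "remote_tlb_flush", &val, 1);
 */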