1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * tools/testing/selftests/kvm/lib/kvm_util.c 4 * 5 * Copyright (C) 2018, Google LLC. 6 */ 7 8 #define _GNU_SOURCE /* for program_invocation_name */ 9 #include "test_util.h" 10 #include "kvm_util.h" 11 #include "processor.h" 12 13 #include <assert.h> 14 #include <sched.h> 15 #include <sys/mman.h> 16 #include <sys/types.h> 17 #include <sys/stat.h> 18 #include <unistd.h> 19 #include <linux/kernel.h> 20 21 #define KVM_UTIL_MIN_PFN 2 22 23 static int vcpu_mmap_sz(void); 24 25 int open_path_or_exit(const char *path, int flags) 26 { 27 int fd; 28 29 fd = open(path, flags); 30 __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); 31 32 return fd; 33 } 34 35 /* 36 * Open KVM_DEV_PATH if available, otherwise exit the entire program. 37 * 38 * Input Args: 39 * flags - The flags to pass when opening KVM_DEV_PATH. 40 * 41 * Return: 42 * The opened file descriptor of /dev/kvm. 43 */ 44 static int _open_kvm_dev_path_or_exit(int flags) 45 { 46 return open_path_or_exit(KVM_DEV_PATH, flags); 47 } 48 49 int open_kvm_dev_path_or_exit(void) 50 { 51 return _open_kvm_dev_path_or_exit(O_RDONLY); 52 } 53 54 static bool get_module_param_bool(const char *module_name, const char *param) 55 { 56 const int path_size = 128; 57 char path[path_size]; 58 char value; 59 ssize_t r; 60 int fd; 61 62 r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", 63 module_name, param); 64 TEST_ASSERT(r < path_size, 65 "Failed to construct sysfs path in %d bytes.", path_size); 66 67 fd = open_path_or_exit(path, O_RDONLY); 68 69 r = read(fd, &value, 1); 70 TEST_ASSERT(r == 1, "read(%s) failed", path); 71 72 r = close(fd); 73 TEST_ASSERT(!r, "close(%s) failed", path); 74 75 if (value == 'Y') 76 return true; 77 else if (value == 'N') 78 return false; 79 80 TEST_FAIL("Unrecognized value '%c' for boolean module param", value); 81 } 82 83 bool get_kvm_intel_param_bool(const char *param) 84 { 85 return get_module_param_bool("kvm_intel", param); 86 } 87 88 bool get_kvm_amd_param_bool(const char *param) 89 { 90 return get_module_param_bool("kvm_amd", param); 91 } 92 93 /* 94 * Capability 95 * 96 * Input Args: 97 * cap - Capability 98 * 99 * Output Args: None 100 * 101 * Return: 102 * On success, the Value corresponding to the capability (KVM_CAP_*) 103 * specified by the value of cap. On failure a TEST_ASSERT failure 104 * is produced. 105 * 106 * Looks up and returns the value corresponding to the capability 107 * (KVM_CAP_*) given by cap. 
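 * (KVM_CAP_*) given by cap.
 *
 * Example (illustrative sketch; the capabilities queried below are ones
 * already used elsewhere in this file):
 *
 *	int max_vcpus;
 *
 *	TEST_REQUIRE(kvm_has_cap(KVM_CAP_DIRTY_LOG_RING));
 *	max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 *	pr_info("Host supports up to %d vCPUs\n", max_vcpus);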
108 */ 109 unsigned int kvm_check_cap(long cap) 110 { 111 int ret; 112 int kvm_fd; 113 114 kvm_fd = open_kvm_dev_path_or_exit(); 115 ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap); 116 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); 117 118 close(kvm_fd); 119 120 return (unsigned int)ret; 121 } 122 123 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) 124 { 125 if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) 126 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); 127 else 128 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); 129 vm->dirty_ring_size = ring_size; 130 } 131 132 static void vm_open(struct kvm_vm *vm) 133 { 134 vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR); 135 136 TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT)); 137 138 vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); 139 TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); 140 } 141 142 const char *vm_guest_mode_string(uint32_t i) 143 { 144 static const char * const strings[] = { 145 [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", 146 [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", 147 [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", 148 [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", 149 [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", 150 [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", 151 [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", 152 [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", 153 [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", 154 [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", 155 [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", 156 [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", 157 [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", 158 [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", 159 [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", 160 }; 161 _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, 162 "Missing new mode strings?"); 163 164 TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i); 165 166 return strings[i]; 167 } 168 169 const struct vm_guest_mode_params vm_guest_mode_params[] = { 170 [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, 171 [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, 172 [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, 173 [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, 174 [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, 175 [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, 176 [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, 177 [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, 178 [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, 179 [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, 180 [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, 181 [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, 182 [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, 183 [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, 184 [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, 185 }; 186 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, 187 "Missing new mode params?"); 188 189 struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) 190 { 191 struct kvm_vm *vm; 192 193 pr_debug("%s: mode='%s' pages='%ld'\n", __func__, 194 vm_guest_mode_string(mode), nr_pages); 195 196 vm = calloc(1, sizeof(*vm)); 197 TEST_ASSERT(vm != NULL, "Insufficient Memory"); 198 199 INIT_LIST_HEAD(&vm->vcpus); 200 vm->regions.gpa_tree = 
RB_ROOT; 201 vm->regions.hva_tree = RB_ROOT; 202 hash_init(vm->regions.slot_hash); 203 204 vm->mode = mode; 205 vm->type = 0; 206 207 vm->pa_bits = vm_guest_mode_params[mode].pa_bits; 208 vm->va_bits = vm_guest_mode_params[mode].va_bits; 209 vm->page_size = vm_guest_mode_params[mode].page_size; 210 vm->page_shift = vm_guest_mode_params[mode].page_shift; 211 212 /* Setup mode specific traits. */ 213 switch (vm->mode) { 214 case VM_MODE_P52V48_4K: 215 vm->pgtable_levels = 4; 216 break; 217 case VM_MODE_P52V48_64K: 218 vm->pgtable_levels = 3; 219 break; 220 case VM_MODE_P48V48_4K: 221 vm->pgtable_levels = 4; 222 break; 223 case VM_MODE_P48V48_64K: 224 vm->pgtable_levels = 3; 225 break; 226 case VM_MODE_P40V48_4K: 227 case VM_MODE_P36V48_4K: 228 vm->pgtable_levels = 4; 229 break; 230 case VM_MODE_P40V48_64K: 231 case VM_MODE_P36V48_64K: 232 vm->pgtable_levels = 3; 233 break; 234 case VM_MODE_P48V48_16K: 235 case VM_MODE_P40V48_16K: 236 case VM_MODE_P36V48_16K: 237 vm->pgtable_levels = 4; 238 break; 239 case VM_MODE_P36V47_16K: 240 vm->pgtable_levels = 3; 241 break; 242 case VM_MODE_PXXV48_4K: 243 #ifdef __x86_64__ 244 kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); 245 /* 246 * Ignore KVM support for 5-level paging (vm->va_bits == 57), 247 * it doesn't take effect unless a CR4.LA57 is set, which it 248 * isn't for this VM_MODE. 249 */ 250 TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, 251 "Linear address width (%d bits) not supported", 252 vm->va_bits); 253 pr_debug("Guest physical address width detected: %d\n", 254 vm->pa_bits); 255 vm->pgtable_levels = 4; 256 vm->va_bits = 48; 257 #else 258 TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); 259 #endif 260 break; 261 case VM_MODE_P47V64_4K: 262 vm->pgtable_levels = 5; 263 break; 264 case VM_MODE_P44V64_4K: 265 vm->pgtable_levels = 5; 266 break; 267 default: 268 TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); 269 } 270 271 #ifdef __aarch64__ 272 if (vm->pa_bits != 40) 273 vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); 274 #endif 275 276 vm_open(vm); 277 278 /* Limit to VA-bit canonical virtual addresses. */ 279 vm->vpages_valid = sparsebit_alloc(); 280 sparsebit_set_num(vm->vpages_valid, 281 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 282 sparsebit_set_num(vm->vpages_valid, 283 (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, 284 (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 285 286 /* Limit physical addresses to PA-bits. */ 287 vm->max_gfn = vm_compute_max_gfn(vm); 288 289 /* Allocate and setup memory for guest. */ 290 vm->vpages_mapped = sparsebit_alloc(); 291 if (nr_pages != 0) 292 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 293 0, 0, nr_pages, 0); 294 295 return vm; 296 } 297 298 static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, 299 uint32_t nr_runnable_vcpus, 300 uint64_t extra_mem_pages) 301 { 302 uint64_t nr_pages; 303 304 TEST_ASSERT(nr_runnable_vcpus, 305 "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); 306 307 TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), 308 "nr_vcpus = %d too large for host, max-vcpus = %d", 309 nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); 310 311 /* 312 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the 313 * test code and other per-VM assets that will be loaded into memslot0. 314 */ 315 nr_pages = 512; 316 317 /* Account for the per-vCPU stacks on behalf of the test. 
*/ 318 nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS; 319 320 /* 321 * Account for the number of pages needed for the page tables. The 322 * maximum page table size for a memory region will be when the 323 * smallest page size is used. Considering each page contains x page 324 * table descriptors, the total extra size for page tables (for extra 325 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller 326 * than N/x*2. 327 */ 328 nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; 329 330 return vm_adjust_num_guest_pages(mode, nr_pages); 331 } 332 333 struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, 334 uint64_t nr_extra_pages) 335 { 336 uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, 337 nr_extra_pages); 338 struct userspace_mem_region *slot0; 339 struct kvm_vm *vm; 340 341 vm = ____vm_create(mode, nr_pages); 342 343 kvm_vm_elf_load(vm, program_invocation_name); 344 345 /* 346 * TODO: Add proper defines to protect the library's memslots, and then 347 * carve out memslot1 for the ucall MMIO address. KVM treats writes to 348 * read-only memslots as MMIO, and creating a read-only memslot for the 349 * MMIO region would prevent silently clobbering the MMIO region. 350 */ 351 slot0 = memslot2region(vm, 0); 352 ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); 353 354 #ifdef __x86_64__ 355 vm_create_irqchip(vm); 356 #endif 357 358 return vm; 359 } 360 361 /* 362 * VM Create with customized parameters 363 * 364 * Input Args: 365 * mode - VM Mode (e.g. VM_MODE_P52V48_4K) 366 * nr_vcpus - VCPU count 367 * extra_mem_pages - Non-slot0 physical memory total size 368 * guest_code - Guest entry point 369 * vcpuids - VCPU IDs 370 * 371 * Output Args: None 372 * 373 * Return: 374 * Pointer to opaque structure that describes the created VM. 375 * 376 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). 377 * extra_mem_pages is only used to calculate the maximum page table size, 378 * no real memory allocation for non-slot0 memory in this function. 379 */ 380 struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, 381 uint64_t extra_mem_pages, 382 void *guest_code, struct kvm_vcpu *vcpus[]) 383 { 384 struct kvm_vm *vm; 385 int i; 386 387 TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array"); 388 389 vm = __vm_create(mode, nr_vcpus, extra_mem_pages); 390 391 for (i = 0; i < nr_vcpus; ++i) 392 vcpus[i] = vm_vcpu_add(vm, i, guest_code); 393 394 return vm; 395 } 396 397 struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, 398 uint64_t extra_mem_pages, 399 void *guest_code) 400 { 401 struct kvm_vcpu *vcpus[1]; 402 struct kvm_vm *vm; 403 404 vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages, 405 guest_code, vcpus); 406 407 *vcpu = vcpus[0]; 408 return vm; 409 } 410 411 /* 412 * VM Restart 413 * 414 * Input Args: 415 * vm - VM that has been released before 416 * 417 * Output Args: None 418 * 419 * Reopens the file descriptors associated to the VM and reinstates the 420 * global state, such as the irqchip and the memory regions that are mapped 421 * into the guest. 
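 *
 * Typical usage (illustrative sketch; tests that tear down and re-create
 * the underlying KVM file descriptors usually go through
 * vm_recreate_with_one_vcpu(), which calls kvm_vm_restart() internally):
 *
 *	kvm_vm_release(vm);
 *	vcpu = vm_recreate_with_one_vcpu(vm);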
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
{
	cpu_set_t mask;
	int r;

	CPU_ZERO(&mask);
	CPU_SET(pcpu, &mask);
	r = sched_setaffinity(0, sizeof(mask), &mask);
	TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
}

static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
	uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);

	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
		    "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
	return pcpu;
}

void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
			    int nr_vcpus)
{
	cpu_set_t allowed_mask;
	char *cpu, *cpu_list;
	char delim[2] = ",";
	int i, r;

	cpu_list = strdup(pcpus_string);
	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");

	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity() failed");

	cpu = strtok(cpu_list, delim);

	/* 1. Get all pcpus for vcpus. */
	for (i = 0; i < nr_vcpus; i++) {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
		vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
		cpu = strtok(NULL, delim);
	}

	/* 2. Check if the main worker needs to be pinned. */
	if (cpu) {
		kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
		cpu = strtok(NULL, delim);
	}

	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
	free(cpu_list);
}

/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
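 *
 * Example of the public wrapper kvm_userspace_memory_region_find() defined
 * below (illustrative sketch; 'gpa' and 'size' are assumed test-provided
 * values):
 *
 *	struct kvm_userspace_memory_region *r;
 *
 *	r = kvm_userspace_memory_region_find(vm, gpa, gpa + size - 1);
 *	if (r)
 *		pr_info("GPA range overlaps memslot %u\n", r->slot);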
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find. Allows tests to look up
 * the memslot datastructure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->run, vcpu_mmap_sz());
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	ret = close(vcpu->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
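 *
 * Example lifecycle (illustrative sketch; guest_code is a test-provided
 * guest entry point and is assumed here, and vm_create_with_one_vcpu() is
 * the header wrapper around __vm_create_with_one_vcpu() above):
 *
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm;
 *
 *	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 *	vcpu_run(vcpu);
 *	kvm_vm_free(vm);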
660 */ 661 void kvm_vm_free(struct kvm_vm *vmp) 662 { 663 int ctr; 664 struct hlist_node *node; 665 struct userspace_mem_region *region; 666 667 if (vmp == NULL) 668 return; 669 670 /* Free cached stats metadata and close FD */ 671 if (vmp->stats_fd) { 672 free(vmp->stats_desc); 673 close(vmp->stats_fd); 674 } 675 676 /* Free userspace_mem_regions. */ 677 hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) 678 __vm_mem_region_delete(vmp, region, false); 679 680 /* Free sparsebit arrays. */ 681 sparsebit_free(&vmp->vpages_valid); 682 sparsebit_free(&vmp->vpages_mapped); 683 684 kvm_vm_release(vmp); 685 686 /* Free the structure describing the VM. */ 687 free(vmp); 688 } 689 690 int kvm_memfd_alloc(size_t size, bool hugepages) 691 { 692 int memfd_flags = MFD_CLOEXEC; 693 int fd, r; 694 695 if (hugepages) 696 memfd_flags |= MFD_HUGETLB; 697 698 fd = memfd_create("kvm_selftest", memfd_flags); 699 TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); 700 701 r = ftruncate(fd, size); 702 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); 703 704 r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); 705 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); 706 707 return fd; 708 } 709 710 /* 711 * Memory Compare, host virtual to guest virtual 712 * 713 * Input Args: 714 * hva - Starting host virtual address 715 * vm - Virtual Machine 716 * gva - Starting guest virtual address 717 * len - number of bytes to compare 718 * 719 * Output Args: None 720 * 721 * Input/Output Args: None 722 * 723 * Return: 724 * Returns 0 if the bytes starting at hva for a length of len 725 * are equal the guest virtual bytes starting at gva. Returns 726 * a value < 0, if bytes at hva are less than those at gva. 727 * Otherwise a value > 0 is returned. 728 * 729 * Compares the bytes starting at the host virtual address hva, for 730 * a length of len, to the guest bytes starting at the guest virtual 731 * address given by gva. 732 */ 733 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) 734 { 735 size_t amt; 736 737 /* 738 * Compare a batch of bytes until either a match is found 739 * or all the bytes have been compared. 740 */ 741 for (uintptr_t offset = 0; offset < len; offset += amt) { 742 uintptr_t ptr1 = (uintptr_t)hva + offset; 743 744 /* 745 * Determine host address for guest virtual address 746 * at offset. 747 */ 748 uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); 749 750 /* 751 * Determine amount to compare on this pass. 752 * Don't allow the comparsion to cross a page boundary. 753 */ 754 amt = len - offset; 755 if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) 756 amt = vm->page_size - (ptr1 % vm->page_size); 757 if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) 758 amt = vm->page_size - (ptr2 % vm->page_size); 759 760 assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); 761 assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); 762 763 /* 764 * Perform the comparison. If there is a difference 765 * return that result to the caller, otherwise need 766 * to continue on looking for a mismatch. 767 */ 768 int ret = memcmp((void *)ptr1, (void *)ptr2, amt); 769 if (ret != 0) 770 return ret; 771 } 772 773 /* 774 * No mismatch found. Let the caller know the two memory 775 * areas are equal. 
 */
	return 0;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}


int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}

/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region.
 *              NULL to use anonymous memory.
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		"Number of guest pages is not compatible with the host.
" 889 "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); 890 891 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " 892 "address not on a page boundary.\n" 893 " guest_paddr: 0x%lx vm->page_size: 0x%x", 894 guest_paddr, vm->page_size); 895 TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) 896 <= vm->max_gfn, "Physical range beyond maximum " 897 "supported physical address,\n" 898 " guest_paddr: 0x%lx npages: 0x%lx\n" 899 " vm->max_gfn: 0x%lx vm->page_size: 0x%x", 900 guest_paddr, npages, vm->max_gfn, vm->page_size); 901 902 /* 903 * Confirm a mem region with an overlapping address doesn't 904 * already exist. 905 */ 906 region = (struct userspace_mem_region *) userspace_mem_region_find( 907 vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); 908 if (region != NULL) 909 TEST_FAIL("overlapping userspace_mem_region already " 910 "exists\n" 911 " requested guest_paddr: 0x%lx npages: 0x%lx " 912 "page_size: 0x%x\n" 913 " existing guest_paddr: 0x%lx size: 0x%lx", 914 guest_paddr, npages, vm->page_size, 915 (uint64_t) region->region.guest_phys_addr, 916 (uint64_t) region->region.memory_size); 917 918 /* Confirm no region with the requested slot already exists. */ 919 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 920 slot) { 921 if (region->region.slot != slot) 922 continue; 923 924 TEST_FAIL("A mem region with the requested slot " 925 "already exists.\n" 926 " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" 927 " existing slot: %u paddr: 0x%lx size: 0x%lx", 928 slot, guest_paddr, npages, 929 region->region.slot, 930 (uint64_t) region->region.guest_phys_addr, 931 (uint64_t) region->region.memory_size); 932 } 933 934 /* Allocate and initialize new mem region structure. */ 935 region = calloc(1, sizeof(*region)); 936 TEST_ASSERT(region != NULL, "Insufficient Memory"); 937 region->mmap_size = npages * vm->page_size; 938 939 #ifdef __s390x__ 940 /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ 941 alignment = 0x100000; 942 #else 943 alignment = 1; 944 #endif 945 946 /* 947 * When using THP mmap is not guaranteed to returned a hugepage aligned 948 * address so we have to pad the mmap. Padding is not needed for HugeTLB 949 * because mmap will always return an address aligned to the HugeTLB 950 * page size. 
 */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
		guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}

/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to the memory region structure that describes the memory
 *   region using the kvm memory slot ID given by memslot.  TEST_ASSERT
 *   failure on error (e.g. currently no memory region using memslot as a
 *   KVM memory slot ID).
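 *
 * Example (illustrative sketch, mirroring how __vm_create() above locates
 * memslot 0 before placing the ucall MMIO hole):
 *
 *	struct userspace_mem_region *slot0 = memslot2region(vm, 0);
 *
 *	pr_info("memslot 0: gpa 0x%llx size 0x%llx hva %p\n",
 *		slot0->region.guest_phys_addr,
 *		slot0->region.memory_size, slot0->host_mem);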
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - New flags for the memory region
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}

/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist.
*/ 1174 TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); 1175 1176 /* Allocate and initialize new vcpu structure. */ 1177 vcpu = calloc(1, sizeof(*vcpu)); 1178 TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); 1179 1180 vcpu->vm = vm; 1181 vcpu->id = vcpu_id; 1182 vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); 1183 TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd)); 1184 1185 TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " 1186 "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", 1187 vcpu_mmap_sz(), sizeof(*vcpu->run)); 1188 vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), 1189 PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); 1190 TEST_ASSERT(vcpu->run != MAP_FAILED, 1191 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 1192 1193 /* Add to linked-list of VCPUs. */ 1194 list_add(&vcpu->list, &vm->vcpus); 1195 1196 return vcpu; 1197 } 1198 1199 /* 1200 * VM Virtual Address Unused Gap 1201 * 1202 * Input Args: 1203 * vm - Virtual Machine 1204 * sz - Size (bytes) 1205 * vaddr_min - Minimum Virtual Address 1206 * 1207 * Output Args: None 1208 * 1209 * Return: 1210 * Lowest virtual address at or below vaddr_min, with at least 1211 * sz unused bytes. TEST_ASSERT failure if no area of at least 1212 * size sz is available. 1213 * 1214 * Within the VM specified by vm, locates the lowest starting virtual 1215 * address >= vaddr_min, that has at least sz unallocated bytes. A 1216 * TEST_ASSERT failure occurs for invalid input or no area of at least 1217 * sz unallocated bytes >= vaddr_min is available. 1218 */ 1219 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, 1220 vm_vaddr_t vaddr_min) 1221 { 1222 uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; 1223 1224 /* Determine lowest permitted virtual page index. */ 1225 uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; 1226 if ((pgidx_start * vm->page_size) < vaddr_min) 1227 goto no_va_found; 1228 1229 /* Loop over section with enough valid virtual page indexes. */ 1230 if (!sparsebit_is_set_num(vm->vpages_valid, 1231 pgidx_start, pages)) 1232 pgidx_start = sparsebit_next_set_num(vm->vpages_valid, 1233 pgidx_start, pages); 1234 do { 1235 /* 1236 * Are there enough unused virtual pages available at 1237 * the currently proposed starting virtual page index. 1238 * If not, adjust proposed starting index to next 1239 * possible. 1240 */ 1241 if (sparsebit_is_clear_num(vm->vpages_mapped, 1242 pgidx_start, pages)) 1243 goto va_found; 1244 pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, 1245 pgidx_start, pages); 1246 if (pgidx_start == 0) 1247 goto no_va_found; 1248 1249 /* 1250 * If needed, adjust proposed starting virtual address, 1251 * to next range of valid virtual addresses. 
1252 */ 1253 if (!sparsebit_is_set_num(vm->vpages_valid, 1254 pgidx_start, pages)) { 1255 pgidx_start = sparsebit_next_set_num( 1256 vm->vpages_valid, pgidx_start, pages); 1257 if (pgidx_start == 0) 1258 goto no_va_found; 1259 } 1260 } while (pgidx_start != 0); 1261 1262 no_va_found: 1263 TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); 1264 1265 /* NOT REACHED */ 1266 return -1; 1267 1268 va_found: 1269 TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, 1270 pgidx_start, pages), 1271 "Unexpected, invalid virtual page index range,\n" 1272 " pgidx_start: 0x%lx\n" 1273 " pages: 0x%lx", 1274 pgidx_start, pages); 1275 TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, 1276 pgidx_start, pages), 1277 "Unexpected, pages already mapped,\n" 1278 " pgidx_start: 0x%lx\n" 1279 " pages: 0x%lx", 1280 pgidx_start, pages); 1281 1282 return pgidx_start * vm->page_size; 1283 } 1284 1285 /* 1286 * VM Virtual Address Allocate 1287 * 1288 * Input Args: 1289 * vm - Virtual Machine 1290 * sz - Size in bytes 1291 * vaddr_min - Minimum starting virtual address 1292 * 1293 * Output Args: None 1294 * 1295 * Return: 1296 * Starting guest virtual address 1297 * 1298 * Allocates at least sz bytes within the virtual address space of the vm 1299 * given by vm. The allocated bytes are mapped to a virtual address >= 1300 * the address given by vaddr_min. Note that each allocation uses a 1301 * a unique set of pages, with the minimum real allocation being at least 1302 * a page. 1303 */ 1304 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) 1305 { 1306 uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); 1307 1308 virt_pgd_alloc(vm); 1309 vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, 1310 KVM_UTIL_MIN_PFN * vm->page_size, 0); 1311 1312 /* 1313 * Find an unused range of virtual page addresses of at least 1314 * pages in length. 1315 */ 1316 vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); 1317 1318 /* Map the virtual pages. */ 1319 for (vm_vaddr_t vaddr = vaddr_start; pages > 0; 1320 pages--, vaddr += vm->page_size, paddr += vm->page_size) { 1321 1322 virt_pg_map(vm, vaddr, paddr); 1323 1324 sparsebit_set(vm->vpages_mapped, 1325 vaddr >> vm->page_shift); 1326 } 1327 1328 return vaddr_start; 1329 } 1330 1331 /* 1332 * VM Virtual Address Allocate Pages 1333 * 1334 * Input Args: 1335 * vm - Virtual Machine 1336 * 1337 * Output Args: None 1338 * 1339 * Return: 1340 * Starting guest virtual address 1341 * 1342 * Allocates at least N system pages worth of bytes within the virtual address 1343 * space of the vm. 1344 */ 1345 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) 1346 { 1347 return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); 1348 } 1349 1350 /* 1351 * VM Virtual Address Allocate Page 1352 * 1353 * Input Args: 1354 * vm - Virtual Machine 1355 * 1356 * Output Args: None 1357 * 1358 * Return: 1359 * Starting guest virtual address 1360 * 1361 * Allocates at least one system page worth of bytes within the virtual address 1362 * space of the vm. 
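 *
 * Example (illustrative sketch): allocate one page of guest-virtual
 * scratch space and zero it from the host side:
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc_page(vm);
 *
 *	memset(addr_gva2hva(vm, gva), 0, vm->page_size);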
1363 */ 1364 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) 1365 { 1366 return vm_vaddr_alloc_pages(vm, 1); 1367 } 1368 1369 /* 1370 * Map a range of VM virtual address to the VM's physical address 1371 * 1372 * Input Args: 1373 * vm - Virtual Machine 1374 * vaddr - Virtuall address to map 1375 * paddr - VM Physical Address 1376 * npages - The number of pages to map 1377 * 1378 * Output Args: None 1379 * 1380 * Return: None 1381 * 1382 * Within the VM given by @vm, creates a virtual translation for 1383 * @npages starting at @vaddr to the page range starting at @paddr. 1384 */ 1385 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 1386 unsigned int npages) 1387 { 1388 size_t page_size = vm->page_size; 1389 size_t size = npages * page_size; 1390 1391 TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); 1392 TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); 1393 1394 while (npages--) { 1395 virt_pg_map(vm, vaddr, paddr); 1396 vaddr += page_size; 1397 paddr += page_size; 1398 } 1399 } 1400 1401 /* 1402 * Address VM Physical to Host Virtual 1403 * 1404 * Input Args: 1405 * vm - Virtual Machine 1406 * gpa - VM physical address 1407 * 1408 * Output Args: None 1409 * 1410 * Return: 1411 * Equivalent host virtual address 1412 * 1413 * Locates the memory region containing the VM physical address given 1414 * by gpa, within the VM given by vm. When found, the host virtual 1415 * address providing the memory to the vm physical address is returned. 1416 * A TEST_ASSERT failure occurs if no region containing gpa exists. 1417 */ 1418 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) 1419 { 1420 struct userspace_mem_region *region; 1421 1422 region = userspace_mem_region_find(vm, gpa, gpa); 1423 if (!region) { 1424 TEST_FAIL("No vm physical memory at 0x%lx", gpa); 1425 return NULL; 1426 } 1427 1428 return (void *)((uintptr_t)region->host_mem 1429 + (gpa - region->region.guest_phys_addr)); 1430 } 1431 1432 /* 1433 * Address Host Virtual to VM Physical 1434 * 1435 * Input Args: 1436 * vm - Virtual Machine 1437 * hva - Host virtual address 1438 * 1439 * Output Args: None 1440 * 1441 * Return: 1442 * Equivalent VM physical address 1443 * 1444 * Locates the memory region containing the host virtual address given 1445 * by hva, within the VM given by vm. When found, the equivalent 1446 * VM physical address is returned. A TEST_ASSERT failure occurs if no 1447 * region containing hva exists. 1448 */ 1449 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) 1450 { 1451 struct rb_node *node; 1452 1453 for (node = vm->regions.hva_tree.rb_node; node; ) { 1454 struct userspace_mem_region *region = 1455 container_of(node, struct userspace_mem_region, hva_node); 1456 1457 if (hva >= region->host_mem) { 1458 if (hva <= (region->host_mem 1459 + region->region.memory_size - 1)) 1460 return (vm_paddr_t)((uintptr_t) 1461 region->region.guest_phys_addr 1462 + (hva - (uintptr_t)region->host_mem)); 1463 1464 node = node->rb_right; 1465 } else 1466 node = node->rb_left; 1467 } 1468 1469 TEST_FAIL("No mapping to a guest physical address, hva: %p", hva); 1470 return -1; 1471 } 1472 1473 /* 1474 * Address VM physical to Host Virtual *alias*. 1475 * 1476 * Input Args: 1477 * vm - Virtual Machine 1478 * gpa - VM physical address 1479 * 1480 * Output Args: None 1481 * 1482 * Return: 1483 * Equivalent address within the host virtual *alias* area, or NULL 1484 * (without failing the test) if the guest memory is not shared (so 1485 * no alias exists). 
1486 * 1487 * Create a writable, shared virtual=>physical alias for the specific GPA. 1488 * The primary use case is to allow the host selftest to manipulate guest 1489 * memory without mapping said memory in the guest's address space. And, for 1490 * userfaultfd-based demand paging, to do so without triggering userfaults. 1491 */ 1492 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) 1493 { 1494 struct userspace_mem_region *region; 1495 uintptr_t offset; 1496 1497 region = userspace_mem_region_find(vm, gpa, gpa); 1498 if (!region) 1499 return NULL; 1500 1501 if (!region->host_alias) 1502 return NULL; 1503 1504 offset = gpa - region->region.guest_phys_addr; 1505 return (void *) ((uintptr_t) region->host_alias + offset); 1506 } 1507 1508 /* Create an interrupt controller chip for the specified VM. */ 1509 void vm_create_irqchip(struct kvm_vm *vm) 1510 { 1511 vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); 1512 1513 vm->has_irqchip = true; 1514 } 1515 1516 int _vcpu_run(struct kvm_vcpu *vcpu) 1517 { 1518 int rc; 1519 1520 do { 1521 rc = __vcpu_run(vcpu); 1522 } while (rc == -1 && errno == EINTR); 1523 1524 assert_on_unhandled_exception(vcpu); 1525 1526 return rc; 1527 } 1528 1529 /* 1530 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR. 1531 * Assert if the KVM returns an error (other than -EINTR). 1532 */ 1533 void vcpu_run(struct kvm_vcpu *vcpu) 1534 { 1535 int ret = _vcpu_run(vcpu); 1536 1537 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret)); 1538 } 1539 1540 void vcpu_run_complete_io(struct kvm_vcpu *vcpu) 1541 { 1542 int ret; 1543 1544 vcpu->run->immediate_exit = 1; 1545 ret = __vcpu_run(vcpu); 1546 vcpu->run->immediate_exit = 0; 1547 1548 TEST_ASSERT(ret == -1 && errno == EINTR, 1549 "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", 1550 ret, errno); 1551 } 1552 1553 /* 1554 * Get the list of guest registers which are supported for 1555 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer, 1556 * it is the caller's responsibility to free the list. 
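 *
 * Example (illustrative sketch): walk the returned list and free it when
 * done:
 *
 *	struct kvm_reg_list *list = vcpu_get_reg_list(vcpu);
 *	__u64 i;
 *
 *	for (i = 0; i < list->n; i++)
 *		pr_info("reg[%llu] = 0x%llx\n", i, list->reg[i]);
 *	free(list);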
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = vcpu->vm->page_size;
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}

/*
 * IRQ related functions.
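 *
 * Example (illustrative sketch; the GSI and irqchip pin numbers below are
 * arbitrary):
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *
 *	kvm_gsi_routing_irqchip_add(routing, 32, 0);
 *	kvm_gsi_routing_write(vm, routing);	// also frees 'routing'
 *	kvm_irq_line(vm, 32, 1);		// assert the interrupt line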
1666 */ 1667 1668 int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1669 { 1670 struct kvm_irq_level irq_level = { 1671 .irq = irq, 1672 .level = level, 1673 }; 1674 1675 return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); 1676 } 1677 1678 void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1679 { 1680 int ret = _kvm_irq_line(vm, irq, level); 1681 1682 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); 1683 } 1684 1685 struct kvm_irq_routing *kvm_gsi_routing_create(void) 1686 { 1687 struct kvm_irq_routing *routing; 1688 size_t size; 1689 1690 size = sizeof(struct kvm_irq_routing); 1691 /* Allocate space for the max number of entries: this wastes 196 KBs. */ 1692 size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry); 1693 routing = calloc(1, size); 1694 assert(routing); 1695 1696 return routing; 1697 } 1698 1699 void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, 1700 uint32_t gsi, uint32_t pin) 1701 { 1702 int i; 1703 1704 assert(routing); 1705 assert(routing->nr < KVM_MAX_IRQ_ROUTES); 1706 1707 i = routing->nr; 1708 routing->entries[i].gsi = gsi; 1709 routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; 1710 routing->entries[i].flags = 0; 1711 routing->entries[i].u.irqchip.irqchip = 0; 1712 routing->entries[i].u.irqchip.pin = pin; 1713 routing->nr++; 1714 } 1715 1716 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1717 { 1718 int ret; 1719 1720 assert(routing); 1721 ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); 1722 free(routing); 1723 1724 return ret; 1725 } 1726 1727 void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1728 { 1729 int ret; 1730 1731 ret = _kvm_gsi_routing_write(vm, routing); 1732 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret)); 1733 } 1734 1735 /* 1736 * VM Dump 1737 * 1738 * Input Args: 1739 * vm - Virtual Machine 1740 * indent - Left margin indent amount 1741 * 1742 * Output Args: 1743 * stream - Output FILE stream 1744 * 1745 * Return: None 1746 * 1747 * Dumps the current state of the VM given by vm, to the FILE stream 1748 * given by stream. 
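 *
 * Example (illustrative sketch, matching how this file dumps a VM when a
 * physical page allocation fails):
 *
 *	fputs("---- vm dump ----\n", stderr);
 *	vm_dump(stderr, vm, 2);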
1749 */ 1750 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 1751 { 1752 int ctr; 1753 struct userspace_mem_region *region; 1754 struct kvm_vcpu *vcpu; 1755 1756 fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); 1757 fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); 1758 fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); 1759 fprintf(stream, "%*sMem Regions:\n", indent, ""); 1760 hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { 1761 fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " 1762 "host_virt: %p\n", indent + 2, "", 1763 (uint64_t) region->region.guest_phys_addr, 1764 (uint64_t) region->region.memory_size, 1765 region->host_mem); 1766 fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); 1767 sparsebit_dump(stream, region->unused_phy_pages, 0); 1768 } 1769 fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); 1770 sparsebit_dump(stream, vm->vpages_mapped, indent + 2); 1771 fprintf(stream, "%*spgd_created: %u\n", indent, "", 1772 vm->pgd_created); 1773 if (vm->pgd_created) { 1774 fprintf(stream, "%*sVirtual Translation Tables:\n", 1775 indent + 2, ""); 1776 virt_dump(stream, vm, indent + 4); 1777 } 1778 fprintf(stream, "%*sVCPUs:\n", indent, ""); 1779 1780 list_for_each_entry(vcpu, &vm->vcpus, list) 1781 vcpu_dump(stream, vcpu, indent + 2); 1782 } 1783 1784 /* Known KVM exit reasons */ 1785 static struct exit_reason { 1786 unsigned int reason; 1787 const char *name; 1788 } exit_reasons_known[] = { 1789 {KVM_EXIT_UNKNOWN, "UNKNOWN"}, 1790 {KVM_EXIT_EXCEPTION, "EXCEPTION"}, 1791 {KVM_EXIT_IO, "IO"}, 1792 {KVM_EXIT_HYPERCALL, "HYPERCALL"}, 1793 {KVM_EXIT_DEBUG, "DEBUG"}, 1794 {KVM_EXIT_HLT, "HLT"}, 1795 {KVM_EXIT_MMIO, "MMIO"}, 1796 {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, 1797 {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, 1798 {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, 1799 {KVM_EXIT_INTR, "INTR"}, 1800 {KVM_EXIT_SET_TPR, "SET_TPR"}, 1801 {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, 1802 {KVM_EXIT_S390_SIEIC, "S390_SIEIC"}, 1803 {KVM_EXIT_S390_RESET, "S390_RESET"}, 1804 {KVM_EXIT_DCR, "DCR"}, 1805 {KVM_EXIT_NMI, "NMI"}, 1806 {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, 1807 {KVM_EXIT_OSI, "OSI"}, 1808 {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, 1809 {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, 1810 {KVM_EXIT_X86_RDMSR, "RDMSR"}, 1811 {KVM_EXIT_X86_WRMSR, "WRMSR"}, 1812 {KVM_EXIT_XEN, "XEN"}, 1813 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT 1814 {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, 1815 #endif 1816 }; 1817 1818 /* 1819 * Exit Reason String 1820 * 1821 * Input Args: 1822 * exit_reason - Exit reason 1823 * 1824 * Output Args: None 1825 * 1826 * Return: 1827 * Constant string pointer describing the exit reason. 1828 * 1829 * Locates and returns a constant string that describes the KVM exit 1830 * reason given by exit_reason. If no such string is found, a constant 1831 * string of "Unknown" is returned. 
1832 */ 1833 const char *exit_reason_str(unsigned int exit_reason) 1834 { 1835 unsigned int n1; 1836 1837 for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { 1838 if (exit_reason == exit_reasons_known[n1].reason) 1839 return exit_reasons_known[n1].name; 1840 } 1841 1842 return "Unknown"; 1843 } 1844 1845 /* 1846 * Physical Contiguous Page Allocator 1847 * 1848 * Input Args: 1849 * vm - Virtual Machine 1850 * num - number of pages 1851 * paddr_min - Physical address minimum 1852 * memslot - Memory region to allocate page from 1853 * 1854 * Output Args: None 1855 * 1856 * Return: 1857 * Starting physical address 1858 * 1859 * Within the VM specified by vm, locates a range of available physical 1860 * pages at or above paddr_min. If found, the pages are marked as in use 1861 * and their base address is returned. A TEST_ASSERT failure occurs if 1862 * not enough pages are available at or above paddr_min. 1863 */ 1864 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, 1865 vm_paddr_t paddr_min, uint32_t memslot) 1866 { 1867 struct userspace_mem_region *region; 1868 sparsebit_idx_t pg, base; 1869 1870 TEST_ASSERT(num > 0, "Must allocate at least one page"); 1871 1872 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " 1873 "not divisible by page size.\n" 1874 " paddr_min: 0x%lx page_size: 0x%x", 1875 paddr_min, vm->page_size); 1876 1877 region = memslot2region(vm, memslot); 1878 base = pg = paddr_min >> vm->page_shift; 1879 1880 do { 1881 for (; pg < base + num; ++pg) { 1882 if (!sparsebit_is_set(region->unused_phy_pages, pg)) { 1883 base = pg = sparsebit_next_set(region->unused_phy_pages, pg); 1884 break; 1885 } 1886 } 1887 } while (pg && pg != base + num); 1888 1889 if (pg == 0) { 1890 fprintf(stderr, "No guest physical page available, " 1891 "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", 1892 paddr_min, vm->page_size, memslot); 1893 fputs("---- vm dump ----\n", stderr); 1894 vm_dump(stderr, vm, 2); 1895 abort(); 1896 } 1897 1898 for (pg = base; pg < base + num; ++pg) 1899 sparsebit_clear(region->unused_phy_pages, pg); 1900 1901 return base * vm->page_size; 1902 } 1903 1904 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, 1905 uint32_t memslot) 1906 { 1907 return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); 1908 } 1909 1910 /* Arbitrary minimum physical address used for virtual translation tables. 
*/ 1911 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 1912 1913 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) 1914 { 1915 return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); 1916 } 1917 1918 /* 1919 * Address Guest Virtual to Host Virtual 1920 * 1921 * Input Args: 1922 * vm - Virtual Machine 1923 * gva - VM virtual address 1924 * 1925 * Output Args: None 1926 * 1927 * Return: 1928 * Equivalent host virtual address 1929 */ 1930 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) 1931 { 1932 return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); 1933 } 1934 1935 unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm) 1936 { 1937 return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; 1938 } 1939 1940 static unsigned int vm_calc_num_pages(unsigned int num_pages, 1941 unsigned int page_shift, 1942 unsigned int new_page_shift, 1943 bool ceil) 1944 { 1945 unsigned int n = 1 << (new_page_shift - page_shift); 1946 1947 if (page_shift >= new_page_shift) 1948 return num_pages * (1 << (page_shift - new_page_shift)); 1949 1950 return num_pages / n + !!(ceil && num_pages % n); 1951 } 1952 1953 static inline int getpageshift(void) 1954 { 1955 return __builtin_ffs(getpagesize()) - 1; 1956 } 1957 1958 unsigned int 1959 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) 1960 { 1961 return vm_calc_num_pages(num_guest_pages, 1962 vm_guest_mode_params[mode].page_shift, 1963 getpageshift(), true); 1964 } 1965 1966 unsigned int 1967 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages) 1968 { 1969 return vm_calc_num_pages(num_host_pages, getpageshift(), 1970 vm_guest_mode_params[mode].page_shift, false); 1971 } 1972 1973 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) 1974 { 1975 unsigned int n; 1976 n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size); 1977 return vm_adjust_num_guest_pages(mode, n); 1978 } 1979 1980 /* 1981 * Read binary stats descriptors 1982 * 1983 * Input Args: 1984 * stats_fd - the file descriptor for the binary stats file from which to read 1985 * header - the binary stats metadata header corresponding to the given FD 1986 * 1987 * Output Args: None 1988 * 1989 * Return: 1990 * A pointer to a newly allocated series of stat descriptors. 1991 * Caller is responsible for freeing the returned kvm_stats_desc. 1992 * 1993 * Read the stats descriptors from the binary stats interface. 
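 *
 * Example (illustrative sketch, mirroring the lazy setup done by
 * __vm_get_stat() at the end of this file):
 *
 *	int stats_fd = vm_get_stats_fd(vm);
 *	struct kvm_stats_header header;
 *	struct kvm_stats_desc *desc;
 *
 *	read_stats_header(stats_fd, &header);
 *	desc = read_stats_descriptors(stats_fd, &header);
 *	free(desc);
 *	close(stats_fd);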
1994 */ 1995 struct kvm_stats_desc *read_stats_descriptors(int stats_fd, 1996 struct kvm_stats_header *header) 1997 { 1998 struct kvm_stats_desc *stats_desc; 1999 ssize_t desc_size, total_size, ret; 2000 2001 desc_size = get_stats_descriptor_size(header); 2002 total_size = header->num_desc * desc_size; 2003 2004 stats_desc = calloc(header->num_desc, desc_size); 2005 TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors"); 2006 2007 ret = pread(stats_fd, stats_desc, total_size, header->desc_offset); 2008 TEST_ASSERT(ret == total_size, "Read KVM stats descriptors"); 2009 2010 return stats_desc; 2011 } 2012 2013 /* 2014 * Read stat data for a particular stat 2015 * 2016 * Input Args: 2017 * stats_fd - the file descriptor for the binary stats file from which to read 2018 * header - the binary stats metadata header corresponding to the given FD 2019 * desc - the binary stat metadata for the particular stat to be read 2020 * max_elements - the maximum number of 8-byte values to read into data 2021 * 2022 * Output Args: 2023 * data - the buffer into which stat data should be read 2024 * 2025 * Read the data values of a specified stat from the binary stats interface. 2026 */ 2027 void read_stat_data(int stats_fd, struct kvm_stats_header *header, 2028 struct kvm_stats_desc *desc, uint64_t *data, 2029 size_t max_elements) 2030 { 2031 size_t nr_elements = min_t(ssize_t, desc->size, max_elements); 2032 size_t size = nr_elements * sizeof(*data); 2033 ssize_t ret; 2034 2035 TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name); 2036 TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name); 2037 2038 ret = pread(stats_fd, data, size, 2039 header->data_offset + desc->offset); 2040 2041 TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)", 2042 desc->name, errno, strerror(errno)); 2043 TEST_ASSERT(ret == size, 2044 "pread() on stat '%s' read %ld bytes, wanted %lu bytes", 2045 desc->name, size, ret); 2046 } 2047 2048 /* 2049 * Read the data of the named stat 2050 * 2051 * Input Args: 2052 * vm - the VM for which the stat should be read 2053 * stat_name - the name of the stat to read 2054 * max_elements - the maximum number of 8-byte values to read into data 2055 * 2056 * Output Args: 2057 * data - the buffer into which stat data should be read 2058 * 2059 * Read the data values of a specified stat from the binary stats interface. 2060 */ 2061 void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, 2062 size_t max_elements) 2063 { 2064 struct kvm_stats_desc *desc; 2065 size_t size_desc; 2066 int i; 2067 2068 if (!vm->stats_fd) { 2069 vm->stats_fd = vm_get_stats_fd(vm); 2070 read_stats_header(vm->stats_fd, &vm->stats_header); 2071 vm->stats_desc = read_stats_descriptors(vm->stats_fd, 2072 &vm->stats_header); 2073 } 2074 2075 size_desc = get_stats_descriptor_size(&vm->stats_header); 2076 2077 for (i = 0; i < vm->stats_header.num_desc; ++i) { 2078 desc = (void *)vm->stats_desc + (i * size_desc); 2079 2080 if (strcmp(desc->name, stat_name)) 2081 continue; 2082 2083 read_stat_data(vm->stats_fd, &vm->stats_header, desc, 2084 data, max_elements); 2085 2086 break; 2087 } 2088 } 2089
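/*
 * Example use of __vm_get_stat() (illustrative sketch; "remote_tlb_flush"
 * is assumed here to be one of the stats the host's KVM exposes via the
 * binary stats interface, any other stat name works the same way):
 *
 *	uint64_t val;
 *
 *	__vm_get_stat(vm, "remote_tlb_flush", &val, 1);
 *	pr_info("remote_tlb_flush = %lu\n", val);
 */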