// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

static int vcpu_mmap_sz(void);

int open_path_or_exit(const char *path, int flags)
{
	int fd;

	fd = open(path, flags);
	__TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);

	return fd;
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	return open_path_or_exit(KVM_DEV_PATH, flags);
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

static bool get_module_param_bool(const char *module_name, const char *param)
{
	const int path_size = 128;
	char path[path_size];
	char value;
	ssize_t r;
	int fd;

	r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
		     module_name, param);
	TEST_ASSERT(r < path_size,
		    "Failed to construct sysfs path in %d bytes.", path_size);

	fd = open_path_or_exit(path, O_RDONLY);

	r = read(fd, &value, 1);
	TEST_ASSERT(r == 1, "read(%s) failed", path);

	r = close(fd);
	TEST_ASSERT(!r, "close(%s) failed", path);

	if (value == 'Y')
		return true;
	else if (value == 'N')
		return false;

	TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}

bool get_kvm_intel_param_bool(const char *param)
{
	return get_module_param_bool("kvm_intel", param);
}

bool get_kvm_amd_param_bool(const char *param)
{
	return get_module_param_bool("kvm_amd", param);
}
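/*
 * Example (illustrative only, not used by the library): a test that only
 * makes sense when a vendor module parameter is enabled can gate itself on
 * these helpers, e.g.:
 *
 *	TEST_REQUIRE(get_kvm_intel_param_bool("enable_apicv"));
 *
 * The parameter name above is just a sample value; the helpers read the
 * 'Y'/'N' byte exposed under /sys/module/<module>/parameters/<param>.
 */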
/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the Value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap.  On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
unsigned int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));

	close(kvm_fd);

	return (unsigned int)ret;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
	else
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));

	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_16K]	= "PA-bits:48, VA-bits:48, 16K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_16K]	= "PA-bits:40, VA-bits:48, 16K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
		[VM_MODE_P44V64_4K]	= "PA-bits:44, VA-bits:64, 4K pages",
		[VM_MODE_P36V48_4K]	= "PA-bits:36, VA-bits:48, 4K pages",
		[VM_MODE_P36V48_16K]	= "PA-bits:36, VA-bits:48, 16K pages",
		[VM_MODE_P36V48_64K]	= "PA-bits:36, VA-bits:48, 64K pages",
		[VM_MODE_P36V47_16K]	= "PA-bits:36, VA-bits:47, 16K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	[VM_MODE_P52V48_4K]	= { 52, 48,  0x1000, 12 },
	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
	[VM_MODE_P48V48_4K]	= { 48, 48,  0x1000, 12 },
	[VM_MODE_P48V48_16K]	= { 48, 48,  0x4000, 14 },
	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
	[VM_MODE_P40V48_4K]	= { 40, 48,  0x1000, 12 },
	[VM_MODE_P40V48_16K]	= { 40, 48,  0x4000, 14 },
	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
	[VM_MODE_PXXV48_4K]	= {  0,  0,  0x1000, 12 },
	[VM_MODE_P47V64_4K]	= { 47, 64,  0x1000, 12 },
	[VM_MODE_P44V64_4K]	= { 44, 64,  0x1000, 12 },
	[VM_MODE_P36V48_4K]	= { 36, 48,  0x1000, 12 },
	[VM_MODE_P36V48_16K]	= { 36, 48,  0x4000, 14 },
	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
	[VM_MODE_P36V47_16K]	= { 36, 47,  0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");

struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
{
	struct kvm_vm *vm;

	pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
		 vm_guest_mode_string(mode), nr_pages);

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = mode;
	vm->type = 0;

	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P36V47_16K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
		 * it doesn't take effect unless CR4.LA57 is set, which it
		 * isn't for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = vm_compute_max_gfn(vm);

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();
	if (nr_pages != 0)
		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
					    0, 0, nr_pages, 0);

	return vm;
}

static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
				     uint32_t nr_runnable_vcpus,
				     uint64_t extra_mem_pages)
{
	uint64_t nr_pages;

	TEST_ASSERT(nr_runnable_vcpus,
		    "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");

	TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	/*
	 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
	 * test code and other per-VM assets that will be loaded into memslot0.
	 */
	nr_pages = 512;

	/* Account for the per-vCPU stacks on behalf of the test. */
	nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;

	/*
	 * Account for the number of pages needed for the page tables.  The
	 * maximum page table size for a memory region will be when the
	 * smallest page size is used. Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;

	return vm_adjust_num_guest_pages(mode, nr_pages);
}

struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
			   uint64_t nr_extra_pages)
{
	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
						 nr_extra_pages);
	struct userspace_mem_region *slot0;
	struct kvm_vm *vm;

	vm = ____vm_create(mode, nr_pages);

	kvm_vm_elf_load(vm, program_invocation_name);

	/*
	 * TODO: Add proper defines to protect the library's memslots, and then
	 * carve out memslot1 for the ucall MMIO address.  KVM treats writes to
	 * read-only memslots as MMIO, and creating a read-only memslot for the
	 * MMIO region would prevent silently clobbering the MMIO region.
	 */
	slot0 = memslot2region(vm, 0);
	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);

	kvm_arch_vm_post_create(vm);

	return vm;
}

/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - VCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   guest_code - Guest entry point
 *   vcpus - Array that will be populated with the created vCPUs
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * extra_mem_pages is only used to calculate the maximum page table size;
 * no real memory is allocated for non-slot0 memory in this function.
 */
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(mode, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	return vm;
}

struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
					 uint64_t extra_mem_pages,
					 void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
				    guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}

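/*
 * Example (illustrative sketch only): a typical single-vCPU test boils down
 * to creating the VM and vCPU in one shot and then entering the run loop:
 *
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm;
 *
 *	vm = __vm_create_with_one_vcpu(&vcpu, 0, guest_code);
 *	vcpu_run(vcpu);
 *	kvm_vm_free(vm);
 *
 * where guest_code is the test's guest entry point and the extra_mem_pages
 * argument (0 here) only sizes the page table estimate, as described above.
 */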
/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated with the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
{
	cpu_set_t mask;
	int r;

	CPU_ZERO(&mask);
	CPU_SET(pcpu, &mask);
	r = sched_setaffinity(0, sizeof(mask), &mask);
	TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
}

static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
	uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);

	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
		    "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
	return pcpu;
}

void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
			    int nr_vcpus)
{
	cpu_set_t allowed_mask;
	char *cpu, *cpu_list;
	char delim[2] = ",";
	int i, r;

	cpu_list = strdup(pcpus_string);
	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");

	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity() failed");

	cpu = strtok(cpu_list, delim);

	/* 1. Get all pcpus for vcpus. */
	for (i = 0; i < nr_vcpus; i++) {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
		vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
		cpu = strtok(NULL, delim);
	}

	/* 2. Check if the main worker needs to be pinned. */
	if (cpu) {
		kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
		cpu = strtok(NULL, delim);
	}

	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
	free(cpu_list);
}

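/*
 * Example (illustrative): for a test with 3 vCPUs, passing "4,5,6,7" to
 * kvm_parse_vcpu_pinning() records pCPUs 4/5/6 for vCPU0/1/2 and, because a
 * fourth entry is present, immediately pins the main (worker) task to pCPU 7:
 *
 *	uint32_t vcpu_to_pcpu[3];
 *
 *	kvm_parse_vcpu_pinning("4,5,6,7", vcpu_to_pcpu, 3);
 *
 * The test's vCPU threads can then pin themselves later with
 * kvm_pin_this_task_to_pcpu(vcpu_to_pcpu[i]).
 */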
/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find. Allows tests to look up
 * the memslot datastructure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->run, vcpu_mmap_sz());
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	ret = close(vcpu->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	free(region);
}

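/*
 * Note (illustrative): kvm_vm_release() above tears down the vCPUs and closes
 * the VM's file descriptors while leaving the tracked memory regions and
 * sparsebit arrays in place, so that kvm_vm_restart() can recreate the VM.
 * kvm_vm_free() below performs the full teardown; a test that is completely
 * done with a VM typically just calls:
 *
 *	kvm_vm_free(vm);
 */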
/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free cached stats metadata and close FD */
	if (vmp->stats_fd) {
		free(vmp->stats_desc);
		close(vmp->stats_fd);
	}

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd, r;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	r = ftruncate(fd, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));

	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	return fd;
}

/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a match is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}

int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}

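/*
 * Example (illustrative): __vm_set_user_memory_region() returns the raw
 * ioctl() result, which lets memslot tests assert on expected failures, e.g.
 * that KVM rejects a memslot whose size is not page aligned:
 *
 *	r = __vm_set_user_memory_region(vm, 0, 0, 0, getpagesize() + 1, hva);
 *	TEST_ASSERT(r == -1 && errno == EINVAL, "Unaligned size not rejected");
 *
 * The slot/size values above are placeholders; vm_set_user_memory_region()
 * is the asserting wrapper for the common "must succeed" case.
 */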
" 887 "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); 888 889 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " 890 "address not on a page boundary.\n" 891 " guest_paddr: 0x%lx vm->page_size: 0x%x", 892 guest_paddr, vm->page_size); 893 TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) 894 <= vm->max_gfn, "Physical range beyond maximum " 895 "supported physical address,\n" 896 " guest_paddr: 0x%lx npages: 0x%lx\n" 897 " vm->max_gfn: 0x%lx vm->page_size: 0x%x", 898 guest_paddr, npages, vm->max_gfn, vm->page_size); 899 900 /* 901 * Confirm a mem region with an overlapping address doesn't 902 * already exist. 903 */ 904 region = (struct userspace_mem_region *) userspace_mem_region_find( 905 vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); 906 if (region != NULL) 907 TEST_FAIL("overlapping userspace_mem_region already " 908 "exists\n" 909 " requested guest_paddr: 0x%lx npages: 0x%lx " 910 "page_size: 0x%x\n" 911 " existing guest_paddr: 0x%lx size: 0x%lx", 912 guest_paddr, npages, vm->page_size, 913 (uint64_t) region->region.guest_phys_addr, 914 (uint64_t) region->region.memory_size); 915 916 /* Confirm no region with the requested slot already exists. */ 917 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 918 slot) { 919 if (region->region.slot != slot) 920 continue; 921 922 TEST_FAIL("A mem region with the requested slot " 923 "already exists.\n" 924 " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" 925 " existing slot: %u paddr: 0x%lx size: 0x%lx", 926 slot, guest_paddr, npages, 927 region->region.slot, 928 (uint64_t) region->region.guest_phys_addr, 929 (uint64_t) region->region.memory_size); 930 } 931 932 /* Allocate and initialize new mem region structure. */ 933 region = calloc(1, sizeof(*region)); 934 TEST_ASSERT(region != NULL, "Insufficient Memory"); 935 region->mmap_size = npages * vm->page_size; 936 937 #ifdef __s390x__ 938 /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ 939 alignment = 0x100000; 940 #else 941 alignment = 1; 942 #endif 943 944 /* 945 * When using THP mmap is not guaranteed to returned a hugepage aligned 946 * address so we have to pad the mmap. Padding is not needed for HugeTLB 947 * because mmap will always return an address aligned to the HugeTLB 948 * page size. 
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
		guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}

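/*
 * Example (illustrative sketch): a dirty logging test typically adds an
 * extra memslot with KVM_MEM_LOG_DIRTY_PAGES set and then maps it into the
 * guest's address space:
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_paddr,
 *				    TEST_MEM_SLOT_INDEX, npages,
 *				    KVM_MEM_LOG_DIRTY_PAGES);
 *	virt_map(vm, guest_vaddr, guest_paddr, npages);
 *
 * TEST_MEM_SLOT_INDEX, guest_paddr, guest_vaddr and npages are placeholders
 * chosen by the individual test; virt_map() is defined further down.
 */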
/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describes the memory region
 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - Flags to set for the memory region (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}

/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

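/*
 * Note (illustrative): __vm_vcpu_add() below only creates the vCPU; most
 * tests use the arch-specific vm_vcpu_add() wrapper (as __vm_create_with_vcpus()
 * above does) so that the vCPU is also set up to run a guest_code function.
 * Creating a bare vCPU directly is mainly useful for ioctl/ABI tests, e.g.:
 *
 *	struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, 0);
 */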
/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");

	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
		vcpu_mmap_sz(), sizeof(*vcpu->run));
	vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
	TEST_ASSERT(vcpu->run != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);

	return vcpu;
}

/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
			       vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
				  pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
						     pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index.
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
						       pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
					  pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
					 pgidx_start, pages),
		    "Unexpected, invalid virtual page index range,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages),
		    "Unexpected, pages already mapped,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
					      KVM_UTIL_MIN_PFN * vm->page_size, 0);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
		pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);

		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
	}

	return vaddr_start;
}

/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}

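/*
 * Example (illustrative sketch): a test that wants a page of guest memory it
 * can also touch from the host typically allocates it here and translates
 * the returned GVA to an HVA:
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc_page(vm);
 *	uint64_t *hva = addr_gva2hva(vm, gva);
 *
 * Writes through hva are then visible to the guest at gva.
 * vm_vaddr_alloc_page() is defined immediately below; addr_gva2hva() is
 * defined further down in this file.
 */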
/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);
		vaddr += page_size;
		paddr += page_size;

		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
	}
}

/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm.  When found, the equivalent
 * VM physical address is returned.  A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}

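/*
 * Example (illustrative): dirty logging and demand paging tests use
 * addr_gpa2hva() to verify guest writes from the host side, e.g. after
 * collecting the dirty bitmap for a slot:
 *
 *	uint64_t *host_page = addr_gpa2hva(vm, gpa);
 *
 *	TEST_ASSERT(*host_page == expected_val, "Guest write not observed");
 *
 * where gpa and expected_val come from the individual test.
 */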
1474 * 1475 * Input Args: 1476 * vm - Virtual Machine 1477 * gpa - VM physical address 1478 * 1479 * Output Args: None 1480 * 1481 * Return: 1482 * Equivalent address within the host virtual *alias* area, or NULL 1483 * (without failing the test) if the guest memory is not shared (so 1484 * no alias exists). 1485 * 1486 * Create a writable, shared virtual=>physical alias for the specific GPA. 1487 * The primary use case is to allow the host selftest to manipulate guest 1488 * memory without mapping said memory in the guest's address space. And, for 1489 * userfaultfd-based demand paging, to do so without triggering userfaults. 1490 */ 1491 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) 1492 { 1493 struct userspace_mem_region *region; 1494 uintptr_t offset; 1495 1496 region = userspace_mem_region_find(vm, gpa, gpa); 1497 if (!region) 1498 return NULL; 1499 1500 if (!region->host_alias) 1501 return NULL; 1502 1503 offset = gpa - region->region.guest_phys_addr; 1504 return (void *) ((uintptr_t) region->host_alias + offset); 1505 } 1506 1507 /* Create an interrupt controller chip for the specified VM. */ 1508 void vm_create_irqchip(struct kvm_vm *vm) 1509 { 1510 vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); 1511 1512 vm->has_irqchip = true; 1513 } 1514 1515 int _vcpu_run(struct kvm_vcpu *vcpu) 1516 { 1517 int rc; 1518 1519 do { 1520 rc = __vcpu_run(vcpu); 1521 } while (rc == -1 && errno == EINTR); 1522 1523 assert_on_unhandled_exception(vcpu); 1524 1525 return rc; 1526 } 1527 1528 /* 1529 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR. 1530 * Assert if the KVM returns an error (other than -EINTR). 1531 */ 1532 void vcpu_run(struct kvm_vcpu *vcpu) 1533 { 1534 int ret = _vcpu_run(vcpu); 1535 1536 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret)); 1537 } 1538 1539 void vcpu_run_complete_io(struct kvm_vcpu *vcpu) 1540 { 1541 int ret; 1542 1543 vcpu->run->immediate_exit = 1; 1544 ret = __vcpu_run(vcpu); 1545 vcpu->run->immediate_exit = 0; 1546 1547 TEST_ASSERT(ret == -1 && errno == EINTR, 1548 "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", 1549 ret, errno); 1550 } 1551 1552 /* 1553 * Get the list of guest registers which are supported for 1554 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer, 1555 * it is the caller's responsibility to free the list. 
/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.  Returns a kvm_reg_list pointer;
 * it is the caller's responsibility to free the list.
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = vcpu->vm->page_size;
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}

/*
 * IRQ related functions.
 */

int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	struct kvm_irq_level irq_level = {
		.irq    = irq,
		.level  = level,
	};

	return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}

void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	int ret = _kvm_irq_line(vm, irq, level);

	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}

struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
	struct kvm_irq_routing *routing;
	size_t size;

	size = sizeof(struct kvm_irq_routing);
	/* Allocate space for the max number of entries: this wastes 196 KBs. */
	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
	routing = calloc(1, size);
	assert(routing);

	return routing;
}

void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
				 uint32_t gsi, uint32_t pin)
{
	int i;

	assert(routing);
	assert(routing->nr < KVM_MAX_IRQ_ROUTES);

	i = routing->nr;
	routing->entries[i].gsi = gsi;
	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
	routing->entries[i].flags = 0;
	routing->entries[i].u.irqchip.irqchip = 0;
	routing->entries[i].u.irqchip.pin = pin;
	routing->nr++;
}

int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	assert(routing);
	ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
	free(routing);

	return ret;
}

void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	ret = _kvm_gsi_routing_write(vm, routing);
	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}

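/*
 * Example (illustrative sketch): an irqchip test builds a routing table
 * mapping GSIs to irqchip pins and hands it to KVM in one shot:
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *
 *	for (i = 0; i < nr_irqs; i++)
 *		kvm_gsi_routing_irqchip_add(routing, gsi_base + i, pin_base + i);
 *	kvm_gsi_routing_write(vm, routing);
 *
 * nr_irqs/gsi_base/pin_base are placeholders.  Note that the write helpers
 * free() the routing table, so it must not be reused afterwards.
 */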
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct kvm_vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");

	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vcpu, indent + 2);
}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
	{KVM_EXIT_IO, "IO"},
	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
	{KVM_EXIT_DEBUG, "DEBUG"},
	{KVM_EXIT_HLT, "HLT"},
	{KVM_EXIT_MMIO, "MMIO"},
	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
	{KVM_EXIT_INTR, "INTR"},
	{KVM_EXIT_SET_TPR, "SET_TPR"},
	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
	{KVM_EXIT_S390_RESET, "S390_RESET"},
	{KVM_EXIT_DCR, "DCR"},
	{KVM_EXIT_NMI, "NMI"},
	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
	{KVM_EXIT_OSI, "OSI"},
	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
	{KVM_EXIT_X86_RDMSR, "RDMSR"},
	{KVM_EXIT_X86_WRMSR, "WRMSR"},
	{KVM_EXIT_XEN, "XEN"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};

/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason.  If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}

/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min.  If found, the pages are marked as in use
 * and their base address is returned.  A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		"not divisible by page size.\n"
		"  paddr_min: 0x%lx page_size: 0x%x",
		paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	base = pg = paddr_min >> vm->page_shift;

	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

/* Arbitrary minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n = 1 << (new_page_shift - page_shift);

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;
	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}

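/*
 * Worked example (illustrative): with a 4K host page size and a guest mode
 * using 64K pages (page_shift 16 vs. 12), vm_num_host_pages(mode, 1) returns
 * 1 << (16 - 12) = 16 host pages per guest page, while
 * vm_num_guest_pages(mode, 16) rounds down 16 / 16 = 1 guest page.
 * vm_calc_num_guest_pages() additionally rounds the byte size up to whole
 * guest pages before applying vm_adjust_num_guest_pages().
 */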
/*
 * Read binary stats descriptors
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	struct kvm_stats_desc *stats_desc;
	ssize_t desc_size, total_size, ret;

	desc_size = get_stats_descriptor_size(header);
	total_size = header->num_desc * desc_size;

	stats_desc = calloc(header->num_desc, desc_size);
	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");

	ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
	TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");

	return stats_desc;
}

/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
		    struct kvm_stats_desc *desc, uint64_t *data,
		    size_t max_elements)
{
	size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
	size_t size = nr_elements * sizeof(*data);
	ssize_t ret;

	TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
	TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);

	ret = pread(stats_fd, data, size,
		    header->data_offset + desc->offset);

	TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
		    desc->name, errno, strerror(errno));
	TEST_ASSERT(ret == size,
		    "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
		    desc->name, ret, size);
}

/*
 * Read the data of the named stat
 *
 * Input Args:
 *   vm - the VM for which the stat should be read
 *   stat_name - the name of the stat to read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
		   size_t max_elements)
{
	struct kvm_stats_desc *desc;
	size_t size_desc;
	int i;

	if (!vm->stats_fd) {
		vm->stats_fd = vm_get_stats_fd(vm);
		read_stats_header(vm->stats_fd, &vm->stats_header);
		vm->stats_desc = read_stats_descriptors(vm->stats_fd,
							&vm->stats_header);
	}

	size_desc = get_stats_descriptor_size(&vm->stats_header);

	for (i = 0; i < vm->stats_header.num_desc; ++i) {
		desc = (void *)vm->stats_desc + (i * size_desc);

		if (strcmp(desc->name, stat_name))
			continue;

		read_stat_data(vm->stats_fd, &vm->stats_header, desc,
			       data, max_elements);

		break;
	}
}

__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
{
}

__weak void kvm_selftest_arch_init(void)
{
}

void __attribute((constructor)) kvm_selftest_init(void)
{
	/* Tell stdout not to buffer its content. */
	setbuf(stdout, NULL);

	kvm_selftest_arch_init();
}