1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * tools/testing/selftests/kvm/lib/kvm_util.c 4 * 5 * Copyright (C) 2018, Google LLC. 6 */ 7 8 #define _GNU_SOURCE /* for program_invocation_name */ 9 #include "test_util.h" 10 #include "kvm_util.h" 11 #include "processor.h" 12 13 #include <assert.h> 14 #include <sched.h> 15 #include <sys/mman.h> 16 #include <sys/types.h> 17 #include <sys/stat.h> 18 #include <unistd.h> 19 #include <linux/kernel.h> 20 21 #define KVM_UTIL_MIN_PFN 2 22 23 static int vcpu_mmap_sz(void); 24 25 int open_path_or_exit(const char *path, int flags) 26 { 27 int fd; 28 29 fd = open(path, flags); 30 __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); 31 TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); 32 33 return fd; 34 } 35 36 /* 37 * Open KVM_DEV_PATH if available, otherwise exit the entire program. 38 * 39 * Input Args: 40 * flags - The flags to pass when opening KVM_DEV_PATH. 41 * 42 * Return: 43 * The opened file descriptor of /dev/kvm. 44 */ 45 static int _open_kvm_dev_path_or_exit(int flags) 46 { 47 return open_path_or_exit(KVM_DEV_PATH, flags); 48 } 49 50 int open_kvm_dev_path_or_exit(void) 51 { 52 return _open_kvm_dev_path_or_exit(O_RDONLY); 53 } 54 55 static ssize_t get_module_param(const char *module_name, const char *param, 56 void *buffer, size_t buffer_size) 57 { 58 const int path_size = 128; 59 char path[path_size]; 60 ssize_t bytes_read; 61 int fd, r; 62 63 r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", 64 module_name, param); 65 TEST_ASSERT(r < path_size, 66 "Failed to construct sysfs path in %d bytes.", path_size); 67 68 fd = open_path_or_exit(path, O_RDONLY); 69 70 bytes_read = read(fd, buffer, buffer_size); 71 TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes", 72 path, bytes_read, buffer_size); 73 74 r = close(fd); 75 TEST_ASSERT(!r, "close(%s) failed", path); 76 return bytes_read; 77 } 78 79 static int get_module_param_integer(const char *module_name, const char *param) 80 { 81 /* 82 * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the 83 * NUL char, and 1 byte because the kernel sucks and inserts a newline 84 * at the end. 85 */ 86 char value[16 + 1 + 1]; 87 ssize_t r; 88 89 memset(value, '\0', sizeof(value)); 90 91 r = get_module_param(module_name, param, value, sizeof(value)); 92 TEST_ASSERT(value[r - 1] == '\n', 93 "Expected trailing newline, got char '%c'", value[r - 1]); 94 95 /* 96 * Squash the newline, otherwise atoi_paranoid() will complain about 97 * trailing non-NUL characters in the string. 98 */ 99 value[r - 1] = '\0'; 100 return atoi_paranoid(value); 101 } 102 103 static bool get_module_param_bool(const char *module_name, const char *param) 104 { 105 char value; 106 ssize_t r; 107 108 r = get_module_param(module_name, param, &value, sizeof(value)); 109 TEST_ASSERT_EQ(r, 1); 110 111 if (value == 'Y') 112 return true; 113 else if (value == 'N') 114 return false; 115 116 TEST_FAIL("Unrecognized value '%c' for boolean module param", value); 117 } 118 119 bool get_kvm_param_bool(const char *param) 120 { 121 return get_module_param_bool("kvm", param); 122 } 123 124 bool get_kvm_intel_param_bool(const char *param) 125 { 126 return get_module_param_bool("kvm_intel", param); 127 } 128 129 bool get_kvm_amd_param_bool(const char *param) 130 { 131 return get_module_param_bool("kvm_amd", param); 132 } 133 134 int get_kvm_param_integer(const char *param) 135 { 136 return get_module_param_integer("kvm", param); 137 } 138 139 int get_kvm_intel_param_integer(const char *param) 140 { 141 return get_module_param_integer("kvm_intel", param); 142 } 143 144 int get_kvm_amd_param_integer(const char *param) 145 { 146 return get_module_param_integer("kvm_amd", param); 147 } 148 149 /* 150 * Capability 151 * 152 * Input Args: 153 * cap - Capability 154 * 155 * Output Args: None 156 * 157 * Return: 158 * On success, the Value corresponding to the capability (KVM_CAP_*) 159 * specified by the value of cap. On failure a TEST_ASSERT failure 160 * is produced. 161 * 162 * Looks up and returns the value corresponding to the capability 163 * (KVM_CAP_*) given by cap. 164 */ 165 unsigned int kvm_check_cap(long cap) 166 { 167 int ret; 168 int kvm_fd; 169 170 kvm_fd = open_kvm_dev_path_or_exit(); 171 ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap); 172 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); 173 174 close(kvm_fd); 175 176 return (unsigned int)ret; 177 } 178 179 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) 180 { 181 if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) 182 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); 183 else 184 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); 185 vm->dirty_ring_size = ring_size; 186 } 187 188 static void vm_open(struct kvm_vm *vm) 189 { 190 vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR); 191 192 TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT)); 193 194 vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); 195 TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); 196 } 197 198 const char *vm_guest_mode_string(uint32_t i) 199 { 200 static const char * const strings[] = { 201 [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", 202 [VM_MODE_P52V48_16K] = "PA-bits:52, VA-bits:48, 16K pages", 203 [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", 204 [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", 205 [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", 206 [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", 207 [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", 208 [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", 209 [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", 210 [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", 211 [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", 212 [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", 213 [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", 214 [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", 215 [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", 216 [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", 217 }; 218 _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, 219 "Missing new mode strings?"); 220 221 TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i); 222 223 return strings[i]; 224 } 225 226 const struct vm_guest_mode_params vm_guest_mode_params[] = { 227 [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, 228 [VM_MODE_P52V48_16K] = { 52, 48, 0x4000, 14 }, 229 [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, 230 [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, 231 [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, 232 [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, 233 [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, 234 [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, 235 [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, 236 [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, 237 [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, 238 [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, 239 [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, 240 [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, 241 [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, 242 [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, 243 }; 244 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, 245 "Missing new mode params?"); 246 247 /* 248 * Initializes vm->vpages_valid to match the canonical VA space of the 249 * architecture. 250 * 251 * The default implementation is valid for architectures which split the 252 * range addressed by a single page table into a low and high region 253 * based on the MSB of the VA. On architectures with this behavior 254 * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1]. 255 */ 256 __weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) 257 { 258 sparsebit_set_num(vm->vpages_valid, 259 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 260 sparsebit_set_num(vm->vpages_valid, 261 (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, 262 (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 263 } 264 265 struct kvm_vm *____vm_create(struct vm_shape shape) 266 { 267 struct kvm_vm *vm; 268 269 vm = calloc(1, sizeof(*vm)); 270 TEST_ASSERT(vm != NULL, "Insufficient Memory"); 271 272 INIT_LIST_HEAD(&vm->vcpus); 273 vm->regions.gpa_tree = RB_ROOT; 274 vm->regions.hva_tree = RB_ROOT; 275 hash_init(vm->regions.slot_hash); 276 277 vm->mode = shape.mode; 278 vm->type = shape.type; 279 vm->subtype = shape.subtype; 280 281 vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits; 282 vm->va_bits = vm_guest_mode_params[vm->mode].va_bits; 283 vm->page_size = vm_guest_mode_params[vm->mode].page_size; 284 vm->page_shift = vm_guest_mode_params[vm->mode].page_shift; 285 286 /* Setup mode specific traits. */ 287 switch (vm->mode) { 288 case VM_MODE_P52V48_4K: 289 vm->pgtable_levels = 4; 290 break; 291 case VM_MODE_P52V48_64K: 292 vm->pgtable_levels = 3; 293 break; 294 case VM_MODE_P48V48_4K: 295 vm->pgtable_levels = 4; 296 break; 297 case VM_MODE_P48V48_64K: 298 vm->pgtable_levels = 3; 299 break; 300 case VM_MODE_P40V48_4K: 301 case VM_MODE_P36V48_4K: 302 vm->pgtable_levels = 4; 303 break; 304 case VM_MODE_P40V48_64K: 305 case VM_MODE_P36V48_64K: 306 vm->pgtable_levels = 3; 307 break; 308 case VM_MODE_P52V48_16K: 309 case VM_MODE_P48V48_16K: 310 case VM_MODE_P40V48_16K: 311 case VM_MODE_P36V48_16K: 312 vm->pgtable_levels = 4; 313 break; 314 case VM_MODE_P36V47_16K: 315 vm->pgtable_levels = 3; 316 break; 317 case VM_MODE_PXXV48_4K: 318 #ifdef __x86_64__ 319 kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); 320 kvm_init_vm_address_properties(vm); 321 /* 322 * Ignore KVM support for 5-level paging (vm->va_bits == 57), 323 * it doesn't take effect unless a CR4.LA57 is set, which it 324 * isn't for this mode (48-bit virtual address space). 325 */ 326 TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, 327 "Linear address width (%d bits) not supported", 328 vm->va_bits); 329 pr_debug("Guest physical address width detected: %d\n", 330 vm->pa_bits); 331 vm->pgtable_levels = 4; 332 vm->va_bits = 48; 333 #else 334 TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); 335 #endif 336 break; 337 case VM_MODE_P47V64_4K: 338 vm->pgtable_levels = 5; 339 break; 340 case VM_MODE_P44V64_4K: 341 vm->pgtable_levels = 5; 342 break; 343 default: 344 TEST_FAIL("Unknown guest mode: 0x%x", vm->mode); 345 } 346 347 #ifdef __aarch64__ 348 TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types"); 349 if (vm->pa_bits != 40) 350 vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); 351 #endif 352 353 vm_open(vm); 354 355 /* Limit to VA-bit canonical virtual addresses. */ 356 vm->vpages_valid = sparsebit_alloc(); 357 vm_vaddr_populate_bitmap(vm); 358 359 /* Limit physical addresses to PA-bits. */ 360 vm->max_gfn = vm_compute_max_gfn(vm); 361 362 /* Allocate and setup memory for guest. */ 363 vm->vpages_mapped = sparsebit_alloc(); 364 365 return vm; 366 } 367 368 static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, 369 uint32_t nr_runnable_vcpus, 370 uint64_t extra_mem_pages) 371 { 372 uint64_t page_size = vm_guest_mode_params[mode].page_size; 373 uint64_t nr_pages; 374 375 TEST_ASSERT(nr_runnable_vcpus, 376 "Use vm_create_barebones() for VMs that _never_ have vCPUs"); 377 378 TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), 379 "nr_vcpus = %d too large for host, max-vcpus = %d", 380 nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); 381 382 /* 383 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the 384 * test code and other per-VM assets that will be loaded into memslot0. 385 */ 386 nr_pages = 512; 387 388 /* Account for the per-vCPU stacks on behalf of the test. */ 389 nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS; 390 391 /* 392 * Account for the number of pages needed for the page tables. The 393 * maximum page table size for a memory region will be when the 394 * smallest page size is used. Considering each page contains x page 395 * table descriptors, the total extra size for page tables (for extra 396 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller 397 * than N/x*2. 398 */ 399 nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; 400 401 /* Account for the number of pages needed by ucall. */ 402 nr_pages += ucall_nr_pages_required(page_size); 403 404 return vm_adjust_num_guest_pages(mode, nr_pages); 405 } 406 407 struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, 408 uint64_t nr_extra_pages) 409 { 410 uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, 411 nr_extra_pages); 412 struct userspace_mem_region *slot0; 413 struct kvm_vm *vm; 414 int i; 415 416 pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, 417 vm_guest_mode_string(shape.mode), shape.type, nr_pages); 418 419 vm = ____vm_create(shape); 420 421 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); 422 for (i = 0; i < NR_MEM_REGIONS; i++) 423 vm->memslots[i] = 0; 424 425 kvm_vm_elf_load(vm, program_invocation_name); 426 427 /* 428 * TODO: Add proper defines to protect the library's memslots, and then 429 * carve out memslot1 for the ucall MMIO address. KVM treats writes to 430 * read-only memslots as MMIO, and creating a read-only memslot for the 431 * MMIO region would prevent silently clobbering the MMIO region. 432 */ 433 slot0 = memslot2region(vm, 0); 434 ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); 435 436 kvm_arch_vm_post_create(vm); 437 438 return vm; 439 } 440 441 /* 442 * VM Create with customized parameters 443 * 444 * Input Args: 445 * mode - VM Mode (e.g. VM_MODE_P52V48_4K) 446 * nr_vcpus - VCPU count 447 * extra_mem_pages - Non-slot0 physical memory total size 448 * guest_code - Guest entry point 449 * vcpuids - VCPU IDs 450 * 451 * Output Args: None 452 * 453 * Return: 454 * Pointer to opaque structure that describes the created VM. 455 * 456 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). 457 * extra_mem_pages is only used to calculate the maximum page table size, 458 * no real memory allocation for non-slot0 memory in this function. 459 */ 460 struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, 461 uint64_t extra_mem_pages, 462 void *guest_code, struct kvm_vcpu *vcpus[]) 463 { 464 struct kvm_vm *vm; 465 int i; 466 467 TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array"); 468 469 vm = __vm_create(shape, nr_vcpus, extra_mem_pages); 470 471 for (i = 0; i < nr_vcpus; ++i) 472 vcpus[i] = vm_vcpu_add(vm, i, guest_code); 473 474 return vm; 475 } 476 477 struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, 478 struct kvm_vcpu **vcpu, 479 uint64_t extra_mem_pages, 480 void *guest_code) 481 { 482 struct kvm_vcpu *vcpus[1]; 483 struct kvm_vm *vm; 484 485 vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus); 486 487 *vcpu = vcpus[0]; 488 return vm; 489 } 490 491 /* 492 * VM Restart 493 * 494 * Input Args: 495 * vm - VM that has been released before 496 * 497 * Output Args: None 498 * 499 * Reopens the file descriptors associated to the VM and reinstates the 500 * global state, such as the irqchip and the memory regions that are mapped 501 * into the guest. 502 */ 503 void kvm_vm_restart(struct kvm_vm *vmp) 504 { 505 int ctr; 506 struct userspace_mem_region *region; 507 508 vm_open(vmp); 509 if (vmp->has_irqchip) 510 vm_create_irqchip(vmp); 511 512 hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) { 513 int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, ®ion->region); 514 515 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" 516 " rc: %i errno: %i\n" 517 " slot: %u flags: 0x%x\n" 518 " guest_phys_addr: 0x%llx size: 0x%llx", 519 ret, errno, region->region.slot, 520 region->region.flags, 521 region->region.guest_phys_addr, 522 region->region.memory_size); 523 } 524 } 525 526 __weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, 527 uint32_t vcpu_id) 528 { 529 return __vm_vcpu_add(vm, vcpu_id); 530 } 531 532 struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) 533 { 534 kvm_vm_restart(vm); 535 536 return vm_vcpu_recreate(vm, 0); 537 } 538 539 void kvm_pin_this_task_to_pcpu(uint32_t pcpu) 540 { 541 cpu_set_t mask; 542 int r; 543 544 CPU_ZERO(&mask); 545 CPU_SET(pcpu, &mask); 546 r = sched_setaffinity(0, sizeof(mask), &mask); 547 TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); 548 } 549 550 static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) 551 { 552 uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); 553 554 TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), 555 "Not allowed to run on pCPU '%d', check cgroups?", pcpu); 556 return pcpu; 557 } 558 559 void kvm_print_vcpu_pinning_help(void) 560 { 561 const char *name = program_invocation_name; 562 563 printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n" 564 " values (target pCPU), one for each vCPU, plus an optional\n" 565 " entry for the main application task (specified via entry\n" 566 " <nr_vcpus + 1>). If used, entries must be provided for all\n" 567 " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n" 568 " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n" 569 " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n" 570 " %s -v 3 -c 22,23,24,50\n\n" 571 " To leave the application task unpinned, drop the final entry:\n\n" 572 " %s -v 3 -c 22,23,24\n\n" 573 " (default: no pinning)\n", name, name); 574 } 575 576 void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], 577 int nr_vcpus) 578 { 579 cpu_set_t allowed_mask; 580 char *cpu, *cpu_list; 581 char delim[2] = ","; 582 int i, r; 583 584 cpu_list = strdup(pcpus_string); 585 TEST_ASSERT(cpu_list, "strdup() allocation failed."); 586 587 r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); 588 TEST_ASSERT(!r, "sched_getaffinity() failed"); 589 590 cpu = strtok(cpu_list, delim); 591 592 /* 1. Get all pcpus for vcpus. */ 593 for (i = 0; i < nr_vcpus; i++) { 594 TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i); 595 vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); 596 cpu = strtok(NULL, delim); 597 } 598 599 /* 2. Check if the main worker needs to be pinned. */ 600 if (cpu) { 601 kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); 602 cpu = strtok(NULL, delim); 603 } 604 605 TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu); 606 free(cpu_list); 607 } 608 609 /* 610 * Userspace Memory Region Find 611 * 612 * Input Args: 613 * vm - Virtual Machine 614 * start - Starting VM physical address 615 * end - Ending VM physical address, inclusive. 616 * 617 * Output Args: None 618 * 619 * Return: 620 * Pointer to overlapping region, NULL if no such region. 621 * 622 * Searches for a region with any physical memory that overlaps with 623 * any portion of the guest physical addresses from start to end 624 * inclusive. If multiple overlapping regions exist, a pointer to any 625 * of the regions is returned. Null is returned only when no overlapping 626 * region exists. 627 */ 628 static struct userspace_mem_region * 629 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) 630 { 631 struct rb_node *node; 632 633 for (node = vm->regions.gpa_tree.rb_node; node; ) { 634 struct userspace_mem_region *region = 635 container_of(node, struct userspace_mem_region, gpa_node); 636 uint64_t existing_start = region->region.guest_phys_addr; 637 uint64_t existing_end = region->region.guest_phys_addr 638 + region->region.memory_size - 1; 639 if (start <= existing_end && end >= existing_start) 640 return region; 641 642 if (start < existing_start) 643 node = node->rb_left; 644 else 645 node = node->rb_right; 646 } 647 648 return NULL; 649 } 650 651 __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) 652 { 653 654 } 655 656 /* 657 * VM VCPU Remove 658 * 659 * Input Args: 660 * vcpu - VCPU to remove 661 * 662 * Output Args: None 663 * 664 * Return: None, TEST_ASSERT failures for all error conditions 665 * 666 * Removes a vCPU from a VM and frees its resources. 667 */ 668 static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) 669 { 670 int ret; 671 672 if (vcpu->dirty_gfns) { 673 ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); 674 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 675 vcpu->dirty_gfns = NULL; 676 } 677 678 ret = munmap(vcpu->run, vcpu_mmap_sz()); 679 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 680 681 ret = close(vcpu->fd); 682 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 683 684 list_del(&vcpu->list); 685 686 vcpu_arch_free(vcpu); 687 free(vcpu); 688 } 689 690 void kvm_vm_release(struct kvm_vm *vmp) 691 { 692 struct kvm_vcpu *vcpu, *tmp; 693 int ret; 694 695 list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) 696 vm_vcpu_rm(vmp, vcpu); 697 698 ret = close(vmp->fd); 699 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 700 701 ret = close(vmp->kvm_fd); 702 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); 703 } 704 705 static void __vm_mem_region_delete(struct kvm_vm *vm, 706 struct userspace_mem_region *region, 707 bool unlink) 708 { 709 int ret; 710 711 if (unlink) { 712 rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); 713 rb_erase(®ion->hva_node, &vm->regions.hva_tree); 714 hash_del(®ion->slot_node); 715 } 716 717 region->region.memory_size = 0; 718 vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); 719 720 sparsebit_free(®ion->unused_phy_pages); 721 sparsebit_free(®ion->protected_phy_pages); 722 ret = munmap(region->mmap_start, region->mmap_size); 723 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 724 if (region->fd >= 0) { 725 /* There's an extra map when using shared memory. */ 726 ret = munmap(region->mmap_alias, region->mmap_size); 727 TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); 728 close(region->fd); 729 } 730 if (region->region.guest_memfd >= 0) 731 close(region->region.guest_memfd); 732 733 free(region); 734 } 735 736 /* 737 * Destroys and frees the VM pointed to by vmp. 738 */ 739 void kvm_vm_free(struct kvm_vm *vmp) 740 { 741 int ctr; 742 struct hlist_node *node; 743 struct userspace_mem_region *region; 744 745 if (vmp == NULL) 746 return; 747 748 /* Free cached stats metadata and close FD */ 749 if (vmp->stats_fd) { 750 free(vmp->stats_desc); 751 close(vmp->stats_fd); 752 } 753 754 /* Free userspace_mem_regions. */ 755 hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) 756 __vm_mem_region_delete(vmp, region, false); 757 758 /* Free sparsebit arrays. */ 759 sparsebit_free(&vmp->vpages_valid); 760 sparsebit_free(&vmp->vpages_mapped); 761 762 kvm_vm_release(vmp); 763 764 /* Free the structure describing the VM. */ 765 free(vmp); 766 } 767 768 int kvm_memfd_alloc(size_t size, bool hugepages) 769 { 770 int memfd_flags = MFD_CLOEXEC; 771 int fd, r; 772 773 if (hugepages) 774 memfd_flags |= MFD_HUGETLB; 775 776 fd = memfd_create("kvm_selftest", memfd_flags); 777 TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); 778 779 r = ftruncate(fd, size); 780 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); 781 782 r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); 783 TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); 784 785 return fd; 786 } 787 788 /* 789 * Memory Compare, host virtual to guest virtual 790 * 791 * Input Args: 792 * hva - Starting host virtual address 793 * vm - Virtual Machine 794 * gva - Starting guest virtual address 795 * len - number of bytes to compare 796 * 797 * Output Args: None 798 * 799 * Input/Output Args: None 800 * 801 * Return: 802 * Returns 0 if the bytes starting at hva for a length of len 803 * are equal the guest virtual bytes starting at gva. Returns 804 * a value < 0, if bytes at hva are less than those at gva. 805 * Otherwise a value > 0 is returned. 806 * 807 * Compares the bytes starting at the host virtual address hva, for 808 * a length of len, to the guest bytes starting at the guest virtual 809 * address given by gva. 810 */ 811 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) 812 { 813 size_t amt; 814 815 /* 816 * Compare a batch of bytes until either a match is found 817 * or all the bytes have been compared. 818 */ 819 for (uintptr_t offset = 0; offset < len; offset += amt) { 820 uintptr_t ptr1 = (uintptr_t)hva + offset; 821 822 /* 823 * Determine host address for guest virtual address 824 * at offset. 825 */ 826 uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); 827 828 /* 829 * Determine amount to compare on this pass. 830 * Don't allow the comparsion to cross a page boundary. 831 */ 832 amt = len - offset; 833 if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) 834 amt = vm->page_size - (ptr1 % vm->page_size); 835 if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) 836 amt = vm->page_size - (ptr2 % vm->page_size); 837 838 assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); 839 assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); 840 841 /* 842 * Perform the comparison. If there is a difference 843 * return that result to the caller, otherwise need 844 * to continue on looking for a mismatch. 845 */ 846 int ret = memcmp((void *)ptr1, (void *)ptr2, amt); 847 if (ret != 0) 848 return ret; 849 } 850 851 /* 852 * No mismatch found. Let the caller know the two memory 853 * areas are equal. 854 */ 855 return 0; 856 } 857 858 static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, 859 struct userspace_mem_region *region) 860 { 861 struct rb_node **cur, *parent; 862 863 for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) { 864 struct userspace_mem_region *cregion; 865 866 cregion = container_of(*cur, typeof(*cregion), gpa_node); 867 parent = *cur; 868 if (region->region.guest_phys_addr < 869 cregion->region.guest_phys_addr) 870 cur = &(*cur)->rb_left; 871 else { 872 TEST_ASSERT(region->region.guest_phys_addr != 873 cregion->region.guest_phys_addr, 874 "Duplicate GPA in region tree"); 875 876 cur = &(*cur)->rb_right; 877 } 878 } 879 880 rb_link_node(®ion->gpa_node, parent, cur); 881 rb_insert_color(®ion->gpa_node, gpa_tree); 882 } 883 884 static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, 885 struct userspace_mem_region *region) 886 { 887 struct rb_node **cur, *parent; 888 889 for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) { 890 struct userspace_mem_region *cregion; 891 892 cregion = container_of(*cur, typeof(*cregion), hva_node); 893 parent = *cur; 894 if (region->host_mem < cregion->host_mem) 895 cur = &(*cur)->rb_left; 896 else { 897 TEST_ASSERT(region->host_mem != 898 cregion->host_mem, 899 "Duplicate HVA in region tree"); 900 901 cur = &(*cur)->rb_right; 902 } 903 } 904 905 rb_link_node(®ion->hva_node, parent, cur); 906 rb_insert_color(®ion->hva_node, hva_tree); 907 } 908 909 910 int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 911 uint64_t gpa, uint64_t size, void *hva) 912 { 913 struct kvm_userspace_memory_region region = { 914 .slot = slot, 915 .flags = flags, 916 .guest_phys_addr = gpa, 917 .memory_size = size, 918 .userspace_addr = (uintptr_t)hva, 919 }; 920 921 return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion); 922 } 923 924 void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 925 uint64_t gpa, uint64_t size, void *hva) 926 { 927 int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); 928 929 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)", 930 errno, strerror(errno)); 931 } 932 933 int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 934 uint64_t gpa, uint64_t size, void *hva, 935 uint32_t guest_memfd, uint64_t guest_memfd_offset) 936 { 937 struct kvm_userspace_memory_region2 region = { 938 .slot = slot, 939 .flags = flags, 940 .guest_phys_addr = gpa, 941 .memory_size = size, 942 .userspace_addr = (uintptr_t)hva, 943 .guest_memfd = guest_memfd, 944 .guest_memfd_offset = guest_memfd_offset, 945 }; 946 947 return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion); 948 } 949 950 void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 951 uint64_t gpa, uint64_t size, void *hva, 952 uint32_t guest_memfd, uint64_t guest_memfd_offset) 953 { 954 int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, 955 guest_memfd, guest_memfd_offset); 956 957 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)", 958 errno, strerror(errno)); 959 } 960 961 962 /* FIXME: This thing needs to be ripped apart and rewritten. */ 963 void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, 964 uint64_t guest_paddr, uint32_t slot, uint64_t npages, 965 uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) 966 { 967 int ret; 968 struct userspace_mem_region *region; 969 size_t backing_src_pagesz = get_backing_src_pagesz(src_type); 970 size_t mem_size = npages * vm->page_size; 971 size_t alignment; 972 973 TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, 974 "Number of guest pages is not compatible with the host. " 975 "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); 976 977 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " 978 "address not on a page boundary.\n" 979 " guest_paddr: 0x%lx vm->page_size: 0x%x", 980 guest_paddr, vm->page_size); 981 TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) 982 <= vm->max_gfn, "Physical range beyond maximum " 983 "supported physical address,\n" 984 " guest_paddr: 0x%lx npages: 0x%lx\n" 985 " vm->max_gfn: 0x%lx vm->page_size: 0x%x", 986 guest_paddr, npages, vm->max_gfn, vm->page_size); 987 988 /* 989 * Confirm a mem region with an overlapping address doesn't 990 * already exist. 991 */ 992 region = (struct userspace_mem_region *) userspace_mem_region_find( 993 vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); 994 if (region != NULL) 995 TEST_FAIL("overlapping userspace_mem_region already " 996 "exists\n" 997 " requested guest_paddr: 0x%lx npages: 0x%lx " 998 "page_size: 0x%x\n" 999 " existing guest_paddr: 0x%lx size: 0x%lx", 1000 guest_paddr, npages, vm->page_size, 1001 (uint64_t) region->region.guest_phys_addr, 1002 (uint64_t) region->region.memory_size); 1003 1004 /* Confirm no region with the requested slot already exists. */ 1005 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 1006 slot) { 1007 if (region->region.slot != slot) 1008 continue; 1009 1010 TEST_FAIL("A mem region with the requested slot " 1011 "already exists.\n" 1012 " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" 1013 " existing slot: %u paddr: 0x%lx size: 0x%lx", 1014 slot, guest_paddr, npages, 1015 region->region.slot, 1016 (uint64_t) region->region.guest_phys_addr, 1017 (uint64_t) region->region.memory_size); 1018 } 1019 1020 /* Allocate and initialize new mem region structure. */ 1021 region = calloc(1, sizeof(*region)); 1022 TEST_ASSERT(region != NULL, "Insufficient Memory"); 1023 region->mmap_size = mem_size; 1024 1025 #ifdef __s390x__ 1026 /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ 1027 alignment = 0x100000; 1028 #else 1029 alignment = 1; 1030 #endif 1031 1032 /* 1033 * When using THP mmap is not guaranteed to returned a hugepage aligned 1034 * address so we have to pad the mmap. Padding is not needed for HugeTLB 1035 * because mmap will always return an address aligned to the HugeTLB 1036 * page size. 1037 */ 1038 if (src_type == VM_MEM_SRC_ANONYMOUS_THP) 1039 alignment = max(backing_src_pagesz, alignment); 1040 1041 TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); 1042 1043 /* Add enough memory to align up if necessary */ 1044 if (alignment > 1) 1045 region->mmap_size += alignment; 1046 1047 region->fd = -1; 1048 if (backing_src_is_shared(src_type)) 1049 region->fd = kvm_memfd_alloc(region->mmap_size, 1050 src_type == VM_MEM_SRC_SHARED_HUGETLB); 1051 1052 region->mmap_start = mmap(NULL, region->mmap_size, 1053 PROT_READ | PROT_WRITE, 1054 vm_mem_backing_src_alias(src_type)->flag, 1055 region->fd, 0); 1056 TEST_ASSERT(region->mmap_start != MAP_FAILED, 1057 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 1058 1059 TEST_ASSERT(!is_backing_src_hugetlb(src_type) || 1060 region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), 1061 "mmap_start %p is not aligned to HugeTLB page size 0x%lx", 1062 region->mmap_start, backing_src_pagesz); 1063 1064 /* Align host address */ 1065 region->host_mem = align_ptr_up(region->mmap_start, alignment); 1066 1067 /* As needed perform madvise */ 1068 if ((src_type == VM_MEM_SRC_ANONYMOUS || 1069 src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { 1070 ret = madvise(region->host_mem, mem_size, 1071 src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); 1072 TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", 1073 region->host_mem, mem_size, 1074 vm_mem_backing_src_alias(src_type)->name); 1075 } 1076 1077 region->backing_src_type = src_type; 1078 1079 if (flags & KVM_MEM_GUEST_MEMFD) { 1080 if (guest_memfd < 0) { 1081 uint32_t guest_memfd_flags = 0; 1082 TEST_ASSERT(!guest_memfd_offset, 1083 "Offset must be zero when creating new guest_memfd"); 1084 guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); 1085 } else { 1086 /* 1087 * Install a unique fd for each memslot so that the fd 1088 * can be closed when the region is deleted without 1089 * needing to track if the fd is owned by the framework 1090 * or by the caller. 1091 */ 1092 guest_memfd = dup(guest_memfd); 1093 TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); 1094 } 1095 1096 region->region.guest_memfd = guest_memfd; 1097 region->region.guest_memfd_offset = guest_memfd_offset; 1098 } else { 1099 region->region.guest_memfd = -1; 1100 } 1101 1102 region->unused_phy_pages = sparsebit_alloc(); 1103 if (vm_arch_has_protected_memory(vm)) 1104 region->protected_phy_pages = sparsebit_alloc(); 1105 sparsebit_set_num(region->unused_phy_pages, 1106 guest_paddr >> vm->page_shift, npages); 1107 region->region.slot = slot; 1108 region->region.flags = flags; 1109 region->region.guest_phys_addr = guest_paddr; 1110 region->region.memory_size = npages * vm->page_size; 1111 region->region.userspace_addr = (uintptr_t) region->host_mem; 1112 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); 1113 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" 1114 " rc: %i errno: %i\n" 1115 " slot: %u flags: 0x%x\n" 1116 " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d", 1117 ret, errno, slot, flags, 1118 guest_paddr, (uint64_t) region->region.memory_size, 1119 region->region.guest_memfd); 1120 1121 /* Add to quick lookup data structures */ 1122 vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); 1123 vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region); 1124 hash_add(vm->regions.slot_hash, ®ion->slot_node, slot); 1125 1126 /* If shared memory, create an alias. */ 1127 if (region->fd >= 0) { 1128 region->mmap_alias = mmap(NULL, region->mmap_size, 1129 PROT_READ | PROT_WRITE, 1130 vm_mem_backing_src_alias(src_type)->flag, 1131 region->fd, 0); 1132 TEST_ASSERT(region->mmap_alias != MAP_FAILED, 1133 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 1134 1135 /* Align host alias address */ 1136 region->host_alias = align_ptr_up(region->mmap_alias, alignment); 1137 } 1138 } 1139 1140 void vm_userspace_mem_region_add(struct kvm_vm *vm, 1141 enum vm_mem_backing_src_type src_type, 1142 uint64_t guest_paddr, uint32_t slot, 1143 uint64_t npages, uint32_t flags) 1144 { 1145 vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); 1146 } 1147 1148 /* 1149 * Memslot to region 1150 * 1151 * Input Args: 1152 * vm - Virtual Machine 1153 * memslot - KVM memory slot ID 1154 * 1155 * Output Args: None 1156 * 1157 * Return: 1158 * Pointer to memory region structure that describe memory region 1159 * using kvm memory slot ID given by memslot. TEST_ASSERT failure 1160 * on error (e.g. currently no memory region using memslot as a KVM 1161 * memory slot ID). 1162 */ 1163 struct userspace_mem_region * 1164 memslot2region(struct kvm_vm *vm, uint32_t memslot) 1165 { 1166 struct userspace_mem_region *region; 1167 1168 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 1169 memslot) 1170 if (region->region.slot == memslot) 1171 return region; 1172 1173 fprintf(stderr, "No mem region with the requested slot found,\n" 1174 " requested slot: %u\n", memslot); 1175 fputs("---- vm dump ----\n", stderr); 1176 vm_dump(stderr, vm, 2); 1177 TEST_FAIL("Mem region not found"); 1178 return NULL; 1179 } 1180 1181 /* 1182 * VM Memory Region Flags Set 1183 * 1184 * Input Args: 1185 * vm - Virtual Machine 1186 * flags - Starting guest physical address 1187 * 1188 * Output Args: None 1189 * 1190 * Return: None 1191 * 1192 * Sets the flags of the memory region specified by the value of slot, 1193 * to the values given by flags. 1194 */ 1195 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) 1196 { 1197 int ret; 1198 struct userspace_mem_region *region; 1199 1200 region = memslot2region(vm, slot); 1201 1202 region->region.flags = flags; 1203 1204 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); 1205 1206 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" 1207 " rc: %i errno: %i slot: %u flags: 0x%x", 1208 ret, errno, slot, flags); 1209 } 1210 1211 /* 1212 * VM Memory Region Move 1213 * 1214 * Input Args: 1215 * vm - Virtual Machine 1216 * slot - Slot of the memory region to move 1217 * new_gpa - Starting guest physical address 1218 * 1219 * Output Args: None 1220 * 1221 * Return: None 1222 * 1223 * Change the gpa of a memory region. 1224 */ 1225 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) 1226 { 1227 struct userspace_mem_region *region; 1228 int ret; 1229 1230 region = memslot2region(vm, slot); 1231 1232 region->region.guest_phys_addr = new_gpa; 1233 1234 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); 1235 1236 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n" 1237 "ret: %i errno: %i slot: %u new_gpa: 0x%lx", 1238 ret, errno, slot, new_gpa); 1239 } 1240 1241 /* 1242 * VM Memory Region Delete 1243 * 1244 * Input Args: 1245 * vm - Virtual Machine 1246 * slot - Slot of the memory region to delete 1247 * 1248 * Output Args: None 1249 * 1250 * Return: None 1251 * 1252 * Delete a memory region. 1253 */ 1254 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) 1255 { 1256 __vm_mem_region_delete(vm, memslot2region(vm, slot), true); 1257 } 1258 1259 void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, 1260 bool punch_hole) 1261 { 1262 const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0); 1263 struct userspace_mem_region *region; 1264 uint64_t end = base + size; 1265 uint64_t gpa, len; 1266 off_t fd_offset; 1267 int ret; 1268 1269 for (gpa = base; gpa < end; gpa += len) { 1270 uint64_t offset; 1271 1272 region = userspace_mem_region_find(vm, gpa, gpa); 1273 TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD, 1274 "Private memory region not found for GPA 0x%lx", gpa); 1275 1276 offset = gpa - region->region.guest_phys_addr; 1277 fd_offset = region->region.guest_memfd_offset + offset; 1278 len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); 1279 1280 ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); 1281 TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx", 1282 punch_hole ? "punch hole" : "allocate", gpa, len, 1283 region->region.guest_memfd, mode, fd_offset); 1284 } 1285 } 1286 1287 /* Returns the size of a vCPU's kvm_run structure. */ 1288 static int vcpu_mmap_sz(void) 1289 { 1290 int dev_fd, ret; 1291 1292 dev_fd = open_kvm_dev_path_or_exit(); 1293 1294 ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); 1295 TEST_ASSERT(ret >= sizeof(struct kvm_run), 1296 KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); 1297 1298 close(dev_fd); 1299 1300 return ret; 1301 } 1302 1303 static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) 1304 { 1305 struct kvm_vcpu *vcpu; 1306 1307 list_for_each_entry(vcpu, &vm->vcpus, list) { 1308 if (vcpu->id == vcpu_id) 1309 return true; 1310 } 1311 1312 return false; 1313 } 1314 1315 /* 1316 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id. 1317 * No additional vCPU setup is done. Returns the vCPU. 1318 */ 1319 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) 1320 { 1321 struct kvm_vcpu *vcpu; 1322 1323 /* Confirm a vcpu with the specified id doesn't already exist. */ 1324 TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id); 1325 1326 /* Allocate and initialize new vcpu structure. */ 1327 vcpu = calloc(1, sizeof(*vcpu)); 1328 TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); 1329 1330 vcpu->vm = vm; 1331 vcpu->id = vcpu_id; 1332 vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); 1333 TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); 1334 1335 TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " 1336 "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", 1337 vcpu_mmap_sz(), sizeof(*vcpu->run)); 1338 vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), 1339 PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); 1340 TEST_ASSERT(vcpu->run != MAP_FAILED, 1341 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 1342 1343 /* Add to linked-list of VCPUs. */ 1344 list_add(&vcpu->list, &vm->vcpus); 1345 1346 return vcpu; 1347 } 1348 1349 /* 1350 * VM Virtual Address Unused Gap 1351 * 1352 * Input Args: 1353 * vm - Virtual Machine 1354 * sz - Size (bytes) 1355 * vaddr_min - Minimum Virtual Address 1356 * 1357 * Output Args: None 1358 * 1359 * Return: 1360 * Lowest virtual address at or below vaddr_min, with at least 1361 * sz unused bytes. TEST_ASSERT failure if no area of at least 1362 * size sz is available. 1363 * 1364 * Within the VM specified by vm, locates the lowest starting virtual 1365 * address >= vaddr_min, that has at least sz unallocated bytes. A 1366 * TEST_ASSERT failure occurs for invalid input or no area of at least 1367 * sz unallocated bytes >= vaddr_min is available. 1368 */ 1369 vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, 1370 vm_vaddr_t vaddr_min) 1371 { 1372 uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; 1373 1374 /* Determine lowest permitted virtual page index. */ 1375 uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; 1376 if ((pgidx_start * vm->page_size) < vaddr_min) 1377 goto no_va_found; 1378 1379 /* Loop over section with enough valid virtual page indexes. */ 1380 if (!sparsebit_is_set_num(vm->vpages_valid, 1381 pgidx_start, pages)) 1382 pgidx_start = sparsebit_next_set_num(vm->vpages_valid, 1383 pgidx_start, pages); 1384 do { 1385 /* 1386 * Are there enough unused virtual pages available at 1387 * the currently proposed starting virtual page index. 1388 * If not, adjust proposed starting index to next 1389 * possible. 1390 */ 1391 if (sparsebit_is_clear_num(vm->vpages_mapped, 1392 pgidx_start, pages)) 1393 goto va_found; 1394 pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, 1395 pgidx_start, pages); 1396 if (pgidx_start == 0) 1397 goto no_va_found; 1398 1399 /* 1400 * If needed, adjust proposed starting virtual address, 1401 * to next range of valid virtual addresses. 1402 */ 1403 if (!sparsebit_is_set_num(vm->vpages_valid, 1404 pgidx_start, pages)) { 1405 pgidx_start = sparsebit_next_set_num( 1406 vm->vpages_valid, pgidx_start, pages); 1407 if (pgidx_start == 0) 1408 goto no_va_found; 1409 } 1410 } while (pgidx_start != 0); 1411 1412 no_va_found: 1413 TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); 1414 1415 /* NOT REACHED */ 1416 return -1; 1417 1418 va_found: 1419 TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, 1420 pgidx_start, pages), 1421 "Unexpected, invalid virtual page index range,\n" 1422 " pgidx_start: 0x%lx\n" 1423 " pages: 0x%lx", 1424 pgidx_start, pages); 1425 TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, 1426 pgidx_start, pages), 1427 "Unexpected, pages already mapped,\n" 1428 " pgidx_start: 0x%lx\n" 1429 " pages: 0x%lx", 1430 pgidx_start, pages); 1431 1432 return pgidx_start * vm->page_size; 1433 } 1434 1435 static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, 1436 vm_vaddr_t vaddr_min, 1437 enum kvm_mem_region_type type, 1438 bool protected) 1439 { 1440 uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); 1441 1442 virt_pgd_alloc(vm); 1443 vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages, 1444 KVM_UTIL_MIN_PFN * vm->page_size, 1445 vm->memslots[type], protected); 1446 1447 /* 1448 * Find an unused range of virtual page addresses of at least 1449 * pages in length. 1450 */ 1451 vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); 1452 1453 /* Map the virtual pages. */ 1454 for (vm_vaddr_t vaddr = vaddr_start; pages > 0; 1455 pages--, vaddr += vm->page_size, paddr += vm->page_size) { 1456 1457 virt_pg_map(vm, vaddr, paddr); 1458 1459 sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); 1460 } 1461 1462 return vaddr_start; 1463 } 1464 1465 vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, 1466 enum kvm_mem_region_type type) 1467 { 1468 return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, 1469 vm_arch_has_protected_memory(vm)); 1470 } 1471 1472 vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, 1473 vm_vaddr_t vaddr_min, 1474 enum kvm_mem_region_type type) 1475 { 1476 return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false); 1477 } 1478 1479 /* 1480 * VM Virtual Address Allocate 1481 * 1482 * Input Args: 1483 * vm - Virtual Machine 1484 * sz - Size in bytes 1485 * vaddr_min - Minimum starting virtual address 1486 * 1487 * Output Args: None 1488 * 1489 * Return: 1490 * Starting guest virtual address 1491 * 1492 * Allocates at least sz bytes within the virtual address space of the vm 1493 * given by vm. The allocated bytes are mapped to a virtual address >= 1494 * the address given by vaddr_min. Note that each allocation uses a 1495 * a unique set of pages, with the minimum real allocation being at least 1496 * a page. The allocated physical space comes from the TEST_DATA memory region. 1497 */ 1498 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) 1499 { 1500 return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); 1501 } 1502 1503 /* 1504 * VM Virtual Address Allocate Pages 1505 * 1506 * Input Args: 1507 * vm - Virtual Machine 1508 * 1509 * Output Args: None 1510 * 1511 * Return: 1512 * Starting guest virtual address 1513 * 1514 * Allocates at least N system pages worth of bytes within the virtual address 1515 * space of the vm. 1516 */ 1517 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) 1518 { 1519 return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); 1520 } 1521 1522 vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) 1523 { 1524 return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); 1525 } 1526 1527 /* 1528 * VM Virtual Address Allocate Page 1529 * 1530 * Input Args: 1531 * vm - Virtual Machine 1532 * 1533 * Output Args: None 1534 * 1535 * Return: 1536 * Starting guest virtual address 1537 * 1538 * Allocates at least one system page worth of bytes within the virtual address 1539 * space of the vm. 1540 */ 1541 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) 1542 { 1543 return vm_vaddr_alloc_pages(vm, 1); 1544 } 1545 1546 /* 1547 * Map a range of VM virtual address to the VM's physical address 1548 * 1549 * Input Args: 1550 * vm - Virtual Machine 1551 * vaddr - Virtuall address to map 1552 * paddr - VM Physical Address 1553 * npages - The number of pages to map 1554 * 1555 * Output Args: None 1556 * 1557 * Return: None 1558 * 1559 * Within the VM given by @vm, creates a virtual translation for 1560 * @npages starting at @vaddr to the page range starting at @paddr. 1561 */ 1562 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 1563 unsigned int npages) 1564 { 1565 size_t page_size = vm->page_size; 1566 size_t size = npages * page_size; 1567 1568 TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); 1569 TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); 1570 1571 while (npages--) { 1572 virt_pg_map(vm, vaddr, paddr); 1573 sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); 1574 1575 vaddr += page_size; 1576 paddr += page_size; 1577 } 1578 } 1579 1580 /* 1581 * Address VM Physical to Host Virtual 1582 * 1583 * Input Args: 1584 * vm - Virtual Machine 1585 * gpa - VM physical address 1586 * 1587 * Output Args: None 1588 * 1589 * Return: 1590 * Equivalent host virtual address 1591 * 1592 * Locates the memory region containing the VM physical address given 1593 * by gpa, within the VM given by vm. When found, the host virtual 1594 * address providing the memory to the vm physical address is returned. 1595 * A TEST_ASSERT failure occurs if no region containing gpa exists. 1596 */ 1597 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) 1598 { 1599 struct userspace_mem_region *region; 1600 1601 gpa = vm_untag_gpa(vm, gpa); 1602 1603 region = userspace_mem_region_find(vm, gpa, gpa); 1604 if (!region) { 1605 TEST_FAIL("No vm physical memory at 0x%lx", gpa); 1606 return NULL; 1607 } 1608 1609 return (void *)((uintptr_t)region->host_mem 1610 + (gpa - region->region.guest_phys_addr)); 1611 } 1612 1613 /* 1614 * Address Host Virtual to VM Physical 1615 * 1616 * Input Args: 1617 * vm - Virtual Machine 1618 * hva - Host virtual address 1619 * 1620 * Output Args: None 1621 * 1622 * Return: 1623 * Equivalent VM physical address 1624 * 1625 * Locates the memory region containing the host virtual address given 1626 * by hva, within the VM given by vm. When found, the equivalent 1627 * VM physical address is returned. A TEST_ASSERT failure occurs if no 1628 * region containing hva exists. 1629 */ 1630 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) 1631 { 1632 struct rb_node *node; 1633 1634 for (node = vm->regions.hva_tree.rb_node; node; ) { 1635 struct userspace_mem_region *region = 1636 container_of(node, struct userspace_mem_region, hva_node); 1637 1638 if (hva >= region->host_mem) { 1639 if (hva <= (region->host_mem 1640 + region->region.memory_size - 1)) 1641 return (vm_paddr_t)((uintptr_t) 1642 region->region.guest_phys_addr 1643 + (hva - (uintptr_t)region->host_mem)); 1644 1645 node = node->rb_right; 1646 } else 1647 node = node->rb_left; 1648 } 1649 1650 TEST_FAIL("No mapping to a guest physical address, hva: %p", hva); 1651 return -1; 1652 } 1653 1654 /* 1655 * Address VM physical to Host Virtual *alias*. 1656 * 1657 * Input Args: 1658 * vm - Virtual Machine 1659 * gpa - VM physical address 1660 * 1661 * Output Args: None 1662 * 1663 * Return: 1664 * Equivalent address within the host virtual *alias* area, or NULL 1665 * (without failing the test) if the guest memory is not shared (so 1666 * no alias exists). 1667 * 1668 * Create a writable, shared virtual=>physical alias for the specific GPA. 1669 * The primary use case is to allow the host selftest to manipulate guest 1670 * memory without mapping said memory in the guest's address space. And, for 1671 * userfaultfd-based demand paging, to do so without triggering userfaults. 1672 */ 1673 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) 1674 { 1675 struct userspace_mem_region *region; 1676 uintptr_t offset; 1677 1678 region = userspace_mem_region_find(vm, gpa, gpa); 1679 if (!region) 1680 return NULL; 1681 1682 if (!region->host_alias) 1683 return NULL; 1684 1685 offset = gpa - region->region.guest_phys_addr; 1686 return (void *) ((uintptr_t) region->host_alias + offset); 1687 } 1688 1689 /* Create an interrupt controller chip for the specified VM. */ 1690 void vm_create_irqchip(struct kvm_vm *vm) 1691 { 1692 vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); 1693 1694 vm->has_irqchip = true; 1695 } 1696 1697 int _vcpu_run(struct kvm_vcpu *vcpu) 1698 { 1699 int rc; 1700 1701 do { 1702 rc = __vcpu_run(vcpu); 1703 } while (rc == -1 && errno == EINTR); 1704 1705 assert_on_unhandled_exception(vcpu); 1706 1707 return rc; 1708 } 1709 1710 /* 1711 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR. 1712 * Assert if the KVM returns an error (other than -EINTR). 1713 */ 1714 void vcpu_run(struct kvm_vcpu *vcpu) 1715 { 1716 int ret = _vcpu_run(vcpu); 1717 1718 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret)); 1719 } 1720 1721 void vcpu_run_complete_io(struct kvm_vcpu *vcpu) 1722 { 1723 int ret; 1724 1725 vcpu->run->immediate_exit = 1; 1726 ret = __vcpu_run(vcpu); 1727 vcpu->run->immediate_exit = 0; 1728 1729 TEST_ASSERT(ret == -1 && errno == EINTR, 1730 "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", 1731 ret, errno); 1732 } 1733 1734 /* 1735 * Get the list of guest registers which are supported for 1736 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer, 1737 * it is the caller's responsibility to free the list. 1738 */ 1739 struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) 1740 { 1741 struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; 1742 int ret; 1743 1744 ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®_list_n); 1745 TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); 1746 1747 reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); 1748 reg_list->n = reg_list_n.n; 1749 vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list); 1750 return reg_list; 1751 } 1752 1753 void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) 1754 { 1755 uint32_t page_size = getpagesize(); 1756 uint32_t size = vcpu->vm->dirty_ring_size; 1757 1758 TEST_ASSERT(size > 0, "Should enable dirty ring first"); 1759 1760 if (!vcpu->dirty_gfns) { 1761 void *addr; 1762 1763 addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd, 1764 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1765 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private"); 1766 1767 addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd, 1768 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1769 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); 1770 1771 addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 1772 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1773 TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); 1774 1775 vcpu->dirty_gfns = addr; 1776 vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); 1777 } 1778 1779 return vcpu->dirty_gfns; 1780 } 1781 1782 /* 1783 * Device Ioctl 1784 */ 1785 1786 int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) 1787 { 1788 struct kvm_device_attr attribute = { 1789 .group = group, 1790 .attr = attr, 1791 .flags = 0, 1792 }; 1793 1794 return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); 1795 } 1796 1797 int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) 1798 { 1799 struct kvm_create_device create_dev = { 1800 .type = type, 1801 .flags = KVM_CREATE_DEVICE_TEST, 1802 }; 1803 1804 return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); 1805 } 1806 1807 int __kvm_create_device(struct kvm_vm *vm, uint64_t type) 1808 { 1809 struct kvm_create_device create_dev = { 1810 .type = type, 1811 .fd = -1, 1812 .flags = 0, 1813 }; 1814 int err; 1815 1816 err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); 1817 TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value"); 1818 return err ? : create_dev.fd; 1819 } 1820 1821 int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) 1822 { 1823 struct kvm_device_attr kvmattr = { 1824 .group = group, 1825 .attr = attr, 1826 .flags = 0, 1827 .addr = (uintptr_t)val, 1828 }; 1829 1830 return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); 1831 } 1832 1833 int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) 1834 { 1835 struct kvm_device_attr kvmattr = { 1836 .group = group, 1837 .attr = attr, 1838 .flags = 0, 1839 .addr = (uintptr_t)val, 1840 }; 1841 1842 return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr); 1843 } 1844 1845 /* 1846 * IRQ related functions. 1847 */ 1848 1849 int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1850 { 1851 struct kvm_irq_level irq_level = { 1852 .irq = irq, 1853 .level = level, 1854 }; 1855 1856 return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); 1857 } 1858 1859 void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1860 { 1861 int ret = _kvm_irq_line(vm, irq, level); 1862 1863 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); 1864 } 1865 1866 struct kvm_irq_routing *kvm_gsi_routing_create(void) 1867 { 1868 struct kvm_irq_routing *routing; 1869 size_t size; 1870 1871 size = sizeof(struct kvm_irq_routing); 1872 /* Allocate space for the max number of entries: this wastes 196 KBs. */ 1873 size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry); 1874 routing = calloc(1, size); 1875 assert(routing); 1876 1877 return routing; 1878 } 1879 1880 void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, 1881 uint32_t gsi, uint32_t pin) 1882 { 1883 int i; 1884 1885 assert(routing); 1886 assert(routing->nr < KVM_MAX_IRQ_ROUTES); 1887 1888 i = routing->nr; 1889 routing->entries[i].gsi = gsi; 1890 routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; 1891 routing->entries[i].flags = 0; 1892 routing->entries[i].u.irqchip.irqchip = 0; 1893 routing->entries[i].u.irqchip.pin = pin; 1894 routing->nr++; 1895 } 1896 1897 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1898 { 1899 int ret; 1900 1901 assert(routing); 1902 ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); 1903 free(routing); 1904 1905 return ret; 1906 } 1907 1908 void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1909 { 1910 int ret; 1911 1912 ret = _kvm_gsi_routing_write(vm, routing); 1913 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret)); 1914 } 1915 1916 /* 1917 * VM Dump 1918 * 1919 * Input Args: 1920 * vm - Virtual Machine 1921 * indent - Left margin indent amount 1922 * 1923 * Output Args: 1924 * stream - Output FILE stream 1925 * 1926 * Return: None 1927 * 1928 * Dumps the current state of the VM given by vm, to the FILE stream 1929 * given by stream. 1930 */ 1931 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 1932 { 1933 int ctr; 1934 struct userspace_mem_region *region; 1935 struct kvm_vcpu *vcpu; 1936 1937 fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); 1938 fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); 1939 fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); 1940 fprintf(stream, "%*sMem Regions:\n", indent, ""); 1941 hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { 1942 fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " 1943 "host_virt: %p\n", indent + 2, "", 1944 (uint64_t) region->region.guest_phys_addr, 1945 (uint64_t) region->region.memory_size, 1946 region->host_mem); 1947 fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); 1948 sparsebit_dump(stream, region->unused_phy_pages, 0); 1949 if (region->protected_phy_pages) { 1950 fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, ""); 1951 sparsebit_dump(stream, region->protected_phy_pages, 0); 1952 } 1953 } 1954 fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); 1955 sparsebit_dump(stream, vm->vpages_mapped, indent + 2); 1956 fprintf(stream, "%*spgd_created: %u\n", indent, "", 1957 vm->pgd_created); 1958 if (vm->pgd_created) { 1959 fprintf(stream, "%*sVirtual Translation Tables:\n", 1960 indent + 2, ""); 1961 virt_dump(stream, vm, indent + 4); 1962 } 1963 fprintf(stream, "%*sVCPUs:\n", indent, ""); 1964 1965 list_for_each_entry(vcpu, &vm->vcpus, list) 1966 vcpu_dump(stream, vcpu, indent + 2); 1967 } 1968 1969 #define KVM_EXIT_STRING(x) {KVM_EXIT_##x, #x} 1970 1971 /* Known KVM exit reasons */ 1972 static struct exit_reason { 1973 unsigned int reason; 1974 const char *name; 1975 } exit_reasons_known[] = { 1976 KVM_EXIT_STRING(UNKNOWN), 1977 KVM_EXIT_STRING(EXCEPTION), 1978 KVM_EXIT_STRING(IO), 1979 KVM_EXIT_STRING(HYPERCALL), 1980 KVM_EXIT_STRING(DEBUG), 1981 KVM_EXIT_STRING(HLT), 1982 KVM_EXIT_STRING(MMIO), 1983 KVM_EXIT_STRING(IRQ_WINDOW_OPEN), 1984 KVM_EXIT_STRING(SHUTDOWN), 1985 KVM_EXIT_STRING(FAIL_ENTRY), 1986 KVM_EXIT_STRING(INTR), 1987 KVM_EXIT_STRING(SET_TPR), 1988 KVM_EXIT_STRING(TPR_ACCESS), 1989 KVM_EXIT_STRING(S390_SIEIC), 1990 KVM_EXIT_STRING(S390_RESET), 1991 KVM_EXIT_STRING(DCR), 1992 KVM_EXIT_STRING(NMI), 1993 KVM_EXIT_STRING(INTERNAL_ERROR), 1994 KVM_EXIT_STRING(OSI), 1995 KVM_EXIT_STRING(PAPR_HCALL), 1996 KVM_EXIT_STRING(S390_UCONTROL), 1997 KVM_EXIT_STRING(WATCHDOG), 1998 KVM_EXIT_STRING(S390_TSCH), 1999 KVM_EXIT_STRING(EPR), 2000 KVM_EXIT_STRING(SYSTEM_EVENT), 2001 KVM_EXIT_STRING(S390_STSI), 2002 KVM_EXIT_STRING(IOAPIC_EOI), 2003 KVM_EXIT_STRING(HYPERV), 2004 KVM_EXIT_STRING(ARM_NISV), 2005 KVM_EXIT_STRING(X86_RDMSR), 2006 KVM_EXIT_STRING(X86_WRMSR), 2007 KVM_EXIT_STRING(DIRTY_RING_FULL), 2008 KVM_EXIT_STRING(AP_RESET_HOLD), 2009 KVM_EXIT_STRING(X86_BUS_LOCK), 2010 KVM_EXIT_STRING(XEN), 2011 KVM_EXIT_STRING(RISCV_SBI), 2012 KVM_EXIT_STRING(RISCV_CSR), 2013 KVM_EXIT_STRING(NOTIFY), 2014 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT 2015 KVM_EXIT_STRING(MEMORY_NOT_PRESENT), 2016 #endif 2017 }; 2018 2019 /* 2020 * Exit Reason String 2021 * 2022 * Input Args: 2023 * exit_reason - Exit reason 2024 * 2025 * Output Args: None 2026 * 2027 * Return: 2028 * Constant string pointer describing the exit reason. 2029 * 2030 * Locates and returns a constant string that describes the KVM exit 2031 * reason given by exit_reason. If no such string is found, a constant 2032 * string of "Unknown" is returned. 2033 */ 2034 const char *exit_reason_str(unsigned int exit_reason) 2035 { 2036 unsigned int n1; 2037 2038 for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { 2039 if (exit_reason == exit_reasons_known[n1].reason) 2040 return exit_reasons_known[n1].name; 2041 } 2042 2043 return "Unknown"; 2044 } 2045 2046 /* 2047 * Physical Contiguous Page Allocator 2048 * 2049 * Input Args: 2050 * vm - Virtual Machine 2051 * num - number of pages 2052 * paddr_min - Physical address minimum 2053 * memslot - Memory region to allocate page from 2054 * protected - True if the pages will be used as protected/private memory 2055 * 2056 * Output Args: None 2057 * 2058 * Return: 2059 * Starting physical address 2060 * 2061 * Within the VM specified by vm, locates a range of available physical 2062 * pages at or above paddr_min. If found, the pages are marked as in use 2063 * and their base address is returned. A TEST_ASSERT failure occurs if 2064 * not enough pages are available at or above paddr_min. 2065 */ 2066 vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, 2067 vm_paddr_t paddr_min, uint32_t memslot, 2068 bool protected) 2069 { 2070 struct userspace_mem_region *region; 2071 sparsebit_idx_t pg, base; 2072 2073 TEST_ASSERT(num > 0, "Must allocate at least one page"); 2074 2075 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " 2076 "not divisible by page size.\n" 2077 " paddr_min: 0x%lx page_size: 0x%x", 2078 paddr_min, vm->page_size); 2079 2080 region = memslot2region(vm, memslot); 2081 TEST_ASSERT(!protected || region->protected_phy_pages, 2082 "Region doesn't support protected memory"); 2083 2084 base = pg = paddr_min >> vm->page_shift; 2085 do { 2086 for (; pg < base + num; ++pg) { 2087 if (!sparsebit_is_set(region->unused_phy_pages, pg)) { 2088 base = pg = sparsebit_next_set(region->unused_phy_pages, pg); 2089 break; 2090 } 2091 } 2092 } while (pg && pg != base + num); 2093 2094 if (pg == 0) { 2095 fprintf(stderr, "No guest physical page available, " 2096 "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", 2097 paddr_min, vm->page_size, memslot); 2098 fputs("---- vm dump ----\n", stderr); 2099 vm_dump(stderr, vm, 2); 2100 abort(); 2101 } 2102 2103 for (pg = base; pg < base + num; ++pg) { 2104 sparsebit_clear(region->unused_phy_pages, pg); 2105 if (protected) 2106 sparsebit_set(region->protected_phy_pages, pg); 2107 } 2108 2109 return base * vm->page_size; 2110 } 2111 2112 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, 2113 uint32_t memslot) 2114 { 2115 return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); 2116 } 2117 2118 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) 2119 { 2120 return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 2121 vm->memslots[MEM_REGION_PT]); 2122 } 2123 2124 /* 2125 * Address Guest Virtual to Host Virtual 2126 * 2127 * Input Args: 2128 * vm - Virtual Machine 2129 * gva - VM virtual address 2130 * 2131 * Output Args: None 2132 * 2133 * Return: 2134 * Equivalent host virtual address 2135 */ 2136 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) 2137 { 2138 return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); 2139 } 2140 2141 unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm) 2142 { 2143 return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; 2144 } 2145 2146 static unsigned int vm_calc_num_pages(unsigned int num_pages, 2147 unsigned int page_shift, 2148 unsigned int new_page_shift, 2149 bool ceil) 2150 { 2151 unsigned int n = 1 << (new_page_shift - page_shift); 2152 2153 if (page_shift >= new_page_shift) 2154 return num_pages * (1 << (page_shift - new_page_shift)); 2155 2156 return num_pages / n + !!(ceil && num_pages % n); 2157 } 2158 2159 static inline int getpageshift(void) 2160 { 2161 return __builtin_ffs(getpagesize()) - 1; 2162 } 2163 2164 unsigned int 2165 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) 2166 { 2167 return vm_calc_num_pages(num_guest_pages, 2168 vm_guest_mode_params[mode].page_shift, 2169 getpageshift(), true); 2170 } 2171 2172 unsigned int 2173 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages) 2174 { 2175 return vm_calc_num_pages(num_host_pages, getpageshift(), 2176 vm_guest_mode_params[mode].page_shift, false); 2177 } 2178 2179 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) 2180 { 2181 unsigned int n; 2182 n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size); 2183 return vm_adjust_num_guest_pages(mode, n); 2184 } 2185 2186 /* 2187 * Read binary stats descriptors 2188 * 2189 * Input Args: 2190 * stats_fd - the file descriptor for the binary stats file from which to read 2191 * header - the binary stats metadata header corresponding to the given FD 2192 * 2193 * Output Args: None 2194 * 2195 * Return: 2196 * A pointer to a newly allocated series of stat descriptors. 2197 * Caller is responsible for freeing the returned kvm_stats_desc. 2198 * 2199 * Read the stats descriptors from the binary stats interface. 2200 */ 2201 struct kvm_stats_desc *read_stats_descriptors(int stats_fd, 2202 struct kvm_stats_header *header) 2203 { 2204 struct kvm_stats_desc *stats_desc; 2205 ssize_t desc_size, total_size, ret; 2206 2207 desc_size = get_stats_descriptor_size(header); 2208 total_size = header->num_desc * desc_size; 2209 2210 stats_desc = calloc(header->num_desc, desc_size); 2211 TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors"); 2212 2213 ret = pread(stats_fd, stats_desc, total_size, header->desc_offset); 2214 TEST_ASSERT(ret == total_size, "Read KVM stats descriptors"); 2215 2216 return stats_desc; 2217 } 2218 2219 /* 2220 * Read stat data for a particular stat 2221 * 2222 * Input Args: 2223 * stats_fd - the file descriptor for the binary stats file from which to read 2224 * header - the binary stats metadata header corresponding to the given FD 2225 * desc - the binary stat metadata for the particular stat to be read 2226 * max_elements - the maximum number of 8-byte values to read into data 2227 * 2228 * Output Args: 2229 * data - the buffer into which stat data should be read 2230 * 2231 * Read the data values of a specified stat from the binary stats interface. 2232 */ 2233 void read_stat_data(int stats_fd, struct kvm_stats_header *header, 2234 struct kvm_stats_desc *desc, uint64_t *data, 2235 size_t max_elements) 2236 { 2237 size_t nr_elements = min_t(ssize_t, desc->size, max_elements); 2238 size_t size = nr_elements * sizeof(*data); 2239 ssize_t ret; 2240 2241 TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name); 2242 TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name); 2243 2244 ret = pread(stats_fd, data, size, 2245 header->data_offset + desc->offset); 2246 2247 TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)", 2248 desc->name, errno, strerror(errno)); 2249 TEST_ASSERT(ret == size, 2250 "pread() on stat '%s' read %ld bytes, wanted %lu bytes", 2251 desc->name, size, ret); 2252 } 2253 2254 /* 2255 * Read the data of the named stat 2256 * 2257 * Input Args: 2258 * vm - the VM for which the stat should be read 2259 * stat_name - the name of the stat to read 2260 * max_elements - the maximum number of 8-byte values to read into data 2261 * 2262 * Output Args: 2263 * data - the buffer into which stat data should be read 2264 * 2265 * Read the data values of a specified stat from the binary stats interface. 2266 */ 2267 void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, 2268 size_t max_elements) 2269 { 2270 struct kvm_stats_desc *desc; 2271 size_t size_desc; 2272 int i; 2273 2274 if (!vm->stats_fd) { 2275 vm->stats_fd = vm_get_stats_fd(vm); 2276 read_stats_header(vm->stats_fd, &vm->stats_header); 2277 vm->stats_desc = read_stats_descriptors(vm->stats_fd, 2278 &vm->stats_header); 2279 } 2280 2281 size_desc = get_stats_descriptor_size(&vm->stats_header); 2282 2283 for (i = 0; i < vm->stats_header.num_desc; ++i) { 2284 desc = (void *)vm->stats_desc + (i * size_desc); 2285 2286 if (strcmp(desc->name, stat_name)) 2287 continue; 2288 2289 read_stat_data(vm->stats_fd, &vm->stats_header, desc, 2290 data, max_elements); 2291 2292 break; 2293 } 2294 } 2295 2296 __weak void kvm_arch_vm_post_create(struct kvm_vm *vm) 2297 { 2298 } 2299 2300 __weak void kvm_selftest_arch_init(void) 2301 { 2302 } 2303 2304 void __attribute((constructor)) kvm_selftest_init(void) 2305 { 2306 /* Tell stdout not to buffer its content. */ 2307 setbuf(stdout, NULL); 2308 2309 kvm_selftest_arch_init(); 2310 } 2311 2312 bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) 2313 { 2314 sparsebit_idx_t pg = 0; 2315 struct userspace_mem_region *region; 2316 2317 if (!vm_arch_has_protected_memory(vm)) 2318 return false; 2319 2320 region = userspace_mem_region_find(vm, paddr, paddr); 2321 TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr); 2322 2323 pg = paddr >> vm->page_shift; 2324 return sparsebit_is_set(region->protected_phy_pages, pg); 2325 } 2326