xref: /linux/tools/testing/selftests/kvm/lib/kvm_util.c (revision a44e4f3ab16bc808590763a543a93b6fbf3abcc4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * tools/testing/selftests/kvm/lib/kvm_util.c
4  *
5  * Copyright (C) 2018, Google LLC.
6  */
7 
8 #include "test_util.h"
9 #include "kvm_util.h"
10 #include "kvm_util_internal.h"
11 #include "processor.h"
12 
13 #include <assert.h>
14 #include <sys/mman.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <linux/kernel.h>
18 
/*
 * Number of base pages per huge page; multiplied by the VM page size to
 * obtain the alignment used for THP-backed memory regions.
 */
#define KVM_UTIL_PGS_PER_HUGEPG 512
/*
 * Page frame number that, scaled by the VM page size, is handed to
 * vm_phy_page_alloc() when backing virtual allocations (presumably the
 * lowest guest physical address it may return - confirm against
 * vm_phy_page_alloc()).
 */
#define KVM_UTIL_MIN_PFN	2
21 
/*
 * Round the address x up to the nearest multiple of size and return it.
 * size must be a non-zero power of 2; any other value trips a TEST_ASSERT.
 */
static void *align(void *x, size_t size)
{
	size_t low_bits = size - 1;
	size_t addr = (size_t) x;

	TEST_ASSERT(size != 0 && !(size & (size - 1)),
		    "size not a power of 2: %lu", size);

	/* Bump past the boundary, then drop the low bits. */
	addr += low_bits;
	addr &= ~low_bits;

	return (void *) addr;
}
30 
31 /*
32  * Capability
33  *
34  * Input Args:
35  *   cap - Capability
36  *
37  * Output Args: None
38  *
39  * Return:
40  *   On success, the Value corresponding to the capability (KVM_CAP_*)
41  *   specified by the value of cap.  On failure a TEST_ASSERT failure
42  *   is produced.
43  *
44  * Looks up and returns the value corresponding to the capability
45  * (KVM_CAP_*) given by cap.
46  */
47 int kvm_check_cap(long cap)
48 {
49 	int ret;
50 	int kvm_fd;
51 
52 	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
53 	if (kvm_fd < 0)
54 		exit(KSFT_SKIP);
55 
56 	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
57 	TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
58 		"  rc: %i errno: %i", ret, errno);
59 
60 	close(kvm_fd);
61 
62 	return ret;
63 }
64 
65 /* VM Enable Capability
66  *
67  * Input Args:
68  *   vm - Virtual Machine
69  *   cap - Capability
70  *
71  * Output Args: None
72  *
73  * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
74  *
75  * Enables a capability (KVM_CAP_*) on the VM.
76  */
77 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
78 {
79 	int ret;
80 
81 	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
82 	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
83 		"  rc: %i errno: %i", ret, errno);
84 
85 	return ret;
86 }
87 
88 static void vm_open(struct kvm_vm *vm, int perm)
89 {
90 	vm->kvm_fd = open(KVM_DEV_PATH, perm);
91 	if (vm->kvm_fd < 0)
92 		exit(KSFT_SKIP);
93 
94 	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
95 		fprintf(stderr, "immediate_exit not available, skipping test\n");
96 		exit(KSFT_SKIP);
97 	}
98 
99 	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
100 	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
101 		"rc: %i errno: %i", vm->fd, errno);
102 }
103 
104 const char * const vm_guest_mode_string[] = {
105 	"PA-bits:52,  VA-bits:48,  4K pages",
106 	"PA-bits:52,  VA-bits:48, 64K pages",
107 	"PA-bits:48,  VA-bits:48,  4K pages",
108 	"PA-bits:48,  VA-bits:48, 64K pages",
109 	"PA-bits:40,  VA-bits:48,  4K pages",
110 	"PA-bits:40,  VA-bits:48, 64K pages",
111 	"PA-bits:ANY, VA-bits:48,  4K pages",
112 };
113 _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
114 	       "Missing new mode strings?");
115 
116 /*
117  * VM Create
118  *
119  * Input Args:
120  *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
121  *   phy_pages - Physical memory pages
122  *   perm - permission
123  *
124  * Output Args: None
125  *
126  * Return:
127  *   Pointer to opaque structure that describes the created VM.
128  *
129  * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
130  * When phy_pages is non-zero, a memory region of phy_pages physical pages
131  * is created and mapped starting at guest physical address 0.  The file
132  * descriptor to control the created VM is created with the permissions
133  * given by perm (e.g. O_RDWR).
134  */
135 struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
136 {
137 	struct kvm_vm *vm;
138 
139 	DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
140 
141 	vm = calloc(1, sizeof(*vm));
142 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
143 
144 	vm->mode = mode;
145 	vm->type = 0;
146 
147 	/* Setup mode specific traits. */
148 	switch (vm->mode) {
149 	case VM_MODE_P52V48_4K:
150 		vm->pgtable_levels = 4;
151 		vm->pa_bits = 52;
152 		vm->va_bits = 48;
153 		vm->page_size = 0x1000;
154 		vm->page_shift = 12;
155 		break;
156 	case VM_MODE_P52V48_64K:
157 		vm->pgtable_levels = 3;
158 		vm->pa_bits = 52;
159 		vm->va_bits = 48;
160 		vm->page_size = 0x10000;
161 		vm->page_shift = 16;
162 		break;
163 	case VM_MODE_P48V48_4K:
164 		vm->pgtable_levels = 4;
165 		vm->pa_bits = 48;
166 		vm->va_bits = 48;
167 		vm->page_size = 0x1000;
168 		vm->page_shift = 12;
169 		break;
170 	case VM_MODE_P48V48_64K:
171 		vm->pgtable_levels = 3;
172 		vm->pa_bits = 48;
173 		vm->va_bits = 48;
174 		vm->page_size = 0x10000;
175 		vm->page_shift = 16;
176 		break;
177 	case VM_MODE_P40V48_4K:
178 		vm->pgtable_levels = 4;
179 		vm->pa_bits = 40;
180 		vm->va_bits = 48;
181 		vm->page_size = 0x1000;
182 		vm->page_shift = 12;
183 		break;
184 	case VM_MODE_P40V48_64K:
185 		vm->pgtable_levels = 3;
186 		vm->pa_bits = 40;
187 		vm->va_bits = 48;
188 		vm->page_size = 0x10000;
189 		vm->page_shift = 16;
190 		break;
191 	case VM_MODE_PXXV48_4K:
192 #ifdef __x86_64__
193 		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
194 		TEST_ASSERT(vm->va_bits == 48, "Linear address width "
195 			    "(%d bits) not supported", vm->va_bits);
196 		vm->pgtable_levels = 4;
197 		vm->page_size = 0x1000;
198 		vm->page_shift = 12;
199 		DEBUG("Guest physical address width detected: %d\n",
200 		      vm->pa_bits);
201 #else
202 		TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on "
203 			    "non-x86 platforms");
204 #endif
205 		break;
206 	default:
207 		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
208 	}
209 
210 #ifdef __aarch64__
211 	if (vm->pa_bits != 40)
212 		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
213 #endif
214 
215 	vm_open(vm, perm);
216 
217 	/* Limit to VA-bit canonical virtual addresses. */
218 	vm->vpages_valid = sparsebit_alloc();
219 	sparsebit_set_num(vm->vpages_valid,
220 		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
221 	sparsebit_set_num(vm->vpages_valid,
222 		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
223 		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
224 
225 	/* Limit physical addresses to PA-bits. */
226 	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
227 
228 	/* Allocate and setup memory for guest. */
229 	vm->vpages_mapped = sparsebit_alloc();
230 	if (phy_pages != 0)
231 		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
232 					    0, 0, phy_pages, 0);
233 
234 	return vm;
235 }
236 
237 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
238 {
239 	return _vm_create(mode, phy_pages, perm);
240 }
241 
242 /*
243  * VM Restart
244  *
245  * Input Args:
246  *   vm - VM that has been released before
247  *   perm - permission
248  *
249  * Output Args: None
250  *
251  * Reopens the file descriptors associated to the VM and reinstates the
252  * global state, such as the irqchip and the memory regions that are mapped
253  * into the guest.
254  */
255 void kvm_vm_restart(struct kvm_vm *vmp, int perm)
256 {
257 	struct userspace_mem_region *region;
258 
259 	vm_open(vmp, perm);
260 	if (vmp->has_irqchip)
261 		vm_create_irqchip(vmp);
262 
263 	for (region = vmp->userspace_mem_region_head; region;
264 		region = region->next) {
265 		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
266 		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
267 			    "  rc: %i errno: %i\n"
268 			    "  slot: %u flags: 0x%x\n"
269 			    "  guest_phys_addr: 0x%lx size: 0x%lx",
270 			    ret, errno, region->region.slot,
271 			    region->region.flags,
272 			    region->region.guest_phys_addr,
273 			    region->region.memory_size);
274 	}
275 }
276 
277 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
278 {
279 	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
280 	int ret;
281 
282 	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
283 	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
284 		    strerror(-ret));
285 }
286 
287 void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
288 			    uint64_t first_page, uint32_t num_pages)
289 {
290 	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
291 		                            .first_page = first_page,
292 	                                    .num_pages = num_pages };
293 	int ret;
294 
295 	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
296 	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
297 		    strerror(-ret));
298 }
299 
300 /*
301  * Userspace Memory Region Find
302  *
303  * Input Args:
304  *   vm - Virtual Machine
305  *   start - Starting VM physical address
306  *   end - Ending VM physical address, inclusive.
307  *
308  * Output Args: None
309  *
310  * Return:
311  *   Pointer to overlapping region, NULL if no such region.
312  *
313  * Searches for a region with any physical memory that overlaps with
314  * any portion of the guest physical addresses from start to end
315  * inclusive.  If multiple overlapping regions exist, a pointer to any
316  * of the regions is returned.  Null is returned only when no overlapping
317  * region exists.
318  */
319 static struct userspace_mem_region *
320 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
321 {
322 	struct userspace_mem_region *region;
323 
324 	for (region = vm->userspace_mem_region_head; region;
325 		region = region->next) {
326 		uint64_t existing_start = region->region.guest_phys_addr;
327 		uint64_t existing_end = region->region.guest_phys_addr
328 			+ region->region.memory_size - 1;
329 		if (start <= existing_end && end >= existing_start)
330 			return region;
331 	}
332 
333 	return NULL;
334 }
335 
336 /*
337  * KVM Userspace Memory Region Find
338  *
339  * Input Args:
340  *   vm - Virtual Machine
341  *   start - Starting VM physical address
342  *   end - Ending VM physical address, inclusive.
343  *
344  * Output Args: None
345  *
346  * Return:
347  *   Pointer to overlapping region, NULL if no such region.
348  *
349  * Public interface to userspace_mem_region_find. Allows tests to look up
350  * the memslot datastructure for a given range of guest physical memory.
351  */
352 struct kvm_userspace_memory_region *
353 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
354 				 uint64_t end)
355 {
356 	struct userspace_mem_region *region;
357 
358 	region = userspace_mem_region_find(vm, start, end);
359 	if (!region)
360 		return NULL;
361 
362 	return &region->region;
363 }
364 
365 /*
366  * VCPU Find
367  *
368  * Input Args:
369  *   vm - Virtual Machine
370  *   vcpuid - VCPU ID
371  *
372  * Output Args: None
373  *
374  * Return:
375  *   Pointer to VCPU structure
376  *
377  * Locates a vcpu structure that describes the VCPU specified by vcpuid and
378  * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
379  * for the specified vcpuid.
380  */
381 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
382 {
383 	struct vcpu *vcpup;
384 
385 	for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
386 		if (vcpup->id == vcpuid)
387 			return vcpup;
388 	}
389 
390 	return NULL;
391 }
392 
393 /*
394  * VM VCPU Remove
395  *
396  * Input Args:
397  *   vm - Virtual Machine
398  *   vcpuid - VCPU ID
399  *
400  * Output Args: None
401  *
402  * Return: None, TEST_ASSERT failures for all error conditions
403  *
404  * Within the VM specified by vm, removes the VCPU given by vcpuid.
405  */
406 static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
407 {
408 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
409 	int ret;
410 
411 	ret = munmap(vcpu->state, sizeof(*vcpu->state));
412 	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
413 		"errno: %i", ret, errno);
414 	close(vcpu->fd);
415 	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
416 		"errno: %i", ret, errno);
417 
418 	if (vcpu->next)
419 		vcpu->next->prev = vcpu->prev;
420 	if (vcpu->prev)
421 		vcpu->prev->next = vcpu->next;
422 	else
423 		vm->vcpu_head = vcpu->next;
424 	free(vcpu);
425 }
426 
427 void kvm_vm_release(struct kvm_vm *vmp)
428 {
429 	int ret;
430 
431 	while (vmp->vcpu_head)
432 		vm_vcpu_rm(vmp, vmp->vcpu_head->id);
433 
434 	ret = close(vmp->fd);
435 	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
436 		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
437 
438 	close(vmp->kvm_fd);
439 	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
440 		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
441 }
442 
443 /*
444  * Destroys and frees the VM pointed to by vmp.
445  */
446 void kvm_vm_free(struct kvm_vm *vmp)
447 {
448 	int ret;
449 
450 	if (vmp == NULL)
451 		return;
452 
453 	/* Free userspace_mem_regions. */
454 	while (vmp->userspace_mem_region_head) {
455 		struct userspace_mem_region *region
456 			= vmp->userspace_mem_region_head;
457 
458 		region->region.memory_size = 0;
459 		ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
460 			&region->region);
461 		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
462 			"rc: %i errno: %i", ret, errno);
463 
464 		vmp->userspace_mem_region_head = region->next;
465 		sparsebit_free(&region->unused_phy_pages);
466 		ret = munmap(region->mmap_start, region->mmap_size);
467 		TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
468 			    ret, errno);
469 
470 		free(region);
471 	}
472 
473 	/* Free sparsebit arrays. */
474 	sparsebit_free(&vmp->vpages_valid);
475 	sparsebit_free(&vmp->vpages_mapped);
476 
477 	kvm_vm_release(vmp);
478 
479 	/* Free the structure describing the VM. */
480 	free(vmp);
481 }
482 
483 /*
484  * Memory Compare, host virtual to guest virtual
485  *
486  * Input Args:
487  *   hva - Starting host virtual address
488  *   vm - Virtual Machine
489  *   gva - Starting guest virtual address
490  *   len - number of bytes to compare
491  *
492  * Output Args: None
493  *
494  * Input/Output Args: None
495  *
496  * Return:
497  *   Returns 0 if the bytes starting at hva for a length of len
498  *   are equal the guest virtual bytes starting at gva.  Returns
499  *   a value < 0, if bytes at hva are less than those at gva.
500  *   Otherwise a value > 0 is returned.
501  *
502  * Compares the bytes starting at the host virtual address hva, for
503  * a length of len, to the guest bytes starting at the guest virtual
504  * address given by gva.
505  */
506 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
507 {
508 	size_t amt;
509 
510 	/*
511 	 * Compare a batch of bytes until either a match is found
512 	 * or all the bytes have been compared.
513 	 */
514 	for (uintptr_t offset = 0; offset < len; offset += amt) {
515 		uintptr_t ptr1 = (uintptr_t)hva + offset;
516 
517 		/*
518 		 * Determine host address for guest virtual address
519 		 * at offset.
520 		 */
521 		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
522 
523 		/*
524 		 * Determine amount to compare on this pass.
525 		 * Don't allow the comparsion to cross a page boundary.
526 		 */
527 		amt = len - offset;
528 		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
529 			amt = vm->page_size - (ptr1 % vm->page_size);
530 		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
531 			amt = vm->page_size - (ptr2 % vm->page_size);
532 
533 		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
534 		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
535 
536 		/*
537 		 * Perform the comparison.  If there is a difference
538 		 * return that result to the caller, otherwise need
539 		 * to continue on looking for a mismatch.
540 		 */
541 		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
542 		if (ret != 0)
543 			return ret;
544 	}
545 
546 	/*
547 	 * No mismatch found.  Let the caller know the two memory
548 	 * areas are equal.
549 	 */
550 	return 0;
551 }
552 
553 /*
554  * VM Userspace Memory Region Add
555  *
556  * Input Args:
557  *   vm - Virtual Machine
558  *   backing_src - Storage source for this region.
559  *                 NULL to use anonymous memory.
560  *   guest_paddr - Starting guest physical address
561  *   slot - KVM region slot
562  *   npages - Number of physical pages
563  *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
564  *
565  * Output Args: None
566  *
567  * Return: None
568  *
569  * Allocates a memory area of the number of pages specified by npages
570  * and maps it to the VM specified by vm, at a starting physical address
571  * given by guest_paddr.  The region is created with a KVM region slot
572  * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
573  * region is created with the flags given by flags.
574  */
575 void vm_userspace_mem_region_add(struct kvm_vm *vm,
576 	enum vm_mem_backing_src_type src_type,
577 	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
578 	uint32_t flags)
579 {
580 	int ret;
581 	struct userspace_mem_region *region;
582 	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
583 	size_t alignment;
584 
585 	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
586 		"address not on a page boundary.\n"
587 		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
588 		guest_paddr, vm->page_size);
589 	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
590 		<= vm->max_gfn, "Physical range beyond maximum "
591 		"supported physical address,\n"
592 		"  guest_paddr: 0x%lx npages: 0x%lx\n"
593 		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
594 		guest_paddr, npages, vm->max_gfn, vm->page_size);
595 
596 	/*
597 	 * Confirm a mem region with an overlapping address doesn't
598 	 * already exist.
599 	 */
600 	region = (struct userspace_mem_region *) userspace_mem_region_find(
601 		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
602 	if (region != NULL)
603 		TEST_ASSERT(false, "overlapping userspace_mem_region already "
604 			"exists\n"
605 			"  requested guest_paddr: 0x%lx npages: 0x%lx "
606 			"page_size: 0x%x\n"
607 			"  existing guest_paddr: 0x%lx size: 0x%lx",
608 			guest_paddr, npages, vm->page_size,
609 			(uint64_t) region->region.guest_phys_addr,
610 			(uint64_t) region->region.memory_size);
611 
612 	/* Confirm no region with the requested slot already exists. */
613 	for (region = vm->userspace_mem_region_head; region;
614 		region = region->next) {
615 		if (region->region.slot == slot)
616 			break;
617 	}
618 	if (region != NULL)
619 		TEST_ASSERT(false, "A mem region with the requested slot "
620 			"already exists.\n"
621 			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
622 			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
623 			slot, guest_paddr, npages,
624 			region->region.slot,
625 			(uint64_t) region->region.guest_phys_addr,
626 			(uint64_t) region->region.memory_size);
627 
628 	/* Allocate and initialize new mem region structure. */
629 	region = calloc(1, sizeof(*region));
630 	TEST_ASSERT(region != NULL, "Insufficient Memory");
631 	region->mmap_size = npages * vm->page_size;
632 
633 #ifdef __s390x__
634 	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
635 	alignment = 0x100000;
636 #else
637 	alignment = 1;
638 #endif
639 
640 	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
641 		alignment = max(huge_page_size, alignment);
642 
643 	/* Add enough memory to align up if necessary */
644 	if (alignment > 1)
645 		region->mmap_size += alignment;
646 
647 	region->mmap_start = mmap(NULL, region->mmap_size,
648 				  PROT_READ | PROT_WRITE,
649 				  MAP_PRIVATE | MAP_ANONYMOUS
650 				  | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
651 				  -1, 0);
652 	TEST_ASSERT(region->mmap_start != MAP_FAILED,
653 		    "test_malloc failed, mmap_start: %p errno: %i",
654 		    region->mmap_start, errno);
655 
656 	/* Align host address */
657 	region->host_mem = align(region->mmap_start, alignment);
658 
659 	/* As needed perform madvise */
660 	if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
661 		ret = madvise(region->host_mem, npages * vm->page_size,
662 			     src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
663 		TEST_ASSERT(ret == 0, "madvise failed,\n"
664 			    "  addr: %p\n"
665 			    "  length: 0x%lx\n"
666 			    "  src_type: %x",
667 			    region->host_mem, npages * vm->page_size, src_type);
668 	}
669 
670 	region->unused_phy_pages = sparsebit_alloc();
671 	sparsebit_set_num(region->unused_phy_pages,
672 		guest_paddr >> vm->page_shift, npages);
673 	region->region.slot = slot;
674 	region->region.flags = flags;
675 	region->region.guest_phys_addr = guest_paddr;
676 	region->region.memory_size = npages * vm->page_size;
677 	region->region.userspace_addr = (uintptr_t) region->host_mem;
678 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
679 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
680 		"  rc: %i errno: %i\n"
681 		"  slot: %u flags: 0x%x\n"
682 		"  guest_phys_addr: 0x%lx size: 0x%lx",
683 		ret, errno, slot, flags,
684 		guest_paddr, (uint64_t) region->region.memory_size);
685 
686 	/* Add to linked-list of memory regions. */
687 	if (vm->userspace_mem_region_head)
688 		vm->userspace_mem_region_head->prev = region;
689 	region->next = vm->userspace_mem_region_head;
690 	vm->userspace_mem_region_head = region;
691 }
692 
693 /*
694  * Memslot to region
695  *
696  * Input Args:
697  *   vm - Virtual Machine
698  *   memslot - KVM memory slot ID
699  *
700  * Output Args: None
701  *
702  * Return:
703  *   Pointer to memory region structure that describe memory region
704  *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
705  *   on error (e.g. currently no memory region using memslot as a KVM
706  *   memory slot ID).
707  */
708 struct userspace_mem_region *
709 memslot2region(struct kvm_vm *vm, uint32_t memslot)
710 {
711 	struct userspace_mem_region *region;
712 
713 	for (region = vm->userspace_mem_region_head; region;
714 		region = region->next) {
715 		if (region->region.slot == memslot)
716 			break;
717 	}
718 	if (region == NULL) {
719 		fprintf(stderr, "No mem region with the requested slot found,\n"
720 			"  requested slot: %u\n", memslot);
721 		fputs("---- vm dump ----\n", stderr);
722 		vm_dump(stderr, vm, 2);
723 		TEST_ASSERT(false, "Mem region not found");
724 	}
725 
726 	return region;
727 }
728 
729 /*
730  * VM Memory Region Flags Set
731  *
732  * Input Args:
733  *   vm - Virtual Machine
734  *   flags - Starting guest physical address
735  *
736  * Output Args: None
737  *
738  * Return: None
739  *
740  * Sets the flags of the memory region specified by the value of slot,
741  * to the values given by flags.
742  */
743 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
744 {
745 	int ret;
746 	struct userspace_mem_region *region;
747 
748 	region = memslot2region(vm, slot);
749 
750 	region->region.flags = flags;
751 
752 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
753 
754 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
755 		"  rc: %i errno: %i slot: %u flags: 0x%x",
756 		ret, errno, slot, flags);
757 }
758 
759 /*
760  * VCPU mmap Size
761  *
762  * Input Args: None
763  *
764  * Output Args: None
765  *
766  * Return:
767  *   Size of VCPU state
768  *
769  * Returns the size of the structure pointed to by the return value
770  * of vcpu_state().
771  */
772 static int vcpu_mmap_sz(void)
773 {
774 	int dev_fd, ret;
775 
776 	dev_fd = open(KVM_DEV_PATH, O_RDONLY);
777 	if (dev_fd < 0)
778 		exit(KSFT_SKIP);
779 
780 	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
781 	TEST_ASSERT(ret >= sizeof(struct kvm_run),
782 		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
783 		__func__, ret, errno);
784 
785 	close(dev_fd);
786 
787 	return ret;
788 }
789 
790 /*
791  * VM VCPU Add
792  *
793  * Input Args:
794  *   vm - Virtual Machine
795  *   vcpuid - VCPU ID
796  *
797  * Output Args: None
798  *
799  * Return: None
800  *
801  * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
802  * No additional VCPU setup is done.
803  */
804 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
805 {
806 	struct vcpu *vcpu;
807 
808 	/* Confirm a vcpu with the specified id doesn't already exist. */
809 	vcpu = vcpu_find(vm, vcpuid);
810 	if (vcpu != NULL)
811 		TEST_ASSERT(false, "vcpu with the specified id "
812 			"already exists,\n"
813 			"  requested vcpuid: %u\n"
814 			"  existing vcpuid: %u state: %p",
815 			vcpuid, vcpu->id, vcpu->state);
816 
817 	/* Allocate and initialize new vcpu structure. */
818 	vcpu = calloc(1, sizeof(*vcpu));
819 	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
820 	vcpu->id = vcpuid;
821 	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
822 	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
823 		vcpu->fd, errno);
824 
825 	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
826 		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
827 		vcpu_mmap_sz(), sizeof(*vcpu->state));
828 	vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
829 		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
830 	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
831 		"vcpu id: %u errno: %i", vcpuid, errno);
832 
833 	/* Add to linked-list of VCPUs. */
834 	if (vm->vcpu_head)
835 		vm->vcpu_head->prev = vcpu;
836 	vcpu->next = vm->vcpu_head;
837 	vm->vcpu_head = vcpu;
838 }
839 
840 /*
841  * VM Virtual Address Unused Gap
842  *
843  * Input Args:
844  *   vm - Virtual Machine
845  *   sz - Size (bytes)
846  *   vaddr_min - Minimum Virtual Address
847  *
848  * Output Args: None
849  *
850  * Return:
851  *   Lowest virtual address at or below vaddr_min, with at least
852  *   sz unused bytes.  TEST_ASSERT failure if no area of at least
853  *   size sz is available.
854  *
855  * Within the VM specified by vm, locates the lowest starting virtual
856  * address >= vaddr_min, that has at least sz unallocated bytes.  A
857  * TEST_ASSERT failure occurs for invalid input or no area of at least
858  * sz unallocated bytes >= vaddr_min is available.
859  */
860 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
861 				      vm_vaddr_t vaddr_min)
862 {
863 	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
864 
865 	/* Determine lowest permitted virtual page index. */
866 	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
867 	if ((pgidx_start * vm->page_size) < vaddr_min)
868 		goto no_va_found;
869 
870 	/* Loop over section with enough valid virtual page indexes. */
871 	if (!sparsebit_is_set_num(vm->vpages_valid,
872 		pgidx_start, pages))
873 		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
874 			pgidx_start, pages);
875 	do {
876 		/*
877 		 * Are there enough unused virtual pages available at
878 		 * the currently proposed starting virtual page index.
879 		 * If not, adjust proposed starting index to next
880 		 * possible.
881 		 */
882 		if (sparsebit_is_clear_num(vm->vpages_mapped,
883 			pgidx_start, pages))
884 			goto va_found;
885 		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
886 			pgidx_start, pages);
887 		if (pgidx_start == 0)
888 			goto no_va_found;
889 
890 		/*
891 		 * If needed, adjust proposed starting virtual address,
892 		 * to next range of valid virtual addresses.
893 		 */
894 		if (!sparsebit_is_set_num(vm->vpages_valid,
895 			pgidx_start, pages)) {
896 			pgidx_start = sparsebit_next_set_num(
897 				vm->vpages_valid, pgidx_start, pages);
898 			if (pgidx_start == 0)
899 				goto no_va_found;
900 		}
901 	} while (pgidx_start != 0);
902 
903 no_va_found:
904 	TEST_ASSERT(false, "No vaddr of specified pages available, "
905 		"pages: 0x%lx", pages);
906 
907 	/* NOT REACHED */
908 	return -1;
909 
910 va_found:
911 	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
912 		pgidx_start, pages),
913 		"Unexpected, invalid virtual page index range,\n"
914 		"  pgidx_start: 0x%lx\n"
915 		"  pages: 0x%lx",
916 		pgidx_start, pages);
917 	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
918 		pgidx_start, pages),
919 		"Unexpected, pages already mapped,\n"
920 		"  pgidx_start: 0x%lx\n"
921 		"  pages: 0x%lx",
922 		pgidx_start, pages);
923 
924 	return pgidx_start * vm->page_size;
925 }
926 
927 /*
928  * VM Virtual Address Allocate
929  *
930  * Input Args:
931  *   vm - Virtual Machine
932  *   sz - Size in bytes
933  *   vaddr_min - Minimum starting virtual address
934  *   data_memslot - Memory region slot for data pages
935  *   pgd_memslot - Memory region slot for new virtual translation tables
936  *
937  * Output Args: None
938  *
939  * Return:
940  *   Starting guest virtual address
941  *
942  * Allocates at least sz bytes within the virtual address space of the vm
943  * given by vm.  The allocated bytes are mapped to a virtual address >=
944  * the address given by vaddr_min.  Note that each allocation uses a
945  * a unique set of pages, with the minimum real allocation being at least
946  * a page.
947  */
948 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
949 			  uint32_t data_memslot, uint32_t pgd_memslot)
950 {
951 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
952 
953 	virt_pgd_alloc(vm, pgd_memslot);
954 
955 	/*
956 	 * Find an unused range of virtual page addresses of at least
957 	 * pages in length.
958 	 */
959 	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
960 
961 	/* Map the virtual pages. */
962 	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
963 		pages--, vaddr += vm->page_size) {
964 		vm_paddr_t paddr;
965 
966 		paddr = vm_phy_page_alloc(vm,
967 				KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
968 
969 		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
970 
971 		sparsebit_set(vm->vpages_mapped,
972 			vaddr >> vm->page_shift);
973 	}
974 
975 	return vaddr_start;
976 }
977 
978 /*
979  * Map a range of VM virtual address to the VM's physical address
980  *
981  * Input Args:
982  *   vm - Virtual Machine
983  *   vaddr - Virtuall address to map
984  *   paddr - VM Physical Address
985  *   size - The size of the range to map
986  *   pgd_memslot - Memory region slot for new virtual translation tables
987  *
988  * Output Args: None
989  *
990  * Return: None
991  *
992  * Within the VM given by vm, creates a virtual translation for the
993  * page range starting at vaddr to the page range starting at paddr.
994  */
995 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
996 	      size_t size, uint32_t pgd_memslot)
997 {
998 	size_t page_size = vm->page_size;
999 	size_t npages = size / page_size;
1000 
1001 	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
1002 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
1003 
1004 	while (npages--) {
1005 		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
1006 		vaddr += page_size;
1007 		paddr += page_size;
1008 	}
1009 }
1010 
1011 /*
1012  * Address VM Physical to Host Virtual
1013  *
1014  * Input Args:
1015  *   vm - Virtual Machine
1016  *   gpa - VM physical address
1017  *
1018  * Output Args: None
1019  *
1020  * Return:
1021  *   Equivalent host virtual address
1022  *
1023  * Locates the memory region containing the VM physical address given
1024  * by gpa, within the VM given by vm.  When found, the host virtual
1025  * address providing the memory to the vm physical address is returned.
1026  * A TEST_ASSERT failure occurs if no region containing gpa exists.
1027  */
1028 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
1029 {
1030 	struct userspace_mem_region *region;
1031 	for (region = vm->userspace_mem_region_head; region;
1032 	     region = region->next) {
1033 		if ((gpa >= region->region.guest_phys_addr)
1034 			&& (gpa <= (region->region.guest_phys_addr
1035 				+ region->region.memory_size - 1)))
1036 			return (void *) ((uintptr_t) region->host_mem
1037 				+ (gpa - region->region.guest_phys_addr));
1038 	}
1039 
1040 	TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
1041 	return NULL;
1042 }
1043 
1044 /*
1045  * Address Host Virtual to VM Physical
1046  *
1047  * Input Args:
1048  *   vm - Virtual Machine
1049  *   hva - Host virtual address
1050  *
1051  * Output Args: None
1052  *
1053  * Return:
1054  *   Equivalent VM physical address
1055  *
1056  * Locates the memory region containing the host virtual address given
1057  * by hva, within the VM given by vm.  When found, the equivalent
1058  * VM physical address is returned. A TEST_ASSERT failure occurs if no
1059  * region containing hva exists.
1060  */
1061 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
1062 {
1063 	struct userspace_mem_region *region;
1064 	for (region = vm->userspace_mem_region_head; region;
1065 	     region = region->next) {
1066 		if ((hva >= region->host_mem)
1067 			&& (hva <= (region->host_mem
1068 				+ region->region.memory_size - 1)))
1069 			return (vm_paddr_t) ((uintptr_t)
1070 				region->region.guest_phys_addr
1071 				+ (hva - (uintptr_t) region->host_mem));
1072 	}
1073 
1074 	TEST_ASSERT(false, "No mapping to a guest physical address, "
1075 		"hva: %p", hva);
1076 	return -1;
1077 }
1078 
1079 /*
1080  * VM Create IRQ Chip
1081  *
1082  * Input Args:
1083  *   vm - Virtual Machine
1084  *
1085  * Output Args: None
1086  *
1087  * Return: None
1088  *
1089  * Creates an interrupt controller chip for the VM specified by vm.
1090  */
1091 void vm_create_irqchip(struct kvm_vm *vm)
1092 {
1093 	int ret;
1094 
1095 	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
1096 	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
1097 		"rc: %i errno: %i", ret, errno);
1098 
1099 	vm->has_irqchip = true;
1100 }
1101 
1102 /*
1103  * VM VCPU State
1104  *
1105  * Input Args:
1106  *   vm - Virtual Machine
1107  *   vcpuid - VCPU ID
1108  *
1109  * Output Args: None
1110  *
1111  * Return:
1112  *   Pointer to structure that describes the state of the VCPU.
1113  *
1114  * Locates and returns a pointer to a structure that describes the
1115  * state of the VCPU with the given vcpuid.
1116  */
1117 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
1118 {
1119 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1120 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1121 
1122 	return vcpu->state;
1123 }
1124 
/*
 * VM VCPU Run
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return: None
 *
 * Runs the VCPU given by vcpuid, within the VM given by vm, by calling
 * _vcpu_run().  A TEST_ASSERT failure occurs if _vcpu_run() does not
 * return 0.
 */
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
	int ret;

	ret = _vcpu_run(vm, vcpuid);
	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, rc: %i errno: %i",
		    ret, errno);
}
1145 
1146 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
1147 {
1148 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1149 	int rc;
1150 
1151 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1152 	do {
1153 		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
1154 	} while (rc == -1 && errno == EINTR);
1155 	return rc;
1156 }
1157 
1158 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
1159 {
1160 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1161 	int ret;
1162 
1163 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1164 
1165 	vcpu->state->immediate_exit = 1;
1166 	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
1167 	vcpu->state->immediate_exit = 0;
1168 
1169 	TEST_ASSERT(ret == -1 && errno == EINTR,
1170 		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
1171 		    ret, errno);
1172 }
1173 
1174 /*
1175  * VM VCPU Set MP State
1176  *
1177  * Input Args:
1178  *   vm - Virtual Machine
1179  *   vcpuid - VCPU ID
1180  *   mp_state - mp_state to be set
1181  *
1182  * Output Args: None
1183  *
1184  * Return: None
1185  *
1186  * Sets the MP state of the VCPU given by vcpuid, to the state given
1187  * by mp_state.
1188  */
1189 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
1190 		       struct kvm_mp_state *mp_state)
1191 {
1192 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1193 	int ret;
1194 
1195 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1196 
1197 	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
1198 	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
1199 		"rc: %i errno: %i", ret, errno);
1200 }
1201 
1202 /*
1203  * VM VCPU Regs Get
1204  *
1205  * Input Args:
1206  *   vm - Virtual Machine
1207  *   vcpuid - VCPU ID
1208  *
1209  * Output Args:
1210  *   regs - current state of VCPU regs
1211  *
1212  * Return: None
1213  *
1214  * Obtains the current register state for the VCPU specified by vcpuid
1215  * and stores it at the location given by regs.
1216  */
1217 void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1218 {
1219 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1220 	int ret;
1221 
1222 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1223 
1224 	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
1225 	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
1226 		ret, errno);
1227 }
1228 
1229 /*
1230  * VM VCPU Regs Set
1231  *
1232  * Input Args:
1233  *   vm - Virtual Machine
1234  *   vcpuid - VCPU ID
1235  *   regs - Values to set VCPU regs to
1236  *
1237  * Output Args: None
1238  *
1239  * Return: None
1240  *
1241  * Sets the regs of the VCPU specified by vcpuid to the values
1242  * given by regs.
1243  */
1244 void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1245 {
1246 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1247 	int ret;
1248 
1249 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1250 
1251 	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
1252 	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
1253 		ret, errno);
1254 }
1255 
1256 #ifdef __KVM_HAVE_VCPU_EVENTS
1257 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
1258 		     struct kvm_vcpu_events *events)
1259 {
1260 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1261 	int ret;
1262 
1263 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1264 
1265 	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
1266 	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
1267 		ret, errno);
1268 }
1269 
1270 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
1271 		     struct kvm_vcpu_events *events)
1272 {
1273 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1274 	int ret;
1275 
1276 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1277 
1278 	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
1279 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
1280 		ret, errno);
1281 }
1282 #endif
1283 
1284 #ifdef __x86_64__
1285 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
1286 			   struct kvm_nested_state *state)
1287 {
1288 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1289 	int ret;
1290 
1291 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1292 
1293 	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
1294 	TEST_ASSERT(ret == 0,
1295 		"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1296 		ret, errno);
1297 }
1298 
1299 int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
1300 			  struct kvm_nested_state *state, bool ignore_error)
1301 {
1302 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1303 	int ret;
1304 
1305 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1306 
1307 	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
1308 	if (!ignore_error) {
1309 		TEST_ASSERT(ret == 0,
1310 			"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1311 			ret, errno);
1312 	}
1313 
1314 	return ret;
1315 }
1316 #endif
1317 
1318 /*
1319  * VM VCPU System Regs Get
1320  *
1321  * Input Args:
1322  *   vm - Virtual Machine
1323  *   vcpuid - VCPU ID
1324  *
1325  * Output Args:
1326  *   sregs - current state of VCPU system regs
1327  *
1328  * Return: None
1329  *
1330  * Obtains the current system register state for the VCPU specified by
1331  * vcpuid and stores it at the location given by sregs.
1332  */
1333 void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1334 {
1335 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1336 	int ret;
1337 
1338 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1339 
1340 	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
1341 	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
1342 		ret, errno);
1343 }
1344 
/*
 * VM VCPU System Regs Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   sregs - Values to set VCPU system regs to
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the system regs of the VCPU specified by vcpuid to the values
 * given by sregs, by calling _vcpu_sregs_set().  A TEST_ASSERT failure
 * occurs if _vcpu_sregs_set() does not return 0.
 */
void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);

	/* _vcpu_sregs_set() issues KVM_SET_SREGS, so name that ioctl here. */
	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}
1366 
1367 int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1368 {
1369 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1370 
1371 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1372 
1373 	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
1374 }
1375 
/*
 * VCPU Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VCPU fd by calling _vcpu_ioctl().
 * A TEST_ASSERT failure occurs if _vcpu_ioctl() does not return 0.
 */
void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
		unsigned long cmd, void *arg)
{
	int ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);

	TEST_ASSERT(ret == 0,
		    "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
		    cmd, ret, errno, strerror(errno));
}
1398 
1399 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
1400 		unsigned long cmd, void *arg)
1401 {
1402 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1403 	int ret;
1404 
1405 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1406 
1407 	ret = ioctl(vcpu->fd, cmd, arg);
1408 
1409 	return ret;
1410 }
1411 
1412 /*
1413  * VM Ioctl
1414  *
1415  * Input Args:
1416  *   vm - Virtual Machine
1417  *   cmd - Ioctl number
1418  *   arg - Argument to pass to the ioctl
1419  *
1420  * Return: None
1421  *
1422  * Issues an arbitrary ioctl on a VM fd.
1423  */
1424 void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1425 {
1426 	int ret;
1427 
1428 	ret = ioctl(vm->fd, cmd, arg);
1429 	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
1430 		cmd, ret, errno, strerror(errno));
1431 }
1432 
1433 /*
1434  * VM Dump
1435  *
1436  * Input Args:
1437  *   vm - Virtual Machine
1438  *   indent - Left margin indent amount
1439  *
1440  * Output Args:
1441  *   stream - Output FILE stream
1442  *
1443  * Return: None
1444  *
1445  * Dumps the current state of the VM given by vm, to the FILE stream
1446  * given by stream.
1447  */
1448 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
1449 {
1450 	struct userspace_mem_region *region;
1451 	struct vcpu *vcpu;
1452 
1453 	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
1454 	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
1455 	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
1456 	fprintf(stream, "%*sMem Regions:\n", indent, "");
1457 	for (region = vm->userspace_mem_region_head; region;
1458 		region = region->next) {
1459 		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
1460 			"host_virt: %p\n", indent + 2, "",
1461 			(uint64_t) region->region.guest_phys_addr,
1462 			(uint64_t) region->region.memory_size,
1463 			region->host_mem);
1464 		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
1465 		sparsebit_dump(stream, region->unused_phy_pages, 0);
1466 	}
1467 	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
1468 	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
1469 	fprintf(stream, "%*spgd_created: %u\n", indent, "",
1470 		vm->pgd_created);
1471 	if (vm->pgd_created) {
1472 		fprintf(stream, "%*sVirtual Translation Tables:\n",
1473 			indent + 2, "");
1474 		virt_dump(stream, vm, indent + 4);
1475 	}
1476 	fprintf(stream, "%*sVCPUs:\n", indent, "");
1477 	for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
1478 		vcpu_dump(stream, vm, vcpu->id, indent + 2);
1479 }
1480 
1481 /* Known KVM exit reasons */
1482 static struct exit_reason {
1483 	unsigned int reason;
1484 	const char *name;
1485 } exit_reasons_known[] = {
1486 	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
1487 	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
1488 	{KVM_EXIT_IO, "IO"},
1489 	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
1490 	{KVM_EXIT_DEBUG, "DEBUG"},
1491 	{KVM_EXIT_HLT, "HLT"},
1492 	{KVM_EXIT_MMIO, "MMIO"},
1493 	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
1494 	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
1495 	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
1496 	{KVM_EXIT_INTR, "INTR"},
1497 	{KVM_EXIT_SET_TPR, "SET_TPR"},
1498 	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
1499 	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
1500 	{KVM_EXIT_S390_RESET, "S390_RESET"},
1501 	{KVM_EXIT_DCR, "DCR"},
1502 	{KVM_EXIT_NMI, "NMI"},
1503 	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
1504 	{KVM_EXIT_OSI, "OSI"},
1505 	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
1506 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
1507 	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
1508 #endif
1509 };
1510 
1511 /*
1512  * Exit Reason String
1513  *
1514  * Input Args:
1515  *   exit_reason - Exit reason
1516  *
1517  * Output Args: None
1518  *
1519  * Return:
1520  *   Constant string pointer describing the exit reason.
1521  *
1522  * Locates and returns a constant string that describes the KVM exit
1523  * reason given by exit_reason.  If no such string is found, a constant
1524  * string of "Unknown" is returned.
1525  */
1526 const char *exit_reason_str(unsigned int exit_reason)
1527 {
1528 	unsigned int n1;
1529 
1530 	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
1531 		if (exit_reason == exit_reasons_known[n1].reason)
1532 			return exit_reasons_known[n1].name;
1533 	}
1534 
1535 	return "Unknown";
1536 }
1537 
1538 /*
1539  * Physical Contiguous Page Allocator
1540  *
1541  * Input Args:
1542  *   vm - Virtual Machine
1543  *   num - number of pages
1544  *   paddr_min - Physical address minimum
1545  *   memslot - Memory region to allocate page from
1546  *
1547  * Output Args: None
1548  *
1549  * Return:
1550  *   Starting physical address
1551  *
1552  * Within the VM specified by vm, locates a range of available physical
1553  * pages at or above paddr_min. If found, the pages are marked as in use
1554  * and their base address is returned. A TEST_ASSERT failure occurs if
1555  * not enough pages are available at or above paddr_min.
1556  */
1557 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
1558 			      vm_paddr_t paddr_min, uint32_t memslot)
1559 {
1560 	struct userspace_mem_region *region;
1561 	sparsebit_idx_t pg, base;
1562 
1563 	TEST_ASSERT(num > 0, "Must allocate at least one page");
1564 
1565 	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
1566 		"not divisible by page size.\n"
1567 		"  paddr_min: 0x%lx page_size: 0x%x",
1568 		paddr_min, vm->page_size);
1569 
1570 	region = memslot2region(vm, memslot);
1571 	base = pg = paddr_min >> vm->page_shift;
1572 
1573 	do {
1574 		for (; pg < base + num; ++pg) {
1575 			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
1576 				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
1577 				break;
1578 			}
1579 		}
1580 	} while (pg && pg != base + num);
1581 
1582 	if (pg == 0) {
1583 		fprintf(stderr, "No guest physical page available, "
1584 			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
1585 			paddr_min, vm->page_size, memslot);
1586 		fputs("---- vm dump ----\n", stderr);
1587 		vm_dump(stderr, vm, 2);
1588 		abort();
1589 	}
1590 
1591 	for (pg = base; pg < base + num; ++pg)
1592 		sparsebit_clear(region->unused_phy_pages, pg);
1593 
1594 	return base * vm->page_size;
1595 }
1596 
1597 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
1598 			     uint32_t memslot)
1599 {
1600 	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
1601 }
1602 
1603 /*
1604  * Address Guest Virtual to Host Virtual
1605  *
1606  * Input Args:
1607  *   vm - Virtual Machine
1608  *   gva - VM virtual address
1609  *
1610  * Output Args: None
1611  *
1612  * Return:
1613  *   Equivalent host virtual address
1614  */
1615 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
1616 {
1617 	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
1618 }
1619 
1620 /*
1621  * Is Unrestricted Guest
1622  *
1623  * Input Args:
1624  *   vm - Virtual Machine
1625  *
1626  * Output Args: None
1627  *
1628  * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
1629  *
1630  * Check if the unrestricted guest flag is enabled.
1631  */
1632 bool vm_is_unrestricted_guest(struct kvm_vm *vm)
1633 {
1634 	char val = 'N';
1635 	size_t count;
1636 	FILE *f;
1637 
1638 	if (vm == NULL) {
1639 		/* Ensure that the KVM vendor-specific module is loaded. */
1640 		f = fopen(KVM_DEV_PATH, "r");
1641 		TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
1642 			    errno);
1643 		fclose(f);
1644 	}
1645 
1646 	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
1647 	if (f) {
1648 		count = fread(&val, sizeof(char), 1, f);
1649 		TEST_ASSERT(count == 1, "Unable to read from param file.");
1650 		fclose(f);
1651 	}
1652 
1653 	return val == 'Y';
1654 }
1655 
1656 unsigned int vm_get_page_size(struct kvm_vm *vm)
1657 {
1658 	return vm->page_size;
1659 }
1660 
1661 unsigned int vm_get_page_shift(struct kvm_vm *vm)
1662 {
1663 	return vm->page_shift;
1664 }
1665 
1666 unsigned int vm_get_max_gfn(struct kvm_vm *vm)
1667 {
1668 	return vm->max_gfn;
1669 }
1670