1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2024, Intel, Inc 4 * 5 * Author: 6 * Isaku Yamahata <isaku.yamahata at gmail.com> 7 */ 8 #include <linux/sizes.h> 9 10 #include <test_util.h> 11 #include <kvm_util.h> 12 #include <processor.h> 13 #include <pthread.h> 14 15 /* Arbitrarily chosen values */ 16 #define TEST_SIZE (SZ_2M + PAGE_SIZE) 17 #define TEST_NPAGES (TEST_SIZE / PAGE_SIZE) 18 #define TEST_SLOT 10 19 20 static void guest_code(uint64_t base_gpa) 21 { 22 volatile uint64_t val __used; 23 int i; 24 25 for (i = 0; i < TEST_NPAGES; i++) { 26 uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE); 27 28 val = *src; 29 } 30 31 GUEST_DONE(); 32 } 33 34 struct slot_worker_data { 35 struct kvm_vm *vm; 36 u64 gpa; 37 uint32_t flags; 38 bool worker_ready; 39 bool prefault_ready; 40 bool recreate_slot; 41 }; 42 43 static void *delete_slot_worker(void *__data) 44 { 45 struct slot_worker_data *data = __data; 46 struct kvm_vm *vm = data->vm; 47 48 WRITE_ONCE(data->worker_ready, true); 49 50 while (!READ_ONCE(data->prefault_ready)) 51 cpu_relax(); 52 53 vm_mem_region_delete(vm, TEST_SLOT); 54 55 while (!READ_ONCE(data->recreate_slot)) 56 cpu_relax(); 57 58 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa, 59 TEST_SLOT, TEST_NPAGES, data->flags); 60 61 return NULL; 62 } 63 64 static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, 65 u64 size, u64 expected_left, bool private) 66 { 67 struct kvm_pre_fault_memory range = { 68 .gpa = base_gpa + offset, 69 .size = size, 70 .flags = 0, 71 }; 72 struct slot_worker_data data = { 73 .vm = vcpu->vm, 74 .gpa = base_gpa, 75 .flags = private ? KVM_MEM_GUEST_MEMFD : 0, 76 }; 77 bool slot_recreated = false; 78 pthread_t slot_worker; 79 int ret, save_errno; 80 u64 prev; 81 82 /* 83 * Concurrently delete (and recreate) the slot to test KVM's handling 84 * of a racing memslot deletion with prefaulting. 85 */ 86 pthread_create(&slot_worker, NULL, delete_slot_worker, &data); 87 88 while (!READ_ONCE(data.worker_ready)) 89 cpu_relax(); 90 91 WRITE_ONCE(data.prefault_ready, true); 92 93 for (;;) { 94 prev = range.size; 95 ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); 96 save_errno = errno; 97 TEST_ASSERT((range.size < prev) ^ (ret < 0), 98 "%sexpecting range.size to change on %s", 99 ret < 0 ? "not " : "", 100 ret < 0 ? "failure" : "success"); 101 102 /* 103 * Immediately retry prefaulting if KVM was interrupted by an 104 * unrelated signal/event. 105 */ 106 if (ret < 0 && save_errno == EINTR) 107 continue; 108 109 /* 110 * Tell the worker to recreate the slot in order to complete 111 * prefaulting (if prefault didn't already succeed before the 112 * slot was deleted) and/or to prepare for the next testcase. 113 * Wait for the worker to exit so that the next invocation of 114 * prefaulting is guaranteed to complete (assuming no KVM bugs). 115 */ 116 if (!slot_recreated) { 117 WRITE_ONCE(data.recreate_slot, true); 118 pthread_join(slot_worker, NULL); 119 slot_recreated = true; 120 121 /* 122 * Retry prefaulting to get a stable result, i.e. to 123 * avoid seeing random EAGAIN failures. Don't retry if 124 * prefaulting already succeeded, as KVM disallows 125 * prefaulting with size=0, i.e. blindly retrying would 126 * result in test failures due to EINVAL. KVM should 127 * always return success if all bytes are prefaulted, 128 * i.e. there is no need to guard against EAGAIN being 129 * returned. 130 */ 131 if (range.size) 132 continue; 133 } 134 135 /* 136 * All done if there are no remaining bytes to prefault, or if 137 * prefaulting failed (EINTR was handled above, and EAGAIN due 138 * to prefaulting a memslot that's being actively deleted should 139 * be impossible since the memslot has already been recreated). 140 */ 141 if (!range.size || ret < 0) 142 break; 143 } 144 145 TEST_ASSERT(range.size == expected_left, 146 "Completed with %llu bytes left, expected %lu", 147 range.size, expected_left); 148 149 /* 150 * Assert success if prefaulting the entire range should succeed, i.e. 151 * complete with no bytes remaining. Otherwise prefaulting should have 152 * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when 153 * no memslot exists). 154 */ 155 if (!expected_left) 156 TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); 157 else 158 TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, 159 KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); 160 } 161 162 static void __test_pre_fault_memory(unsigned long vm_type, bool private) 163 { 164 const struct vm_shape shape = { 165 .mode = VM_MODE_DEFAULT, 166 .type = vm_type, 167 }; 168 struct kvm_vcpu *vcpu; 169 struct kvm_run *run; 170 struct kvm_vm *vm; 171 struct ucall uc; 172 173 uint64_t guest_test_phys_mem; 174 uint64_t guest_test_virt_mem; 175 uint64_t alignment, guest_page_size; 176 177 vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code); 178 179 alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; 180 guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size; 181 #ifdef __s390x__ 182 alignment = max(0x100000UL, guest_page_size); 183 #else 184 alignment = SZ_2M; 185 #endif 186 guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); 187 guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1); 188 189 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 190 guest_test_phys_mem, TEST_SLOT, TEST_NPAGES, 191 private ? KVM_MEM_GUEST_MEMFD : 0); 192 virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES); 193 194 if (private) 195 vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE); 196 197 pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private); 198 pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); 199 pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); 200 201 vcpu_args_set(vcpu, 1, guest_test_virt_mem); 202 vcpu_run(vcpu); 203 204 run = vcpu->run; 205 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 206 "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", 207 run->exit_reason, exit_reason_str(run->exit_reason)); 208 209 switch (get_ucall(vcpu, &uc)) { 210 case UCALL_ABORT: 211 REPORT_GUEST_ASSERT(uc); 212 break; 213 case UCALL_DONE: 214 break; 215 default: 216 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); 217 break; 218 } 219 220 kvm_vm_free(vm); 221 } 222 223 static void test_pre_fault_memory(unsigned long vm_type, bool private) 224 { 225 if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) { 226 pr_info("Skipping tests for vm_type 0x%lx\n", vm_type); 227 return; 228 } 229 230 __test_pre_fault_memory(vm_type, private); 231 } 232 233 int main(int argc, char *argv[]) 234 { 235 TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY)); 236 237 test_pre_fault_memory(0, false); 238 #ifdef __x86_64__ 239 test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false); 240 test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true); 241 #endif 242 return 0; 243 } 244