1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2024, Intel, Inc
4 *
5 * Author:
6 * Isaku Yamahata <isaku.yamahata at gmail.com>
7 */
8 #include <linux/sizes.h>
9
10 #include <test_util.h>
11 #include <kvm_util.h>
12 #include <processor.h>
13 #include <pthread.h>
14
/* Arbitrarily chosen values */
/* Size in bytes of the test region: SZ_2M plus one additional page. */
#define TEST_SIZE (SZ_2M + PAGE_SIZE)
/* The same region expressed as a number of PAGE_SIZE pages. */
#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE)
/* Memslot ID used for the region that is added, deleted and recreated. */
#define TEST_SLOT 10
19
guest_code(uint64_t base_gva)20 static void guest_code(uint64_t base_gva)
21 {
22 volatile uint64_t val __used;
23 int i;
24
25 for (i = 0; i < TEST_NPAGES; i++) {
26 uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE);
27
28 val = *src;
29 }
30
31 GUEST_DONE();
32 }
33
/*
 * State shared between pre_fault_memory() and the delete_slot_worker()
 * thread.  The three bool flags are accessed with READ_ONCE()/WRITE_ONCE()
 * and are used as a simple spin-wait handshake between the two threads.
 */
struct slot_worker_data {
	/* VM whose TEST_SLOT memslot is deleted and recreated. */
	struct kvm_vm *vm;
	/* Guest physical address passed when recreating the memslot. */
	u64 gpa;
	/* Memslot flags passed when recreating the memslot. */
	uint32_t flags;
	/* Set by the worker once it has started running. */
	bool worker_ready;
	/* Set by the main thread; the worker deletes the slot after seeing it. */
	bool prefault_ready;
	/* Set by the main thread; the worker recreates the slot after seeing it. */
	bool recreate_slot;
};
42
delete_slot_worker(void * __data)43 static void *delete_slot_worker(void *__data)
44 {
45 struct slot_worker_data *data = __data;
46 struct kvm_vm *vm = data->vm;
47
48 WRITE_ONCE(data->worker_ready, true);
49
50 while (!READ_ONCE(data->prefault_ready))
51 cpu_relax();
52
53 vm_mem_region_delete(vm, TEST_SLOT);
54
55 while (!READ_ONCE(data->recreate_slot))
56 cpu_relax();
57
58 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
59 TEST_SLOT, TEST_NPAGES, data->flags);
60
61 return NULL;
62 }
63
/*
 * Prefault @size bytes starting at @base_gpa + @offset on @vcpu using
 * KVM_PRE_FAULT_MEMORY, while a worker thread concurrently deletes and later
 * recreates TEST_SLOT at @base_gpa.
 *
 * @vcpu:          vCPU on which the ioctl is issued.
 * @base_gpa:      GPA at which the worker recreates TEST_SLOT.
 * @offset:        byte offset from @base_gpa at which prefaulting starts.
 * @size:          number of bytes to request.
 * @expected_left: number of bytes expected to remain unprocessed when the
 *                 retry loop finishes; 0 means the whole range must succeed.
 * @private:       if true, the slot is recreated with KVM_MEM_GUEST_MEMFD.
 *
 * Asserts (via TEST_ASSERT) on any unexpected result, including failure to
 * create or join the worker thread.
 */
static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
			     u64 size, u64 expected_left, bool private)
{
	struct kvm_pre_fault_memory range = {
		.gpa = base_gpa + offset,
		.size = size,
		.flags = 0,
	};
	struct slot_worker_data data = {
		.vm = vcpu->vm,
		.gpa = base_gpa,
		.flags = private ? KVM_MEM_GUEST_MEMFD : 0,
	};
	bool slot_recreated = false;
	pthread_t slot_worker;
	int ret, save_errno;
	u64 prev;
	int r;

	/*
	 * Concurrently delete (and recreate) the slot to test KVM's handling
	 * of a racing memslot deletion with prefaulting.
	 *
	 * pthread_create() reports failure through its return value.  Check
	 * it, as the wait on worker_ready below would otherwise spin forever
	 * if the worker never started.
	 */
	r = pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
	TEST_ASSERT(!r, "pthread_create() failed, error = %d", r);

	while (!READ_ONCE(data.worker_ready))
		cpu_relax();

	WRITE_ONCE(data.prefault_ready, true);

	for (;;) {
		prev = range.size;
		ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
		/* Snapshot errno before anything else can overwrite it. */
		save_errno = errno;
		TEST_ASSERT((range.size < prev) ^ (ret < 0),
			    "%sexpecting range.size to change on %s",
			    ret < 0 ? "not " : "",
			    ret < 0 ? "failure" : "success");

		/*
		 * Immediately retry prefaulting if KVM was interrupted by an
		 * unrelated signal/event.
		 */
		if (ret < 0 && save_errno == EINTR)
			continue;

		/*
		 * Tell the worker to recreate the slot in order to complete
		 * prefaulting (if prefault didn't already succeed before the
		 * slot was deleted) and/or to prepare for the next testcase.
		 * Wait for the worker to exit so that the next invocation of
		 * prefaulting is guaranteed to complete (assuming no KVM bugs).
		 */
		if (!slot_recreated) {
			WRITE_ONCE(data.recreate_slot, true);
			r = pthread_join(slot_worker, NULL);
			TEST_ASSERT(!r, "pthread_join() failed, error = %d", r);
			slot_recreated = true;

			/*
			 * Retry prefaulting to get a stable result, i.e. to
			 * avoid seeing random EAGAIN failures.  Don't retry if
			 * prefaulting already succeeded, as KVM disallows
			 * prefaulting with size=0, i.e. blindly retrying would
			 * result in test failures due to EINVAL.  KVM should
			 * always return success if all bytes are prefaulted,
			 * i.e. there is no need to guard against EAGAIN being
			 * returned.
			 */
			if (range.size)
				continue;
		}

		/*
		 * All done if there are no remaining bytes to prefault, or if
		 * prefaulting failed (EINTR was handled above, and EAGAIN due
		 * to prefaulting a memslot that's being actively deleted should
		 * be impossible since the memslot has already been recreated).
		 */
		if (!range.size || ret < 0)
			break;
	}

	TEST_ASSERT(range.size == expected_left,
		    "Completed with %llu bytes left, expected %lu",
		    range.size, expected_left);

	/*
	 * Assert success if prefaulting the entire range should succeed, i.e.
	 * complete with no bytes remaining.  Otherwise prefaulting should have
	 * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
	 * no memslot exists).
	 */
	if (!expected_left)
		TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
	else
		TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
					  KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
}
161
__test_pre_fault_memory(unsigned long vm_type,bool private)162 static void __test_pre_fault_memory(unsigned long vm_type, bool private)
163 {
164 uint64_t gpa, gva, alignment, guest_page_size;
165 const struct vm_shape shape = {
166 .mode = VM_MODE_DEFAULT,
167 .type = vm_type,
168 };
169 struct kvm_vcpu *vcpu;
170 struct kvm_run *run;
171 struct kvm_vm *vm;
172 struct ucall uc;
173
174 vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
175
176 alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
177 gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
178 #ifdef __s390x__
179 alignment = max(0x100000UL, guest_page_size);
180 #else
181 alignment = SZ_2M;
182 #endif
183 gpa = align_down(gpa, alignment);
184 gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1);
185
186 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT,
187 TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0);
188 virt_map(vm, gva, gpa, TEST_NPAGES);
189
190 if (private)
191 vm_mem_set_private(vm, gpa, TEST_SIZE);
192
193 pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private);
194 pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
195 pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
196
197 vcpu_args_set(vcpu, 1, gva);
198 vcpu_run(vcpu);
199
200 run = vcpu->run;
201 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
202 "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
203 run->exit_reason, exit_reason_str(run->exit_reason));
204
205 switch (get_ucall(vcpu, &uc)) {
206 case UCALL_ABORT:
207 REPORT_GUEST_ASSERT(uc);
208 break;
209 case UCALL_DONE:
210 break;
211 default:
212 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
213 break;
214 }
215
216 kvm_vm_free(vm);
217 }
218
test_pre_fault_memory(unsigned long vm_type,bool private)219 static void test_pre_fault_memory(unsigned long vm_type, bool private)
220 {
221 if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) {
222 pr_info("Skipping tests for vm_type 0x%lx\n", vm_type);
223 return;
224 }
225
226 __test_pre_fault_memory(vm_type, private);
227 }
228
main(int argc,char * argv[])229 int main(int argc, char *argv[])
230 {
231 TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY));
232
233 test_pre_fault_memory(0, false);
234 #ifdef __x86_64__
235 test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false);
236 test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true);
237 #endif
238 return 0;
239 }
240