xref: /linux/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c (revision f4b0c4b508364fde023e4f7b9f23f7e38c663dfe)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2022, Google LLC.
4  */
5 #include <fcntl.h>
6 #include <limits.h>
7 #include <pthread.h>
8 #include <sched.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/ioctl.h>
14 
15 #include <linux/compiler.h>
16 #include <linux/kernel.h>
17 #include <linux/kvm_para.h>
18 #include <linux/memfd.h>
19 #include <linux/sizes.h>
20 
21 #include <test_util.h>
22 #include <kvm_util.h>
23 #include <processor.h>
24 
/* ID of the first data memslot; additional memslots use consecutive IDs. */
#define BASE_DATA_SLOT		10
/* Guest physical address at which the test data starts (4GiB). */
#define BASE_DATA_GPA		((uint64_t)1 << 32)
/* Bytes of test data owned by each vCPU: 2MiB plus one additional page. */
#define PER_CPU_DATA_SIZE	((uint64_t)(SZ_2M + PAGE_SIZE))
28 
/*
 * Guest-side check that every byte in [gpa, gpa + size) equals the byte value
 * pattern.  Each byte is checked with __GUEST_ASSERT(), which is handed the
 * expected value, the offset, the gpa and the value actually read.
 *
 * Horrific macro (instead of a function) so that the line info is captured
 * accurately :-(
 *
 * Every argument is evaluated exactly once and captured in a prefixed local,
 * so callers may pass arbitrary expressions (e.g. "a + b" or "x ? y : z")
 * and may have their own variables named "mem" or "i".
 */
#define memcmp_g(gpa, pattern, size)								\
do {												\
	const uint64_t __gpa = (gpa);								\
	const uint8_t __pattern = (pattern);							\
	const size_t __size = (size);								\
	const uint8_t *__mem = (const uint8_t *)__gpa;						\
	size_t __i;										\
												\
	for (__i = 0; __i < __size; __i++)							\
		__GUEST_ASSERT(__mem[__i] == __pattern,						\
			       "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x",	\
			       __pattern, __i, __gpa + __i, __mem[__i]);			\
} while (0)
40 
/*
 * Host-side check that the size bytes starting at mem all equal pattern.
 * gpa is only used to report where a mismatch was found; the caller passes
 * the guest physical address that mem was translated from.
 */
static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
{
	size_t offset = 0;

	while (offset < size) {
		TEST_ASSERT(mem[offset] == pattern,
			    "Host expected 0x%x at gpa 0x%lx, got 0x%x",
			    pattern, gpa + offset, mem[offset]);
		offset++;
	}
}
50 
/*
 * Run memory conversion tests with explicit conversion:
 * Execute a KVM hypercall to map/unmap a gpa range, which causes an exit to
 * userspace to back/unback private memory.  Subsequent accesses by the guest
 * to the gpa range will not cause an exit to userspace.
 *
 * Test memory conversion scenarios with the following steps:
 * 1) Access private memory using private access and verify that the memory
 *    contents are not visible to userspace.
 * 2) Convert memory to shared using explicit conversions and ensure that
 *    userspace is able to access the shared regions.
 * 3) Convert memory back to private using explicit conversions and ensure that
 *    userspace is again not able to access the converted private regions.
 */
65 
/* Initializer for one test range: byte offset @o and byte size @s. */
#define GUEST_STAGE(o, s) { .offset = (o), .size = (s) }
67 
/* Command the guest passes as the first argument of a sync ucall. */
enum ucall_syncs {
	/* Host verifies the current pattern, then writes the new pattern. */
	SYNC_SHARED = 0,
	/* Host only verifies the pattern it can observe. */
	SYNC_PRIVATE = 1,
};
72 
guest_sync_shared(uint64_t gpa,uint64_t size,uint8_t current_pattern,uint8_t new_pattern)73 static void guest_sync_shared(uint64_t gpa, uint64_t size,
74 			      uint8_t current_pattern, uint8_t new_pattern)
75 {
76 	GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
77 }
78 
guest_sync_private(uint64_t gpa,uint64_t size,uint8_t pattern)79 static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
80 {
81 	GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
82 }
83 
/*
 * Flags passed as the third MAP_GPA_RANGE hypercall argument and decoded by
 * the host's hypercall handler.  Arbitrary values, KVM doesn't care about the
 * attribute flags.
 */
#define MAP_GPA_SET_ATTRIBUTES	BIT(0)
#define MAP_GPA_SHARED		BIT(1)
#define MAP_GPA_DO_FALLOCATE	BIT(2)

/*
 * Request a conversion of [gpa, gpa + size) via the MAP_GPA_RANGE hypercall.
 * Memory attributes are always updated; map_shared selects shared vs. private
 * and do_fallocate additionally asks the host to fallocate() the backing.
 */
static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
			  bool do_fallocate)
{
	const uint64_t shared = map_shared ? MAP_GPA_SHARED : 0;
	const uint64_t falloc = do_fallocate ? MAP_GPA_DO_FALLOCATE : 0;

	kvm_hypercall_map_gpa_range(gpa, size,
				    MAP_GPA_SET_ATTRIBUTES | shared | falloc);
}
100 
/* Convert [gpa, gpa + size) to shared, optionally with fallocate(). */
static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
{
	const bool map_shared = true;

	guest_map_mem(gpa, size, map_shared, do_fallocate);
}
105 
/* Convert [gpa, gpa + size) to private, optionally with fallocate(). */
static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
{
	const bool map_shared = false;

	guest_map_mem(gpa, size, map_shared, do_fallocate);
}
110 
111 struct {
112 	uint64_t offset;
113 	uint64_t size;
114 } static const test_ranges[] = {
115 	GUEST_STAGE(0, PAGE_SIZE),
116 	GUEST_STAGE(0, SZ_2M),
117 	GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
118 	GUEST_STAGE(PAGE_SIZE, SZ_2M),
119 	GUEST_STAGE(SZ_2M, PAGE_SIZE),
120 };
121 
guest_test_explicit_conversion(uint64_t base_gpa,bool do_fallocate)122 static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
123 {
124 	const uint8_t def_p = 0xaa;
125 	const uint8_t init_p = 0xcc;
126 	uint64_t j;
127 	int i;
128 
129 	/* Memory should be shared by default. */
130 	memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
131 	memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
132 	guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
133 
134 	memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
135 
136 	for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
137 		uint64_t gpa = base_gpa + test_ranges[i].offset;
138 		uint64_t size = test_ranges[i].size;
139 		uint8_t p1 = 0x11;
140 		uint8_t p2 = 0x22;
141 		uint8_t p3 = 0x33;
142 		uint8_t p4 = 0x44;
143 
144 		/*
145 		 * Set the test region to pattern one to differentiate it from
146 		 * the data range as a whole (contains the initial pattern).
147 		 */
148 		memset((void *)gpa, p1, size);
149 
150 		/*
151 		 * Convert to private, set and verify the private data, and
152 		 * then verify that the rest of the data (map shared) still
153 		 * holds the initial pattern, and that the host always sees the
154 		 * shared memory (initial pattern).  Unlike shared memory,
155 		 * punching a hole in private memory is destructive, i.e.
156 		 * previous values aren't guaranteed to be preserved.
157 		 */
158 		guest_map_private(gpa, size, do_fallocate);
159 
160 		if (size > PAGE_SIZE) {
161 			memset((void *)gpa, p2, PAGE_SIZE);
162 			goto skip;
163 		}
164 
165 		memset((void *)gpa, p2, size);
166 		guest_sync_private(gpa, size, p1);
167 
168 		/*
169 		 * Verify that the private memory was set to pattern two, and
170 		 * that shared memory still holds the initial pattern.
171 		 */
172 		memcmp_g(gpa, p2, size);
173 		if (gpa > base_gpa)
174 			memcmp_g(base_gpa, init_p, gpa - base_gpa);
175 		if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
176 			memcmp_g(gpa + size, init_p,
177 				 (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
178 
179 		/*
180 		 * Convert odd-number page frames back to shared to verify KVM
181 		 * also correctly handles holes in private ranges.
182 		 */
183 		for (j = 0; j < size; j += PAGE_SIZE) {
184 			if ((j >> PAGE_SHIFT) & 1) {
185 				guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
186 				guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
187 
188 				memcmp_g(gpa + j, p3, PAGE_SIZE);
189 			} else {
190 				guest_sync_private(gpa + j, PAGE_SIZE, p1);
191 			}
192 		}
193 
194 skip:
195 		/*
196 		 * Convert the entire region back to shared, explicitly write
197 		 * pattern three to fill in the even-number frames before
198 		 * asking the host to verify (and write pattern four).
199 		 */
200 		guest_map_shared(gpa, size, do_fallocate);
201 		memset((void *)gpa, p3, size);
202 		guest_sync_shared(gpa, size, p3, p4);
203 		memcmp_g(gpa, p4, size);
204 
205 		/* Reset the shared memory back to the initial pattern. */
206 		memset((void *)gpa, init_p, size);
207 
208 		/*
209 		 * Free (via PUNCH_HOLE) *all* private memory so that the next
210 		 * iteration starts from a clean slate, e.g. with respect to
211 		 * whether or not there are pages/folios in guest_mem.
212 		 */
213 		guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
214 	}
215 }
216 
guest_punch_hole(uint64_t gpa,uint64_t size)217 static void guest_punch_hole(uint64_t gpa, uint64_t size)
218 {
219 	/* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
220 	uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
221 
222 	kvm_hypercall_map_gpa_range(gpa, size, flags);
223 }
224 
225 /*
226  * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
227  * proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
228  * (subsequent fault) should zero memory.
229  */
guest_test_punch_hole(uint64_t base_gpa,bool precise)230 static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
231 {
232 	const uint8_t init_p = 0xcc;
233 	int i;
234 
235 	/*
236 	 * Convert the entire range to private, this testcase is all about
237 	 * punching holes in guest_memfd, i.e. shared mappings aren't needed.
238 	 */
239 	guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
240 
241 	for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
242 		uint64_t gpa = base_gpa + test_ranges[i].offset;
243 		uint64_t size = test_ranges[i].size;
244 
245 		/*
246 		 * Free all memory before each iteration, even for the !precise
247 		 * case where the memory will be faulted back in.  Freeing and
248 		 * reallocating should obviously work, and freeing all memory
249 		 * minimizes the probability of cross-testcase influence.
250 		 */
251 		guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
252 
253 		/* Fault-in and initialize memory, and verify the pattern. */
254 		if (precise) {
255 			memset((void *)gpa, init_p, size);
256 			memcmp_g(gpa, init_p, size);
257 		} else {
258 			memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
259 			memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
260 		}
261 
262 		/*
263 		 * Punch a hole at the target range and verify that reads from
264 		 * the guest succeed and return zeroes.
265 		 */
266 		guest_punch_hole(gpa, size);
267 		memcmp_g(gpa, 0, size);
268 	}
269 }
270 
/*
 * Guest entry point.  base_gpa is the start of this vCPU's data area.  Runs
 * every guest test in both of its modes and then signals completion.
 */
static void guest_code(uint64_t base_gpa)
{
	int pass;

	/*
	 * Run the conversion test twice, without and then with doing
	 * fallocate() on the guest_memfd backing when converting between
	 * shared and private.
	 */
	for (pass = 0; pass < 2; pass++)
		guest_test_explicit_conversion(base_gpa, pass != 0);

	/*
	 * Run the PUNCH_HOLE test twice too, first with the entire guest_memfd
	 * faulted in, then with only the target range faulted in.
	 */
	for (pass = 0; pass < 2; pass++)
		guest_test_punch_hole(base_gpa, pass != 0);

	GUEST_DONE();
}
288 
handle_exit_hypercall(struct kvm_vcpu * vcpu)289 static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
290 {
291 	struct kvm_run *run = vcpu->run;
292 	uint64_t gpa = run->hypercall.args[0];
293 	uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
294 	bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
295 	bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
296 	bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
297 	struct kvm_vm *vm = vcpu->vm;
298 
299 	TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
300 		    "Wanted MAP_GPA_RANGE (%u), got '%llu'",
301 		    KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
302 
303 	if (do_fallocate)
304 		vm_guest_mem_fallocate(vm, gpa, size, map_shared);
305 
306 	if (set_attributes)
307 		vm_set_memory_attributes(vm, gpa, size,
308 					 map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
309 	run->hypercall.ret = 0;
310 }
311 
312 static bool run_vcpus;
313 
__test_mem_conversions(void * __vcpu)314 static void *__test_mem_conversions(void *__vcpu)
315 {
316 	struct kvm_vcpu *vcpu = __vcpu;
317 	struct kvm_run *run = vcpu->run;
318 	struct kvm_vm *vm = vcpu->vm;
319 	struct ucall uc;
320 
321 	while (!READ_ONCE(run_vcpus))
322 		;
323 
324 	for ( ;; ) {
325 		vcpu_run(vcpu);
326 
327 		if (run->exit_reason == KVM_EXIT_HYPERCALL) {
328 			handle_exit_hypercall(vcpu);
329 			continue;
330 		}
331 
332 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
333 			    "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
334 			    run->exit_reason, exit_reason_str(run->exit_reason));
335 
336 		switch (get_ucall(vcpu, &uc)) {
337 		case UCALL_ABORT:
338 			REPORT_GUEST_ASSERT(uc);
339 		case UCALL_SYNC: {
340 			uint64_t gpa  = uc.args[1];
341 			size_t size = uc.args[2];
342 			size_t i;
343 
344 			TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
345 				    uc.args[0] == SYNC_PRIVATE,
346 				    "Unknown sync command '%ld'", uc.args[0]);
347 
348 			for (i = 0; i < size; i += vm->page_size) {
349 				size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
350 				uint8_t *hva = addr_gpa2hva(vm, gpa + i);
351 
352 				/* In all cases, the host should observe the shared data. */
353 				memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
354 
355 				/* For shared, write the new pattern to guest memory. */
356 				if (uc.args[0] == SYNC_SHARED)
357 					memset(hva, uc.args[4], nr_bytes);
358 			}
359 			break;
360 		}
361 		case UCALL_DONE:
362 			return NULL;
363 		default:
364 			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
365 		}
366 	}
367 }
368 
test_mem_conversions(enum vm_mem_backing_src_type src_type,uint32_t nr_vcpus,uint32_t nr_memslots)369 static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
370 				 uint32_t nr_memslots)
371 {
372 	/*
373 	 * Allocate enough memory so that each vCPU's chunk of memory can be
374 	 * naturally aligned with respect to the size of the backing store.
375 	 */
376 	const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
377 	const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
378 	const size_t memfd_size = per_cpu_size * nr_vcpus;
379 	const size_t slot_size = memfd_size / nr_memslots;
380 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
381 	pthread_t threads[KVM_MAX_VCPUS];
382 	struct kvm_vm *vm;
383 	int memfd, i, r;
384 
385 	const struct vm_shape shape = {
386 		.mode = VM_MODE_DEFAULT,
387 		.type = KVM_X86_SW_PROTECTED_VM,
388 	};
389 
390 	TEST_ASSERT(slot_size * nr_memslots == memfd_size,
391 		    "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
392 		    memfd_size, nr_memslots);
393 	vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
394 
395 	vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
396 
397 	memfd = vm_create_guest_memfd(vm, memfd_size, 0);
398 
399 	for (i = 0; i < nr_memslots; i++)
400 		vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
401 			   BASE_DATA_SLOT + i, slot_size / vm->page_size,
402 			   KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
403 
404 	for (i = 0; i < nr_vcpus; i++) {
405 		uint64_t gpa =  BASE_DATA_GPA + i * per_cpu_size;
406 
407 		vcpu_args_set(vcpus[i], 1, gpa);
408 
409 		/*
410 		 * Map only what is needed so that an out-of-bounds access
411 		 * results #PF => SHUTDOWN instead of data corruption.
412 		 */
413 		virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
414 
415 		pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
416 	}
417 
418 	WRITE_ONCE(run_vcpus, true);
419 
420 	for (i = 0; i < nr_vcpus; i++)
421 		pthread_join(threads[i], NULL);
422 
423 	kvm_vm_free(vm);
424 
425 	/*
426 	 * Allocate and free memory from the guest_memfd after closing the VM
427 	 * fd.  The guest_memfd is gifted a reference to its owning VM, i.e.
428 	 * should prevent the VM from being fully destroyed until the last
429 	 * reference to the guest_memfd is also put.
430 	 */
431 	r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
432 	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
433 
434 	r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
435 	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
436 
437 	close(memfd);
438 }
439 
/* Print the command line help for the program invoked as cmd to stdout. */
static void usage(const char *cmd)
{
	/* Option descriptions printed after the backing source help. */
	static const char * const option_lines[] = {
		"",
		" -n: specify the number of vcpus (default: 1)",
		"",
		" -m: specify the number of memslots (default: 1)",
		"",
	};
	size_t line;

	puts("");
	printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
	puts("");
	backing_src_help("-s");

	for (line = 0; line < sizeof(option_lines) / sizeof(option_lines[0]); line++)
		puts(option_lines[line]);
}
452 
main(int argc,char * argv[])453 int main(int argc, char *argv[])
454 {
455 	enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
456 	uint32_t nr_memslots = 1;
457 	uint32_t nr_vcpus = 1;
458 	int opt;
459 
460 	TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
461 
462 	while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
463 		switch (opt) {
464 		case 's':
465 			src_type = parse_backing_src_type(optarg);
466 			break;
467 		case 'n':
468 			nr_vcpus = atoi_positive("nr_vcpus", optarg);
469 			break;
470 		case 'm':
471 			nr_memslots = atoi_positive("nr_memslots", optarg);
472 			break;
473 		case 'h':
474 		default:
475 			usage(argv[0]);
476 			exit(0);
477 		}
478 	}
479 
480 	test_mem_conversions(src_type, nr_vcpus, nr_memslots);
481 
482 	return 0;
483 }
484