xref: /linux/tools/testing/selftests/kvm/guest_memfd_test.c (revision 51d90a15fedf8366cb96ef68d0ea2d0bf15417d2)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright Intel Corporation, 2023
 *
 * Author: Chao Peng <chao.p.peng@linux.intel.com>
 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>

#include <linux/bitmap.h>
#include <linux/falloc.h>
#include <linux/sizes.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "kvm_util.h"
#include "numaif.h"
#include "test_util.h"
#include "ucall_common.h"

static size_t page_size;

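/* Verify that read(), write(), pread(), and pwrite() all fail on a guest_memfd. */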
static void test_file_read_write(int fd, size_t total_size)
{
	char buf[64];

	TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
		    "read on a guest_mem fd should fail");
	TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
		    "write on a guest_mem fd should fail");
	TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
		    "pread on a guest_mem fd should fail");
	TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
		    "pwrite on a guest_mem fd should fail");
}

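/* MAP_PRIVATE (copy-on-write) mappings of a guest_memfd must be rejected. */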
static void test_mmap_cow(int fd, size_t size)
{
	void *mem;

	mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
}

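/*
 * Verify that a shared mapping can be written and read back, and that punching
 * a hole in the first page discards its contents (re-faults observe zeros)
 * while the rest of the file is left untouched.
 */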
static void test_mmap_supported(int fd, size_t total_size)
{
	const char val = 0xaa;
	char *mem;
	size_t i;
	int ret;

	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	memset(mem, val, total_size);
	for (i = 0; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
			page_size);
	TEST_ASSERT(!ret, "fallocate the first page should succeed.");

	for (i = 0; i < page_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);
	for (; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	memset(mem, val, page_size);
	for (i = 0; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	kvm_munmap(mem, total_size);
}

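/*
 * Exercise mbind() on a guest_memfd mapping: MPOL_INTERLEAVE, MPOL_BIND, and
 * MPOL_DEFAULT should all be accepted and reported back by get_mempolicy(),
 * while a bogus policy value must fail with EINVAL.
 */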
static void test_mbind(int fd, size_t total_size)
{
	const unsigned long nodemask_0 = 1; /* nid: 0 */
	unsigned long nodemask = 0;
	unsigned long maxnode = 8;
	int policy;
	char *mem;
	int ret;

	if (!is_multi_numa_node_system())
		return;

	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	/* Test MPOL_INTERLEAVE policy */
	kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
		    "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
		    MPOL_INTERLEAVE, nodemask_0, policy, nodemask);

	/* Test basic MPOL_BIND policy */
	kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
		    "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
		    MPOL_BIND, nodemask_0, policy, nodemask);

	/* Test MPOL_DEFAULT policy */
	kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
		    "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
		    MPOL_DEFAULT, policy, nodemask);

	/* Test with invalid policy */
	ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
	TEST_ASSERT(ret == -1 && errno == EINVAL,
		    "mbind with invalid policy should fail with EINVAL");

	kvm_munmap(mem, total_size);
}

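/*
 * Verify that pages backing a guest_memfd mapping are allocated on the NUMA
 * nodes selected via mbind(), including after punching holes and changing the
 * policy between re-allocations.
 */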
static void test_numa_allocation(int fd, size_t total_size)
{
	unsigned long node0_mask = 1;  /* Node 0 */
	unsigned long node1_mask = 2;  /* Node 1 */
	unsigned long maxnode = 8;
	void *pages[4];
	int status[4];
	char *mem;
	int i;

	if (!is_multi_numa_node_system())
		return;

	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	for (i = 0; i < 4; i++)
		pages[i] = (char *)mem + page_size * i;

	/* Set NUMA policy after allocation */
	memset(mem, 0xaa, page_size);
	kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
	kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);

	/* Set NUMA policy before allocation */
	kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
	kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
	memset(mem, 0xaa, total_size);

	/* Validate if pages are allocated on specified NUMA nodes */
	kvm_move_pages(0, 4, pages, NULL, status, 0);
	TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
	TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
	TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
	TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);

	/* Punch hole for all pages */
	kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);

	/* Change NUMA policy nodes and reallocate */
	kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
	kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
	memset(mem, 0xaa, total_size);

	kvm_move_pages(0, 4, pages, NULL, status, 0);
	TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
	TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
	TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
	TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);

	kvm_munmap(mem, total_size);
}

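/*
 * Helper to verify that accesses beyond @accessible_size generate SIGBUS, and
 * that the accessible portion of the mapping still holds the data written by
 * the (partially completed) memset().
 */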
static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
{
	const char val = 0xaa;
	char *mem;
	size_t i;

	mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
	TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));

	for (i = 0; i < accessible_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	kvm_munmap(mem, map_size);
}

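/*
 * Wrappers for test_fault_sigbus(): with INIT_SHARED, only accesses beyond the
 * end of the file should SIGBUS; without INIT_SHARED, every access should
 * SIGBUS as none of the memory is host-accessible.
 */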
static void test_fault_overflow(int fd, size_t total_size)
{
	test_fault_sigbus(fd, total_size, total_size * 4);
}

static void test_fault_private(int fd, size_t total_size)
{
	test_fault_sigbus(fd, 0, total_size);
}

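/* Without GUEST_MEMFD_FLAG_MMAP, mmap() on a guest_memfd must fail. */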
static void test_mmap_not_supported(int fd, size_t total_size)
{
	char *mem;

	mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	TEST_ASSERT_EQ(mem, MAP_FAILED);

	mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	TEST_ASSERT_EQ(mem, MAP_FAILED);
}

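/* fstat() should report the requested file size and a block size of one page. */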
static void test_file_size(int fd, size_t total_size)
{
	struct stat sb;
	int ret;

	ret = fstat(fd, &sb);
	TEST_ASSERT(!ret, "fstat should succeed");
	TEST_ASSERT_EQ(sb.st_size, total_size);
	TEST_ASSERT_EQ(sb.st_blksize, page_size);
}

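/*
 * Exercise fallocate(): page-aligned allocation and PUNCH_HOLE should succeed,
 * unaligned offsets or sizes should fail, allocating at or beyond EOF should
 * fail, and punching holes at or beyond EOF should succeed.
 */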
static void test_fallocate(int fd, size_t total_size)
{
	int ret;

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);
	TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size - 1, page_size);
	TEST_ASSERT(ret, "fallocate with unaligned offset should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);
	TEST_ASSERT(ret, "fallocate beginning at total_size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);
	TEST_ASSERT(ret, "fallocate beginning after total_size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			total_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			total_size + page_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size, page_size - 1);
	TEST_ASSERT(ret, "fallocate with unaligned size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);
	TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
}

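/*
 * PUNCH_HOLE must fail with EINVAL if either the offset or the length is not
 * page-aligned.
 */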
static void test_invalid_punch_hole(int fd, size_t total_size)
{
	struct {
		off_t offset;
		off_t len;
	} testcases[] = {
		{0, 1},
		{0, page_size - 1},
		{0, page_size + 1},

		{1, 1},
		{1, page_size - 1},
		{1, page_size},
		{1, page_size + 1},

		{page_size, 1},
		{page_size, page_size - 1},
		{page_size, page_size + 1},
	};
	int ret, i;

	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
		ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
				testcases[i].offset, testcases[i].len);
		TEST_ASSERT(ret == -1 && errno == EINVAL,
			    "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail",
			    testcases[i].offset, testcases[i].len);
	}
}

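/* Creating a guest_memfd with a size that isn't page-aligned must fail with EINVAL. */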
static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
						  uint64_t guest_memfd_flags)
{
	size_t size;
	int fd;

	for (size = 1; size < page_size; size++) {
		fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);
		TEST_ASSERT(fd < 0 && errno == EINVAL,
			    "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
			    size);
	}
}

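/*
 * Verify that multiple guest_memfds can coexist: each gets its own inode and
 * retains its own size.
 */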
static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
{
	int fd1, fd2, ret;
	struct stat st1, st2;

	fd1 = __vm_create_guest_memfd(vm, page_size, 0);
	TEST_ASSERT(fd1 != -1, "memfd creation should succeed");

	ret = fstat(fd1, &st1);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");

	fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
	TEST_ASSERT(fd2 != -1, "memfd creation should succeed");

	ret = fstat(fd2, &st2);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");

	ret = fstat(fd1, &st1);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
	TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");

	close(fd2);
	close(fd1);
}

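/*
 * Walk every possible flag bit: flags advertised by KVM_CAP_GUEST_MEMFD_FLAGS
 * must be accepted, everything else must fail with EINVAL.
 */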
static void test_guest_memfd_flags(struct kvm_vm *vm)
{
	uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
	uint64_t flag;
	int fd;

	for (flag = BIT(0); flag; flag <<= 1) {
		fd = __vm_create_guest_memfd(vm, page_size, flag);
		if (flag & valid_flags) {
			TEST_ASSERT(fd >= 0,
				    "guest_memfd() with flag '0x%lx' should succeed",
				    flag);
			close(fd);
		} else {
			TEST_ASSERT(fd < 0 && errno == EINVAL,
				    "guest_memfd() with flag '0x%lx' should fail with EINVAL",
				    flag);
		}
	}
}

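/*
 * Create a fresh 4-page guest_memfd with @__flags, run the given test_*()
 * helper against it, and close it.
 */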
#define gmem_test(__test, __vm, __flags)				\
do {									\
	int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags);	\
									\
	test_##__test(fd, page_size * 4);				\
	close(fd);							\
} while (0)

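/* Run all file-level tests that are valid for the given guest_memfd flags. */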
static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
{
	test_create_guest_memfd_multiple(vm);
	test_create_guest_memfd_invalid_sizes(vm, flags);

	gmem_test(file_read_write, vm, flags);

	if (flags & GUEST_MEMFD_FLAG_MMAP) {
		if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
			gmem_test(mmap_supported, vm, flags);
			gmem_test(fault_overflow, vm, flags);
			gmem_test(numa_allocation, vm, flags);
		} else {
			gmem_test(fault_private, vm, flags);
		}

		gmem_test(mmap_cow, vm, flags);
		gmem_test(mbind, vm, flags);
	} else {
		gmem_test(mmap_not_supported, vm, flags);
	}

	gmem_test(file_size, vm, flags);
	gmem_test(fallocate, vm, flags);
	gmem_test(invalid_punch_hole, vm, flags);
}

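/*
 * Test guest_memfd for a given VM type with no flags, with MMAP, and with
 * MMAP + INIT_SHARED, as supported per KVM_CAP_GUEST_MEMFD_FLAGS.
 */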
static void test_guest_memfd(unsigned long vm_type)
{
	struct kvm_vm *vm = vm_create_barebones_type(vm_type);
	uint64_t flags;

	test_guest_memfd_flags(vm);

	__test_guest_memfd(vm, 0);

	flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
	if (flags & GUEST_MEMFD_FLAG_MMAP)
		__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);

	/* MMAP should always be supported if INIT_SHARED is supported. */
	if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
		__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
				       GUEST_MEMFD_FLAG_INIT_SHARED);

	kvm_vm_free(vm);
}

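/*
 * Guest code: verify that the 0xaa pattern written by the host is visible,
 * then overwrite the buffer with 0xff for the host to check.
 */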
static void guest_code(uint8_t *mem, uint64_t size)
{
	size_t i;

	for (i = 0; i < size; i++)
		__GUEST_ASSERT(mem[i] == 0xaa,
			       "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);

	memset(mem, 0xff, size);
	GUEST_DONE();
}

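/*
 * Map a guest_memfd into a VM as a memslot and verify that writes made through
 * the host's mmap() are visible to the guest, and vice versa.
 */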
static void test_guest_memfd_guest(void)
{
	/*
	 * Skip the first 4gb and slot0.  slot0 maps <1gb and is used to back
	 * the guest's code, stack, and page tables, and low memory contains
	 * the PCI hole and other MMIO regions that need to be avoided.
	 */
	const uint64_t gpa = SZ_4G;
	const int slot = 1;

	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;
	uint8_t *mem;
	size_t size;
	int fd, i;

	if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
		return;

	vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);

	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
		    "Default VM type should support MMAP, supported flags = 0x%x",
		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
		    "Default VM type should support INIT_SHARED, supported flags = 0x%x",
		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));

	size = vm->page_size;
	fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
					     GUEST_MEMFD_FLAG_INIT_SHARED);
	vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);

	mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
	memset(mem, 0xaa, size);
	kvm_munmap(mem, size);

	virt_pg_map(vm, gpa, gpa);
	vcpu_args_set(vcpu, 2, gpa, size);
	vcpu_run(vcpu);

	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);

	mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
	for (i = 0; i < size; i++)
		TEST_ASSERT_EQ(mem[i], 0xff);

	close(fd);
	kvm_vm_free(vm);
}

int main(int argc, char *argv[])
{
	unsigned long vm_types, vm_type;

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));

	page_size = getpagesize();

	/*
	 * Not all architectures support KVM_CAP_VM_TYPES. However, those that
	 * support guest_memfd have that support for the default VM type.
	 */
	vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);
	if (!vm_types)
		vm_types = BIT(VM_TYPE_DEFAULT);

	for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))
		test_guest_memfd(vm_type);

	test_guest_memfd_guest();
}