// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright Intel Corporation, 2023
 *
 * Author: Chao Peng <chao.p.peng@linux.intel.com>
 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>

#include <linux/bitmap.h>
#include <linux/falloc.h>
#include <linux/sizes.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "kvm_util.h"
#include "test_util.h"
#include "ucall_common.h"

static size_t page_size;

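/* read()/write() and their positional variants must always fail on guest_memfd. */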
static void test_file_read_write(int fd, size_t total_size)
{
	char buf[64];

	TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
		    "read on a guest_mem fd should fail");
	TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
		    "write on a guest_mem fd should fail");
	TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
		    "pread on a guest_mem fd should fail");
	TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
		    "pwrite on a guest_mem fd should fail");
}

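/* MAP_PRIVATE, i.e. copy-on-write, mappings of guest_memfd must be rejected. */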
static void test_mmap_cow(int fd, size_t size)
{
	void *mem;

	mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
}

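/*
 * Verify that a shared mapping is fully usable: fill the file through the
 * mapping, punch a hole over the first page and check that it reads back as
 * zeroes while the rest is untouched, then refill the hole.
 */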
static void test_mmap_supported(int fd, size_t total_size)
{
	const char val = 0xaa;
	char *mem;
	size_t i;
	int ret;

	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	memset(mem, val, total_size);
	for (i = 0; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
			page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) on the first page should succeed.");

	for (i = 0; i < page_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);
	for (; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	memset(mem, val, page_size);
	for (i = 0; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	kvm_munmap(mem, total_size);
}

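/*
 * Map @map_size bytes and verify that accesses beyond @accessible_size result
 * in SIGBUS, while writes below that boundary land in the file.
 */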
static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
{
	const char val = 0xaa;
	char *mem;
	size_t i;

	mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
	TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));

	for (i = 0; i < accessible_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	kvm_munmap(mem, map_size);
}

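/* Faulting in memory beyond the end of the file must generate SIGBUS. */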
static void test_fault_overflow(int fd, size_t total_size)
{
	test_fault_sigbus(fd, total_size, total_size * 4);
}

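/* Faulting in private memory (no INIT_SHARED) from host userspace must generate SIGBUS. */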
static void test_fault_private(int fd, size_t total_size)
{
	test_fault_sigbus(fd, 0, total_size);
}

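/* Without GUEST_MEMFD_FLAG_MMAP, mmap() must fail regardless of the requested size. */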
static void test_mmap_not_supported(int fd, size_t total_size)
{
	char *mem;

	mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	TEST_ASSERT_EQ(mem, MAP_FAILED);

	mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	TEST_ASSERT_EQ(mem, MAP_FAILED);
}

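/* fstat() must report the requested file size, with the system page size as the block size. */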
static void test_file_size(int fd, size_t total_size)
{
	struct stat sb;
	int ret;

	ret = fstat(fd, &sb);
	TEST_ASSERT(!ret, "fstat should succeed");
	TEST_ASSERT_EQ(sb.st_size, total_size);
	TEST_ASSERT_EQ(sb.st_blksize, page_size);
}

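/*
 * fallocate() requires a page-aligned offset and size.  Allocating beyond EOF
 * must fail, whereas punching a hole at or beyond EOF is a successful nop.
 */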
static void test_fallocate(int fd, size_t total_size)
{
	int ret;

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);
	TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size - 1, page_size);
	TEST_ASSERT(ret, "fallocate with unaligned offset should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);
	TEST_ASSERT(ret, "fallocate beginning at total_size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);
	TEST_ASSERT(ret, "fallocate beginning after total_size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			total_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			total_size + page_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size, page_size - 1);
	TEST_ASSERT(ret, "fallocate with unaligned size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);
	TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
}

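/* PUNCH_HOLE with an offset and/or length that isn't page-aligned must fail with EINVAL. */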
static void test_invalid_punch_hole(int fd, size_t total_size)
{
	struct {
		off_t offset;
		off_t len;
	} testcases[] = {
		{0, 1},
		{0, page_size - 1},
		{0, page_size + 1},

		{1, 1},
		{1, page_size - 1},
		{1, page_size},
		{1, page_size + 1},

		{page_size, 1},
		{page_size, page_size - 1},
		{page_size, page_size + 1},
	};
	int ret, i;

	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
		ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
				testcases[i].offset, testcases[i].len);
		TEST_ASSERT(ret == -1 && errno == EINVAL,
			    "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail",
			    testcases[i].offset, testcases[i].len);
	}
}

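/* Creating a guest_memfd with a size that isn't page-aligned must fail with EINVAL. */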
static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
						  uint64_t guest_memfd_flags)
{
	size_t size;
	int fd;

	for (size = 1; size < page_size; size++) {
		fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);
		TEST_ASSERT(fd < 0 && errno == EINVAL,
			    "guest_memfd() with non-page-aligned size '0x%lx' should fail with EINVAL",
			    size);
	}
}

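/* A VM can own multiple guest_memfd instances, each with its own size and inode. */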
static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
{
	int fd1, fd2, ret;
	struct stat st1, st2;

	fd1 = __vm_create_guest_memfd(vm, page_size, 0);
	TEST_ASSERT(fd1 != -1, "memfd creation should succeed");

	ret = fstat(fd1, &st1);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");

	fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
	TEST_ASSERT(fd2 != -1, "memfd creation should succeed");

	ret = fstat(fd2, &st2);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");

	ret = fstat(fd1, &st1);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
	TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");

	close(fd2);
	close(fd1);
}

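/*
 * Walk all single-bit flags: every flag advertised by
 * KVM_CAP_GUEST_MEMFD_FLAGS must be accepted, anything else must be rejected
 * with EINVAL.
 */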
static void test_guest_memfd_flags(struct kvm_vm *vm)
{
	uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
	uint64_t flag;
	int fd;

	for (flag = BIT(0); flag; flag <<= 1) {
		fd = __vm_create_guest_memfd(vm, page_size, flag);
		if (flag & valid_flags) {
			TEST_ASSERT(fd >= 0,
				    "guest_memfd() with flag '0x%lx' should succeed",
				    flag);
			close(fd);
		} else {
			TEST_ASSERT(fd < 0 && errno == EINVAL,
				    "guest_memfd() with flag '0x%lx' should fail with EINVAL",
				    flag);
		}
	}
}

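/* Run @__test against a fresh, 4-page guest_memfd created with @__flags. */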
#define gmem_test(__test, __vm, __flags)				\
do {									\
	int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags);	\
									\
	test_##__test(fd, page_size * 4);				\
	close(fd);							\
} while (0)

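/* Run every test that is relevant for the given set of guest_memfd flags. */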
static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
{
	test_create_guest_memfd_multiple(vm);
	test_create_guest_memfd_invalid_sizes(vm, flags);

	gmem_test(file_read_write, vm, flags);

	if (flags & GUEST_MEMFD_FLAG_MMAP) {
		if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
			gmem_test(mmap_supported, vm, flags);
			gmem_test(fault_overflow, vm, flags);
		} else {
			gmem_test(fault_private, vm, flags);
		}

		gmem_test(mmap_cow, vm, flags);
	} else {
		gmem_test(mmap_not_supported, vm, flags);
	}

	gmem_test(file_size, vm, flags);
	gmem_test(fallocate, vm, flags);
	gmem_test(invalid_punch_hole, vm, flags);
}

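/* Exercise guest_memfd with no flags, with MMAP, and with MMAP+INIT_SHARED, as supported by the VM type. */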
static void test_guest_memfd(unsigned long vm_type)
{
	struct kvm_vm *vm = vm_create_barebones_type(vm_type);
	uint64_t flags;

	test_guest_memfd_flags(vm);

	__test_guest_memfd(vm, 0);

	flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
	if (flags & GUEST_MEMFD_FLAG_MMAP)
		__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);

	/* MMAP should always be supported if INIT_SHARED is supported. */
	if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
		__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
				       GUEST_MEMFD_FLAG_INIT_SHARED);

	kvm_vm_free(vm);
}

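/* Guest payload: verify the host's 0xaa pattern, then overwrite memory with 0xff for the host to check. */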
static void guest_code(uint8_t *mem, uint64_t size)
{
	size_t i;

	for (i = 0; i < size; i++)
		__GUEST_ASSERT(mem[i] == 0xaa,
			       "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);

	memset(mem, 0xff, size);
	GUEST_DONE();
}

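/*
 * Map a guest_memfd-backed memslot into a guest and verify that host and
 * guest observe each other's writes (requires MMAP and INIT_SHARED).
 */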
static void test_guest_memfd_guest(void)
{
	/*
	 * Skip the first 4gb and slot0.  slot0 maps <1gb and is used to back
	 * the guest's code, stack, and page tables, and low memory contains
	 * the PCI hole and other MMIO regions that need to be avoided.
	 */
	const uint64_t gpa = SZ_4G;
	const int slot = 1;

	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;
	uint8_t *mem;
	size_t size;
	int fd, i;

	if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
		return;

	vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);

	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
		    "Default VM type should support MMAP, supported flags = 0x%x",
		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
		    "Default VM type should support INIT_SHARED, supported flags = 0x%x",
		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));

	size = vm->page_size;
	fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
					     GUEST_MEMFD_FLAG_INIT_SHARED);
	vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);

	mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
	memset(mem, 0xaa, size);
	kvm_munmap(mem, size);

	virt_pg_map(vm, gpa, gpa);
	vcpu_args_set(vcpu, 2, gpa, size);
	vcpu_run(vcpu);

	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);

	mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
	for (i = 0; i < size; i++)
		TEST_ASSERT_EQ(mem[i], 0xff);

	close(fd);
	kvm_vm_free(vm);
}

int main(int argc, char *argv[])
{
	unsigned long vm_types, vm_type;

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));

	page_size = getpagesize();

	/*
	 * Not all architectures support KVM_CAP_VM_TYPES.  However, those that
	 * support guest_memfd do so for the default VM type.
	 */
	vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);
	if (!vm_types)
		vm_types = BIT(VM_TYPE_DEFAULT);

	for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))
		test_guest_memfd(vm_type);

	test_guest_memfd_guest();
}
395