xref: /linux/tools/testing/selftests/mm/uffd-unit-tests.c (revision 1d95be59ca0698c1095a08e11b6bc60df7bfaa26)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Userfaultfd unit tests.
4  *
5  *  Copyright (C) 2015-2023  Red Hat, Inc.
6  */
7 
8 #include "uffd-common.h"
9 
10 #include "../../../../mm/gup_test.h"
11 
12 #ifdef __NR_userfaultfd
13 
/*
 * Size of the memory area used by the unit tests.  Nothing here needs a
 * large or randomized size, so a fixed 32MB is used for now.
 */
#define  UFFD_TEST_MEM_SIZE               (32UL << 20)

/* One bit per memory type; used in mem_type.mem_flag */
#define  MEM_ANON                         BIT_ULL(0)
#define  MEM_SHMEM                        BIT_ULL(1)
#define  MEM_SHMEM_PRIVATE                BIT_ULL(2)
#define  MEM_HUGETLB                      BIT_ULL(3)
#define  MEM_HUGETLB_PRIVATE              BIT_ULL(4)

/* Union of every memory type bit defined above */
#define  MEM_ALL						\
	(MEM_ANON |						\
	 MEM_SHMEM | MEM_SHMEM_PRIVATE |			\
	 MEM_HUGETLB | MEM_HUGETLB_PRIVATE)
25 
/*
 * Round x up to a multiple of align_to, keeping the type of x.  The mask
 * arithmetic only works when align_to is a power of two.
 */
#define ALIGN_UP(x, align_to)						\
	((__typeof__(x))(((unsigned long)(x) + ((align_to) - 1)) &	\
			 ~((align_to) - 1)))

/* Larger of a and b.  NOTE: the selected argument is evaluated twice. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
30 
31 struct mem_type {
32 	const char *name;
33 	unsigned int mem_flag;
34 	uffd_test_ops_t *mem_ops;
35 	bool shared;
36 };
37 typedef struct mem_type mem_type_t;
38 
39 mem_type_t mem_types[] = {
40 	{
41 		.name = "anon",
42 		.mem_flag = MEM_ANON,
43 		.mem_ops = &anon_uffd_test_ops,
44 		.shared = false,
45 	},
46 	{
47 		.name = "shmem",
48 		.mem_flag = MEM_SHMEM,
49 		.mem_ops = &shmem_uffd_test_ops,
50 		.shared = true,
51 	},
52 	{
53 		.name = "shmem-private",
54 		.mem_flag = MEM_SHMEM_PRIVATE,
55 		.mem_ops = &shmem_uffd_test_ops,
56 		.shared = false,
57 	},
58 	{
59 		.name = "hugetlb",
60 		.mem_flag = MEM_HUGETLB,
61 		.mem_ops = &hugetlb_uffd_test_ops,
62 		.shared = true,
63 	},
64 	{
65 		.name = "hugetlb-private",
66 		.mem_flag = MEM_HUGETLB_PRIVATE,
67 		.mem_ops = &hugetlb_uffd_test_ops,
68 		.shared = false,
69 	},
70 };
71 
72 /* Arguments to be passed over to each uffd unit test */
73 struct uffd_test_args {
74 	mem_type_t *mem_type;
75 };
76 typedef struct uffd_test_args uffd_test_args_t;
77 
78 /* Returns: UFFD_TEST_* */
79 typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *);
80 
81 typedef struct {
82 	const char *name;
83 	uffd_test_fn uffd_fn;
84 	unsigned int mem_targets;
85 	uint64_t uffd_feature_required;
86 	uffd_test_case_ops_t *test_case_ops;
87 } uffd_test_case_t;
88 
/* Name of the test that is currently running; set by uffd_test_start() */
static char current_test[256];

/* Report the current test as passed to the kselftest framework */
static void uffd_test_pass(void)
{
	ksft_test_result_pass("%s\n", current_test);
}
95 
/*
 * Begin a new test: format its printf-style name into current_test, which
 * the pass/fail/skip reporters print later.
 */
#define  uffd_test_start(...)						\
	do {								\
		snprintf(current_test, sizeof(current_test),		\
			 __VA_ARGS__);					\
	} while (0)
99 
/*
 * Report the current test as failed.  The printf-style reason is printed
 * first as a diagnostic message, then the failure itself is recorded.
 */
#define  uffd_test_fail(fmt, ...)					\
	do {								\
		ksft_print_msg("failed reason: [" fmt "]\n",		\
			       ##__VA_ARGS__);				\
		ksft_test_result_fail("%s\n", current_test);		\
	} while (0)
104 
105 static void uffd_test_skip(const char *message)
106 {
107 	ksft_test_result_skip("%s (%s)\n", current_test, message);
108 }
109 
110 static void test_uffd_api(bool use_dev)
111 {
112 	struct uffdio_api uffdio_api;
113 	int uffd;
114 
115 	uffd_test_start("UFFDIO_API (with %s)",
116 			use_dev ? "/dev/userfaultfd" : "syscall");
117 
118 	if (use_dev)
119 		uffd = uffd_open_dev(UFFD_FLAGS);
120 	else
121 		uffd = uffd_open_sys(UFFD_FLAGS);
122 	if (uffd < 0) {
123 		uffd_test_skip("cannot open userfaultfd handle");
124 		return;
125 	}
126 
127 	/* Test wrong UFFD_API */
128 	uffdio_api.api = 0xab;
129 	uffdio_api.features = 0;
130 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
131 		uffd_test_fail("UFFDIO_API should fail with wrong api but didn't");
132 		goto out;
133 	}
134 
135 	/* Test wrong feature bit */
136 	uffdio_api.api = UFFD_API;
137 	uffdio_api.features = BIT_ULL(63);
138 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
139 		uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't");
140 		goto out;
141 	}
142 
143 	/* Test normal UFFDIO_API */
144 	uffdio_api.api = UFFD_API;
145 	uffdio_api.features = 0;
146 	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
147 		uffd_test_fail("UFFDIO_API should succeed but failed");
148 		goto out;
149 	}
150 
151 	/* Test double requests of UFFDIO_API with a random feature set */
152 	uffdio_api.features = BIT_ULL(0);
153 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
154 		uffd_test_fail("UFFDIO_API should reject initialized uffd");
155 		goto out;
156 	}
157 
158 	uffd_test_pass();
159 out:
160 	close(uffd);
161 }
162 
163 
164 static bool uffd_feature_supported(uffd_test_case_t *test)
165 {
166 	uint64_t features;
167 
168 	if (uffd_get_features(&features))
169 		return false;
170 
171 	return (features & test->uffd_feature_required) ==
172 	    test->uffd_feature_required;
173 }
174 
/*
 * Open /proc/self/pagemap read-only and return the descriptor; a failed
 * open is reported through err().
 */
static int pagemap_open(void)
{
	int pagemap_fd;

	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		err("open pagemap");

	return pagemap_fd;
}
184 
/*
 * Check that the uffd-wp bit (PM_UFFD_WP) of a pagemap entry matches the
 * expected state @wp, and report through err() otherwise.
 *
 * This is a macro rather than a function so that __LINE__ used inside
 * err() points at the caller.  Both arguments are evaluated exactly once.
 */
#define  pagemap_check_wp(value, wp) do {				\
		uint64_t pm_entry_ = (value);				\
		if (!!(pm_entry_ & PM_UFFD_WP) != (wp))			\
			err("pagemap uffd-wp bit error: 0x%"PRIx64, pm_entry_); \
	} while (0)
190 
191 typedef struct {
192 	uffd_global_test_opts_t *gopts;
193 	int child_uffd;
194 } fork_event_args;
195 
196 static void *fork_event_consumer(void *data)
197 {
198 	fork_event_args *args = data;
199 	struct uffd_msg msg = { 0 };
200 
201 	args->gopts->ready_for_fork = true;
202 
203 	/* Read until a full msg received */
204 	while (uffd_read_msg(args->gopts, &msg));
205 
206 	if (msg.event != UFFD_EVENT_FORK)
207 		err("wrong message: %u\n", msg.event);
208 
209 	/* Just to be properly freed later */
210 	args->child_uffd = msg.arg.fork.ufd;
211 	return NULL;
212 }
213 
/* State shared between pin_pages() and unpin_pages() */
typedef struct {
	/* Descriptor of the opened gup_test debugfs file */
	int gup_fd;
	/* true between a successful pin_pages() and unpin_pages() */
	bool pinned;
} pin_args;
218 
219 /*
220  * Returns 0 if succeed, <0 for errors.  pin_pages() needs to be paired
221  * with unpin_pages().  Currently it needs to be RO longterm pin to satisfy
222  * all needs of the test cases (e.g., trigger unshare, trigger fork() early
223  * CoW, etc.).
224  */
225 static int pin_pages(pin_args *args, void *buffer, size_t size)
226 {
227 	struct pin_longterm_test test = {
228 		.addr = (uintptr_t)buffer,
229 		.size = size,
230 		/* Read-only pins */
231 		.flags = 0,
232 	};
233 
234 	if (args->pinned)
235 		err("already pinned");
236 
237 	args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
238 	if (args->gup_fd < 0)
239 		return -errno;
240 
241 	if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) {
242 		/* Even if gup_test existed, can be an old gup_test / kernel */
243 		close(args->gup_fd);
244 		return -errno;
245 	}
246 	args->pinned = true;
247 	return 0;
248 }
249 
250 static void unpin_pages(pin_args *args)
251 {
252 	if (!args->pinned)
253 		err("unpin without pin first");
254 	if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP))
255 		err("PIN_LONGTERM_TEST_STOP");
256 	close(args->gup_fd);
257 	args->pinned = false;
258 }
259 
260 static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin)
261 {
262 	fork_event_args args = { .gopts = gopts, .child_uffd = -1 };
263 	pthread_t thread;
264 	pid_t child;
265 	uint64_t value;
266 	int fd, result;
267 
268 	/* Prepare a thread to resolve EVENT_FORK */
269 	if (with_event) {
270 		gopts->ready_for_fork = false;
271 		if (pthread_create(&thread, NULL, fork_event_consumer, &args))
272 			err("pthread_create()");
273 		while (!gopts->ready_for_fork)
274 			; /* Wait for the poll_thread to start executing before forking */
275 	}
276 
277 	child = fork();
278 	if (!child) {
279 		/* Open the pagemap fd of the child itself */
280 		pin_args args = {};
281 
282 		fd = pagemap_open();
283 
284 		if (test_pin && pin_pages(&args, gopts->area_dst, gopts->page_size))
285 			/*
286 			 * Normally when reach here we have pinned in
287 			 * previous tests, so shouldn't fail anymore
288 			 */
289 			err("pin page failed in child");
290 
291 		value = pagemap_get_entry(fd, gopts->area_dst);
292 		/*
293 		 * After fork(), we should handle uffd-wp bit differently:
294 		 *
295 		 * (1) when with EVENT_FORK, it should persist
296 		 * (2) when without EVENT_FORK, it should be dropped
297 		 */
298 		pagemap_check_wp(value, with_event);
299 		if (test_pin)
300 			unpin_pages(&args);
301 		/* Succeed */
302 		_exit(0);
303 	}
304 	waitpid(child, &result, 0);
305 
306 	if (with_event) {
307 		if (pthread_join(thread, NULL))
308 			err("pthread_join()");
309 		if (args.child_uffd < 0)
310 			err("Didn't receive child uffd");
311 		close(args.child_uffd);
312 	}
313 
314 	return result;
315 }
316 
317 static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
318 {
319 	uint64_t value;
320 	int pagemap_fd;
321 
322 	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
323 			  false, true, false))
324 		err("register failed");
325 
326 	pagemap_fd = pagemap_open();
327 
328 	/* Test applying pte marker to anon unpopulated */
329 	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
330 	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
331 	pagemap_check_wp(value, true);
332 
333 	/* Test unprotect on anon pte marker */
334 	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false);
335 	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
336 	pagemap_check_wp(value, false);
337 
338 	/* Test zap on anon marker */
339 	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
340 	if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
341 		err("madvise(MADV_DONTNEED) failed");
342 	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
343 	pagemap_check_wp(value, false);
344 
345 	/* Test fault in after marker removed */
346 	*gopts->area_dst = 1;
347 	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
348 	pagemap_check_wp(value, false);
349 	/* Drop it to make pte none again */
350 	if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
351 		err("madvise(MADV_DONTNEED) failed");
352 
353 	/* Test read-zero-page upon pte marker */
354 	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
355 	*(volatile char *)gopts->area_dst;
356 	/* Drop it to make pte none again */
357 	if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
358 		err("madvise(MADV_DONTNEED) failed");
359 
360 	uffd_test_pass();
361 }
362 
363 static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args,
364 				     bool with_event)
365 {
366 	int pagemap_fd;
367 	uint64_t value;
368 
369 	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
370 			  false, true, false))
371 		err("register failed");
372 
373 	pagemap_fd = pagemap_open();
374 
375 	/* Touch the page */
376 	*gopts->area_dst = 1;
377 	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
378 	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
379 	pagemap_check_wp(value, true);
380 	if (pagemap_test_fork(gopts, with_event, false)) {
381 		uffd_test_fail("Detected %s uffd-wp bit in child in present pte",
382 			       with_event ? "missing" : "stall");
383 		goto out;
384 	}
385 
386 	/*
387 	 * This is an attempt for zapping the pgtable so as to test the
388 	 * markers.
389 	 *
390 	 * For private mappings, PAGEOUT will only work on exclusive ptes
391 	 * (PM_MMAP_EXCLUSIVE) which we should satisfy.
392 	 *
393 	 * For shared, PAGEOUT may not work.  Use DONTNEED instead which
394 	 * plays a similar role of zapping (rather than freeing the page)
395 	 * to expose pte markers.
396 	 */
397 	if (args->mem_type->shared) {
398 		if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
399 			err("MADV_DONTNEED");
400 	} else {
401 		/*
402 		 * NOTE: ignore retval because private-hugetlb doesn't yet
403 		 * support swapping, so it could fail.
404 		 */
405 		madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT);
406 	}
407 
408 	/* Uffd-wp should persist even swapped out */
409 	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
410 	pagemap_check_wp(value, true);
411 	if (pagemap_test_fork(gopts, with_event, false)) {
412 		uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte",
413 			       with_event ? "missing" : "stall");
414 		goto out;
415 	}
416 
417 	/* Unprotect; this tests swap pte modifications */
418 	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false);
419 	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
420 	pagemap_check_wp(value, false);
421 
422 	/* Fault in the page from disk */
423 	*gopts->area_dst = 2;
424 	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
425 	pagemap_check_wp(value, false);
426 	uffd_test_pass();
427 out:
428 	if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size))
429 		err("unregister failed");
430 	close(pagemap_fd);
431 }
432 
433 static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
434 {
435 	uffd_wp_fork_test_common(gopts, args, false);
436 }
437 
438 static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
439 {
440 	uffd_wp_fork_test_common(gopts, args, true);
441 }
442 
443 static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts,
444 					 uffd_test_args_t *args,
445 					 bool with_event)
446 {
447 	int pagemap_fd;
448 	pin_args pin_args = {};
449 
450 	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false))
451 		err("register failed");
452 
453 	pagemap_fd = pagemap_open();
454 
455 	/* Touch the page */
456 	*gopts->area_dst = 1;
457 	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
458 
459 	/*
460 	 * 1. First pin, then fork().  This tests fork() special path when
461 	 * doing early CoW if the page is private.
462 	 */
463 	if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) {
464 		uffd_test_skip("Possibly CONFIG_GUP_TEST missing "
465 			       "or unprivileged");
466 		close(pagemap_fd);
467 		uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size);
468 		return;
469 	}
470 
471 	if (pagemap_test_fork(gopts, with_event, false)) {
472 		uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()",
473 			       with_event ? "missing" : "stall");
474 		unpin_pages(&pin_args);
475 		goto out;
476 	}
477 
478 	unpin_pages(&pin_args);
479 
480 	/*
481 	 * 2. First fork(), then pin (in the child, where test_pin==true).
482 	 * This tests COR, aka, page unsharing on private memories.
483 	 */
484 	if (pagemap_test_fork(gopts, with_event, true)) {
485 		uffd_test_fail("Detected %s uffd-wp bit when RO pin",
486 			       with_event ? "missing" : "stall");
487 		goto out;
488 	}
489 	uffd_test_pass();
490 out:
491 	if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
492 		err("register failed");
493 	close(pagemap_fd);
494 }
495 
496 static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
497 {
498 	uffd_wp_fork_pin_test_common(gopts, args, false);
499 }
500 
501 static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
502 {
503 	uffd_wp_fork_pin_test_common(gopts, args, true);
504 }
505 
506 static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p)
507 {
508 	unsigned long i, j;
509 	uint8_t expected_byte;
510 
511 	for (i = 0; i < gopts->nr_pages; ++i) {
512 		expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
513 		for (j = 0; j < gopts->page_size; j++) {
514 			uint8_t v = *(uint8_t *)(p + (i * gopts->page_size) + j);
515 			if (v != expected_byte)
516 				err("unexpected page contents");
517 		}
518 	}
519 }
520 
521 static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp)
522 {
523 	unsigned long p;
524 	pthread_t uffd_mon;
525 	char c = '\0';
526 	struct uffd_args args = { 0 };
527 	args.gopts = gopts;
528 
529 	/*
530 	 * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing
531 	 * both do not make much sense.
532 	 */
533 	assert(!(test_collapse && test_wp));
534 
535 	if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size,
536 			  /* NOTE! MADV_COLLAPSE may not work with uffd-wp */
537 			  false, test_wp, true))
538 		err("register failure");
539 
540 	/*
541 	 * After registering with UFFD, populate the non-UFFD-registered side of
542 	 * the shared mapping. This should *not* trigger any UFFD minor faults.
543 	 */
544 	for (p = 0; p < gopts->nr_pages; ++p)
545 		memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1),
546 		       gopts->page_size);
547 
548 	args.apply_wp = test_wp;
549 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
550 		err("uffd_poll_thread create");
551 
552 	/*
553 	 * Read each of the pages back using the UFFD-registered mapping. We
554 	 * expect that the first time we touch a page, it will result in a minor
555 	 * fault. uffd_poll_thread will resolve the fault by bit-flipping the
556 	 * page's contents, and then issuing a CONTINUE ioctl.
557 	 */
558 	check_memory_contents(gopts, gopts->area_dst_alias);
559 
560 	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
561 		err("pipe write");
562 	if (pthread_join(uffd_mon, NULL))
563 		err("join() failed");
564 
565 	if (test_collapse) {
566 		if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size,
567 			    MADV_COLLAPSE)) {
568 			/* It's fine to fail for this one... */
569 			uffd_test_skip("MADV_COLLAPSE failed");
570 			return;
571 		}
572 
573 		uffd_test_ops->check_pmd_mapping(gopts,
574 						 gopts->area_dst,
575 						 gopts->nr_pages * gopts->page_size /
576 						 read_pmd_pagesize());
577 		/*
578 		 * This won't cause uffd-fault - it purely just makes sure there
579 		 * was no corruption.
580 		 */
581 		check_memory_contents(gopts, gopts->area_dst_alias);
582 	}
583 
584 	if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages)
585 		uffd_test_fail("stats check error");
586 	else
587 		uffd_test_pass();
588 }
589 
590 void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
591 {
592 	uffd_minor_test_common(gopts, false, false);
593 }
594 
595 void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
596 {
597 	uffd_minor_test_common(gopts, false, true);
598 }
599 
600 void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
601 {
602 	uffd_minor_test_common(gopts, true, false);
603 }
604 
605 static sigjmp_buf jbuf, *sigbuf;
606 
607 static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
608 {
609 	if (sig == SIGBUS) {
610 		if (sigbuf)
611 			siglongjmp(*sigbuf, 1);
612 		abort();
613 	}
614 }
615 
616 /*
617  * For non-cooperative userfaultfd test we fork() a process that will
618  * generate pagefaults, will mremap the area monitored by the
619  * userfaultfd and at last this process will release the monitored
620  * area.
621  * For the anonymous and shared memory the area is divided into two
622  * parts, the first part is accessed before mremap, and the second
623  * part is accessed after mremap. Since hugetlbfs does not support
624  * mremap, the entire monitored area is accessed in a single pass for
625  * HUGETLB_TEST.
626  * The release of the pages currently generates event for shmem and
627  * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
628  * for hugetlb.
629  * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
630  * monitored area, generate pagefaults and test that signal is delivered.
631  * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
632  * test robustness use case - we release monitored area, fork a process
633  * that will generate pagefaults and verify signal is generated.
634  * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
635  * feature. Using monitor thread, verify no userfault events are generated.
636  */
637 static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp)
638 {
639 	unsigned long nr, i;
640 	unsigned long long count;
641 	unsigned long split_nr_pages;
642 	unsigned long lastnr;
643 	struct sigaction act;
644 	volatile unsigned long signalled = 0;
645 
646 	split_nr_pages = (gopts->nr_pages + 1) / 2;
647 
648 	if (signal_test) {
649 		sigbuf = &jbuf;
650 		memset(&act, 0, sizeof(act));
651 		act.sa_sigaction = sighndl;
652 		act.sa_flags = SA_SIGINFO;
653 		if (sigaction(SIGBUS, &act, 0))
654 			err("sigaction");
655 		lastnr = (unsigned long)-1;
656 	}
657 
658 	for (nr = 0; nr < split_nr_pages; nr++) {
659 		volatile int steps = 1;
660 		unsigned long offset = nr * gopts->page_size;
661 
662 		if (signal_test) {
663 			if (sigsetjmp(*sigbuf, 1) != 0) {
664 				if (steps == 1 && nr == lastnr)
665 					err("Signal repeated");
666 
667 				lastnr = nr;
668 				if (signal_test == 1) {
669 					if (steps == 1) {
670 						/* This is a MISSING request */
671 						steps++;
672 						if (copy_page(gopts, offset, wp))
673 							signalled++;
674 					} else {
675 						/* This is a WP request */
676 						assert(steps == 2);
677 						wp_range(gopts->uffd,
678 							 (__u64)gopts->area_dst +
679 							 offset,
680 							 gopts->page_size, false);
681 					}
682 				} else {
683 					signalled++;
684 					continue;
685 				}
686 			}
687 		}
688 
689 		count = *area_count(gopts->area_dst, nr, gopts);
690 		if (count != gopts->count_verify[nr])
691 			err("nr %lu memory corruption %llu %llu\n",
692 			    nr, count, gopts->count_verify[nr]);
693 		/*
694 		 * Trigger write protection if there is by writing
695 		 * the same value back.
696 		 */
697 		*area_count(gopts->area_dst, nr, gopts) = count;
698 	}
699 
700 	if (signal_test)
701 		return signalled != split_nr_pages;
702 
703 	gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size,
704 				 gopts->nr_pages * gopts->page_size,
705 				 MREMAP_MAYMOVE | MREMAP_FIXED,
706 				 gopts->area_src);
707 	if (gopts->area_dst == MAP_FAILED)
708 		err("mremap");
709 	/* Reset area_src since we just clobbered it */
710 	gopts->area_src = NULL;
711 
712 	for (; nr < gopts->nr_pages; nr++) {
713 		count = *area_count(gopts->area_dst, nr, gopts);
714 		if (count != gopts->count_verify[nr]) {
715 			err("nr %lu memory corruption %llu %llu\n",
716 			    nr, count, gopts->count_verify[nr]);
717 		}
718 		/*
719 		 * Trigger write protection if there is by writing
720 		 * the same value back.
721 		 */
722 		*area_count(gopts->area_dst, nr, gopts) = count;
723 	}
724 
725 	uffd_test_ops->release_pages(gopts, gopts->area_dst);
726 
727 	for (nr = 0; nr < gopts->nr_pages; nr++)
728 		for (i = 0; i < gopts->page_size; i++)
729 			if (*(gopts->area_dst + nr * gopts->page_size + i) != 0)
730 				err("page %lu offset %lu is not zero", nr, i);
731 
732 	return 0;
733 }
734 
735 static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp)
736 {
737 	unsigned long userfaults;
738 	pthread_t uffd_mon;
739 	pid_t pid;
740 	int err;
741 	char c = '\0';
742 	struct uffd_args args = { 0 };
743 	args.gopts = gopts;
744 
745 	gopts->ready_for_fork = false;
746 
747 	fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);
748 
749 	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
750 			  true, wp, false))
751 		err("register failure");
752 
753 	if (faulting_process(gopts, 1, wp))
754 		err("faulting process failed");
755 
756 	uffd_test_ops->release_pages(gopts, gopts->area_dst);
757 
758 	args.apply_wp = wp;
759 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
760 		err("uffd_poll_thread create");
761 
762 	while (!gopts->ready_for_fork)
763 		; /* Wait for the poll_thread to start executing before forking */
764 
765 	pid = fork();
766 	if (pid < 0)
767 		err("fork");
768 
769 	if (!pid)
770 		_exit(faulting_process(gopts, 2, wp));
771 
772 	waitpid(pid, &err, 0);
773 	if (err)
774 		err("faulting process failed");
775 	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
776 		err("pipe write");
777 	if (pthread_join(uffd_mon, (void **)&userfaults))
778 		err("pthread_join()");
779 
780 	if (userfaults)
781 		uffd_test_fail("Signal test failed, userfaults: %ld", userfaults);
782 	else
783 		uffd_test_pass();
784 }
785 
786 static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
787 {
788 	uffd_sigbus_test_common(gopts, false);
789 }
790 
791 static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
792 {
793 	uffd_sigbus_test_common(gopts, true);
794 }
795 
796 static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp)
797 {
798 	pthread_t uffd_mon;
799 	pid_t pid;
800 	int err;
801 	char c = '\0';
802 	struct uffd_args args = { 0 };
803 	args.gopts = gopts;
804 
805 	gopts->ready_for_fork = false;
806 
807 	fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);
808 	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
809 			  true, wp, false))
810 		err("register failure");
811 
812 	args.apply_wp = wp;
813 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
814 		err("uffd_poll_thread create");
815 
816 	while (!gopts->ready_for_fork)
817 		; /* Wait for the poll_thread to start executing before forking */
818 
819 	pid = fork();
820 	if (pid < 0)
821 		err("fork");
822 
823 	if (!pid)
824 		_exit(faulting_process(gopts, 0, wp));
825 
826 	waitpid(pid, &err, 0);
827 	if (err)
828 		err("faulting process failed");
829 	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
830 		err("pipe write");
831 	if (pthread_join(uffd_mon, NULL))
832 		err("pthread_join()");
833 
834 	if (args.missing_faults != gopts->nr_pages)
835 		uffd_test_fail("Fault counts wrong");
836 	else
837 		uffd_test_pass();
838 }
839 
840 static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
841 {
842 	uffd_events_test_common(gopts, false);
843 }
844 
845 static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
846 {
847 	uffd_events_test_common(gopts, true);
848 }
849 
850 static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts,
851 				  struct uffdio_zeropage *uffdio_zeropage)
852 {
853 	uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start,
854 				     uffdio_zeropage->range.len,
855 				     0);
856 	if (ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
857 		if (uffdio_zeropage->zeropage != -EEXIST)
858 			err("UFFDIO_ZEROPAGE error: %"PRId64,
859 			    (int64_t)uffdio_zeropage->zeropage);
860 	} else {
861 		err("UFFDIO_ZEROPAGE error: %"PRId64,
862 		    (int64_t)uffdio_zeropage->zeropage);
863 	}
864 }
865 
866 static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage)
867 {
868 	struct uffdio_zeropage uffdio_zeropage = { 0 };
869 	int ret;
870 	__s64 res;
871 
872 	uffdio_zeropage.range.start = (unsigned long) gopts->area_dst;
873 	uffdio_zeropage.range.len = gopts->page_size;
874 	uffdio_zeropage.mode = 0;
875 	ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
876 	res = uffdio_zeropage.zeropage;
877 	if (ret) {
878 		/* real retval in ufdio_zeropage.zeropage */
879 		if (has_zeropage)
880 			err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
881 		else if (res != -EINVAL)
882 			err("UFFDIO_ZEROPAGE not -EINVAL");
883 	} else if (has_zeropage) {
884 		if (res != gopts->page_size)
885 			err("UFFDIO_ZEROPAGE unexpected size");
886 		else
887 			retry_uffdio_zeropage(gopts, &uffdio_zeropage);
888 		return true;
889 	} else
890 		err("UFFDIO_ZEROPAGE succeeded");
891 
892 	return false;
893 }
894 
895 /*
896  * Registers a range with MISSING mode only for zeropage test.  Return true
897  * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register()
898  * because we want to detect .ioctls along the way.
899  */
900 static bool
901 uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len)
902 {
903 	uint64_t ioctls = 0;
904 
905 	if (uffd_register_with_ioctls(uffd, addr, len, true,
906 				      false, false, &ioctls))
907 		err("zeropage register fail");
908 
909 	return ioctls & (1 << _UFFDIO_ZEROPAGE);
910 }
911 
912 /* exercise UFFDIO_ZEROPAGE */
913 static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
914 {
915 	bool has_zeropage;
916 	int i;
917 
918 	has_zeropage = uffd_register_detect_zeropage(gopts->uffd,
919 						     gopts->area_dst,
920 						     gopts->page_size);
921 	if (gopts->area_dst_alias)
922 		/* Ignore the retval; we already have it */
923 		uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size);
924 
925 	if (do_uffdio_zeropage(gopts, has_zeropage))
926 		for (i = 0; i < gopts->page_size; i++)
927 			if (gopts->area_dst[i] != 0)
928 				err("data non-zero at offset %d\n", i);
929 
930 	if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
931 		err("unregister");
932 
933 	if (gopts->area_dst_alias && uffd_unregister(gopts->uffd,
934 						     gopts->area_dst_alias,
935 						     gopts->page_size))
936 		err("unregister");
937 
938 	uffd_test_pass();
939 }
940 
941 static void uffd_register_poison(int uffd, void *addr, uint64_t len)
942 {
943 	uint64_t ioctls = 0;
944 	uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON);
945 
946 	if (uffd_register_with_ioctls(uffd, addr, len, true,
947 				      false, false, &ioctls))
948 		err("poison register fail");
949 
950 	if ((ioctls & expected) != expected)
951 		err("registered area doesn't support COPY and POISON ioctls");
952 }
953 
954 static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset)
955 {
956 	struct uffdio_poison uffdio_poison = { 0 };
957 	int ret;
958 	__s64 res;
959 
960 	uffdio_poison.range.start = (unsigned long) gopts->area_dst + offset;
961 	uffdio_poison.range.len = gopts->page_size;
962 	uffdio_poison.mode = 0;
963 	ret = ioctl(gopts->uffd, UFFDIO_POISON, &uffdio_poison);
964 	res = uffdio_poison.updated;
965 
966 	if (ret)
967 		err("UFFDIO_POISON error: %"PRId64, (int64_t)res);
968 	else if (res != gopts->page_size)
969 		err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res);
970 }
971 
972 static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts,
973 				     struct uffd_msg *msg,
974 				     struct uffd_args *args)
975 {
976 	unsigned long offset;
977 
978 	if (msg->event != UFFD_EVENT_PAGEFAULT)
979 		err("unexpected msg event %u", msg->event);
980 
981 	if (msg->arg.pagefault.flags &
982 	    (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR))
983 		err("unexpected fault type %llu", msg->arg.pagefault.flags);
984 
985 	offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst;
986 	offset &= ~(gopts->page_size-1);
987 
988 	/* Odd pages -> copy zeroed page; even pages -> poison. */
989 	if (offset & gopts->page_size)
990 		copy_page(gopts, offset, false);
991 	else
992 		do_uffdio_poison(gopts, offset);
993 }
994 
995 /* Make sure to cover odd/even, and minimum duplications */
996 #define  UFFD_POISON_TEST_NPAGES  4
997 
998 static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
999 {
1000 	pthread_t uffd_mon;
1001 	char c;
1002 	struct uffd_args args = { 0 };
1003 	struct sigaction act = { 0 };
1004 	unsigned long nr_sigbus = 0;
1005 	unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES;
1006 
1007 	if (gopts->nr_pages < poison_pages) {
1008 		uffd_test_skip("Too less pages for POISON test");
1009 		return;
1010 	}
1011 
1012 	args.gopts = gopts;
1013 
1014 	fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);
1015 
1016 	uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size);
1017 	memset(gopts->area_src, 0, poison_pages * gopts->page_size);
1018 
1019 	args.handle_fault = uffd_poison_handle_fault;
1020 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
1021 		err("uffd_poll_thread create");
1022 
1023 	sigbuf = &jbuf;
1024 	act.sa_sigaction = sighndl;
1025 	act.sa_flags = SA_SIGINFO;
1026 	if (sigaction(SIGBUS, &act, 0))
1027 		err("sigaction");
1028 
1029 	for (nr = 0; nr < poison_pages; ++nr) {
1030 		unsigned long offset = nr * gopts->page_size;
1031 		const char *bytes = (const char *) gopts->area_dst + offset;
1032 		const char *i;
1033 
1034 		if (sigsetjmp(*sigbuf, 1)) {
1035 			/*
1036 			 * Access below triggered a SIGBUS, which was caught by
1037 			 * sighndl, which then jumped here. Count this SIGBUS,
1038 			 * and move on to next page.
1039 			 */
1040 			++nr_sigbus;
1041 			continue;
1042 		}
1043 
1044 		for (i = bytes; i < bytes + gopts->page_size; ++i) {
1045 			if (*i)
1046 				err("nonzero byte in area_dst (%p) at %p: %u",
1047 				    gopts->area_dst, i, *i);
1048 		}
1049 	}
1050 
1051 	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
1052 		err("pipe write");
1053 	if (pthread_join(uffd_mon, NULL))
1054 		err("pthread_join()");
1055 
1056 	if (nr_sigbus != poison_pages / 2)
1057 		err("expected to receive %lu SIGBUS, actually received %lu",
1058 		    poison_pages / 2, nr_sigbus);
1059 
1060 	uffd_test_pass();
1061 }
1062 
1063 static void
1064 uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts,
1065 			      struct uffd_msg *msg,
1066 			      struct uffd_args *args,
1067 			      unsigned long len)
1068 {
1069 	unsigned long offset;
1070 
1071 	if (msg->event != UFFD_EVENT_PAGEFAULT)
1072 		err("unexpected msg event %u", msg->event);
1073 
1074 	if (msg->arg.pagefault.flags &
1075 	    (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE))
1076 		err("unexpected fault type %llu", msg->arg.pagefault.flags);
1077 
1078 	offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst;
1079 	offset &= ~(len-1);
1080 
1081 	if (move_page(gopts, offset, len))
1082 		args->missing_faults++;
1083 }
1084 
1085 static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg,
1086 				   struct uffd_args *args)
1087 {
1088 	uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size);
1089 }
1090 
1091 static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg,
1092 				       struct uffd_args *args)
1093 {
1094 	uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize());
1095 }
1096 
1097 static void
1098 uffd_move_test_common(uffd_global_test_opts_t *gopts,
1099 		      uffd_test_args_t *targs,
1100 		      unsigned long chunk_size,
1101 		      void (*handle_fault)(struct uffd_global_test_opts *gopts,
1102 		      struct uffd_msg *msg, struct uffd_args *args)
1103 )
1104 {
1105 	unsigned long nr;
1106 	pthread_t uffd_mon;
1107 	char c = '\0';
1108 	unsigned long long count;
1109 	struct uffd_args args = { 0 };
1110 	char *orig_area_src = NULL, *orig_area_dst = NULL;
1111 	unsigned long step_size, step_count;
1112 	unsigned long src_offs = 0;
1113 	unsigned long dst_offs = 0;
1114 
1115 	args.gopts = gopts;
1116 
1117 	/* Prevent source pages from being mapped more than once */
1118 	if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK))
1119 		err("madvise(MADV_DONTFORK) failure");
1120 
1121 	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
1122 			  true, false, false))
1123 		err("register failure");
1124 
1125 	args.handle_fault = handle_fault;
1126 	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
1127 		err("uffd_poll_thread create");
1128 
1129 	step_size = chunk_size / gopts->page_size;
1130 	step_count = gopts->nr_pages / step_size;
1131 
1132 	if (chunk_size > gopts->page_size) {
1133 		char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size);
1134 		char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size);
1135 
1136 		if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) {
1137 			src_offs = (aligned_src - gopts->area_src) / gopts->page_size;
1138 			dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size;
1139 			step_count--;
1140 		}
1141 		orig_area_src = gopts->area_src;
1142 		orig_area_dst = gopts->area_dst;
1143 		gopts->area_src = aligned_src;
1144 		gopts->area_dst = aligned_dst;
1145 	}
1146 
1147 	/*
1148 	 * Read each of the pages back using the UFFD-registered mapping. We
1149 	 * expect that the first time we touch a page, it will result in a missing
1150 	 * fault. uffd_poll_thread will resolve the fault by moving source
1151 	 * page to destination.
1152 	 */
1153 	for (nr = 0; nr < step_count * step_size; nr += step_size) {
1154 		unsigned long i;
1155 
1156 		/* Check area_src content */
1157 		for (i = 0; i < step_size; i++) {
1158 			count = *area_count(gopts->area_src, nr + i, gopts);
1159 			if (count != gopts->count_verify[src_offs + nr + i])
1160 				err("nr %lu source memory invalid %llu %llu\n",
1161 				    nr + i, count, gopts->count_verify[src_offs + nr + i]);
1162 		}
1163 
1164 		/* Faulting into area_dst should move the page or the huge page */
1165 		for (i = 0; i < step_size; i++) {
1166 			count = *area_count(gopts->area_dst, nr + i, gopts);
1167 			if (count != gopts->count_verify[dst_offs + nr + i])
1168 				err("nr %lu memory corruption %llu %llu\n",
1169 				    nr, count, gopts->count_verify[dst_offs + nr + i]);
1170 		}
1171 
1172 		/* Re-check area_src content which should be empty */
1173 		for (i = 0; i < step_size; i++) {
1174 			count = *area_count(gopts->area_src, nr + i, gopts);
1175 			if (count != 0)
1176 				err("nr %lu move failed %llu %llu\n",
1177 				    nr, count, gopts->count_verify[src_offs + nr + i]);
1178 		}
1179 	}
1180 	if (chunk_size > gopts->page_size) {
1181 		gopts->area_src = orig_area_src;
1182 		gopts->area_dst = orig_area_dst;
1183 	}
1184 
1185 	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
1186 		err("pipe write");
1187 	if (pthread_join(uffd_mon, NULL))
1188 		err("join() failed");
1189 
1190 	if (args.missing_faults != step_count || args.minor_faults != 0)
1191 		uffd_test_fail("stats check error");
1192 	else
1193 		uffd_test_pass();
1194 }
1195 
1196 static void uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
1197 {
1198 	uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault);
1199 }
1200 
1201 static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
1202 {
1203 	if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE))
1204 		err("madvise(MADV_HUGEPAGE) failure");
1205 	uffd_move_test_common(gopts, targs, read_pmd_pagesize(),
1206 			      uffd_move_pmd_handle_fault);
1207 }
1208 
1209 static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
1210 {
1211 	if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE))
1212 		err("madvise(MADV_NOHUGEPAGE) failure");
1213 	uffd_move_test_common(gopts, targs, read_pmd_pagesize(),
1214 			      uffd_move_pmd_handle_fault);
1215 }
1216 
/*
 * Check the outcome of one uffd ioctl issued while an event is pending.
 *
 * @name:   ioctl name, used in the failure message
 * @ret:    return value of ioctl()
 * @error:  errno sampled right after the ioctl()
 * @result: result field of the ioctl argument (e.g. copy.copy for
 *          UFFDIO_COPY)
 *
 * The ioctl must have returned -1 with errno=EAGAIN, and the result field
 * must have been updated to -EAGAIN as well.  The first violated
 * expectation is reported through uffd_test_fail().
 *
 * Return: true when all three expectations hold, false otherwise.
 */
static bool
uffdio_verify_results(const char *name, int ret, int error, long result)
{
	bool ok = false;

	if (ret != -1)
		uffd_test_fail("%s should have returned -1", name);
	else if (error != EAGAIN)
		uffd_test_fail("%s should have errno==EAGAIN", name);
	else if (result != -EAGAIN)
		uffd_test_fail("%s should have been updated for -EAGAIN",
			       name);
	else
		ok = true;

	return ok;
}
1243 
1244 /*
1245  * This defines a function to test one ioctl.  Note that here "field" can
1246  * be 1 or anything not -EAGAIN.  With that initial value set, we can
1247  * verify later that it should be updated by kernel (when -EAGAIN
1248  * returned), by checking whether it is also updated to -EAGAIN.
1249  */
1250 #define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field)		\
1251 	static bool uffdio_mmap_changing_test_##name(int fd)		\
1252 	{								\
1253 		int ret;						\
1254 		struct uffdio_##name args = {				\
1255 			.field = 1,					\
1256 		};							\
1257 		ret = ioctl(fd, ioctl_name, &args);			\
1258 		return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \
1259 	}
1260 
1261 DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage)
1262 DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy)
1263 DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move)
1264 DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated)
1265 DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped)
1266 
/*
 * Scheduler state of a thread as far as this test cares: everything that
 * is not the uninterruptible state is lumped together as "unknown".
 */
typedef enum {
	THR_STATE_UNKNOWN = 0,
	THR_STATE_UNINTERRUPTIBLE = 1,
} thread_state;
1272 
1273 typedef struct {
1274 	uffd_global_test_opts_t *gopts;
1275 	volatile pid_t *pid;
1276 } mmap_changing_thread_args;
1277 
/* Back off for one millisecond between two polls. */
static void sleep_short(void)
{
	const unsigned int one_ms_in_us = 1000;

	usleep(one_ms_in_us);
}
1282 
1283 static thread_state thread_state_get(pid_t tid)
1284 {
1285 	const char *header = "State:\t";
1286 	char tmp[256], *p, c;
1287 	FILE *fp;
1288 
1289 	snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid);
1290 	fp = fopen(tmp, "r");
1291 
1292 	if (!fp)
1293 		return THR_STATE_UNKNOWN;
1294 
1295 	while (fgets(tmp, sizeof(tmp), fp)) {
1296 		p = strstr(tmp, header);
1297 		if (p) {
1298 			/* For example, "State:\tD (disk sleep)" */
1299 			c = *(p + strlen(header));
1300 			return c == 'D' ?
1301 			    THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN;
1302 		}
1303 	}
1304 
1305 	return THR_STATE_UNKNOWN;
1306 }
1307 
1308 static void thread_state_until(pid_t tid, thread_state state)
1309 {
1310 	thread_state s;
1311 
1312 	do {
1313 		s = thread_state_get(tid);
1314 		sleep_short();
1315 	} while (s != state);
1316 }
1317 
1318 static void *uffd_mmap_changing_thread(void *opaque)
1319 {
1320 	mmap_changing_thread_args *args = opaque;
1321 	uffd_global_test_opts_t *gopts = args->gopts;
1322 	volatile pid_t *pid = args->pid;
1323 	int ret;
1324 
1325 	/* Unfortunately, it's only fetch-able from the thread itself.. */
1326 	assert(*pid == 0);
1327 	*pid = syscall(SYS_gettid);
1328 
1329 	/* Inject an event, this will hang solid until the event read */
1330 	ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE);
1331 	if (ret)
1332 		err("madvise(MADV_REMOVE) failed");
1333 
1334 	return NULL;
1335 }
1336 
1337 static void uffd_consume_message(uffd_global_test_opts_t *gopts)
1338 {
1339 	struct uffd_msg msg = { 0 };
1340 
1341 	while (uffd_read_msg(gopts, &msg));
1342 }
1343 
1344 static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
1345 {
1346 	/*
1347 	 * This stores the real PID (which can be different from how tid is
1348 	 * defined..) for the child thread, 0 means not initialized.
1349 	 */
1350 	pid_t pid = 0;
1351 	pthread_t tid;
1352 	int ret;
1353 	mmap_changing_thread_args args = { gopts, &pid };
1354 
1355 	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
1356 			  true, false, false))
1357 		err("uffd_register() failed");
1358 
1359 	/* Create a thread to generate the racy event */
1360 	ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args);
1361 	if (ret)
1362 		err("pthread_create() failed");
1363 
1364 	/*
1365 	 * Wait until the thread setup the pid.  Use volatile to make sure
1366 	 * it reads from RAM not regs.
1367 	 */
1368 	while (!(volatile pid_t)pid)
1369 		sleep_short();
1370 
1371 	/* Wait until the thread hangs at REMOVE event */
1372 	thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE);
1373 
1374 	if (!uffdio_mmap_changing_test_copy(gopts->uffd))
1375 		return;
1376 
1377 	if (!uffdio_mmap_changing_test_zeropage(gopts->uffd))
1378 		return;
1379 
1380 	if (!uffdio_mmap_changing_test_move(gopts->uffd))
1381 		return;
1382 
1383 	if (!uffdio_mmap_changing_test_poison(gopts->uffd))
1384 		return;
1385 
1386 	if (!uffdio_mmap_changing_test_continue(gopts->uffd))
1387 		return;
1388 
1389 	/*
1390 	 * All succeeded above!  Recycle everything.  Start by reading the
1391 	 * event so as to kick the thread roll again..
1392 	 */
1393 	uffd_consume_message(gopts);
1394 
1395 	ret = pthread_join(tid, NULL);
1396 	assert(ret == 0);
1397 
1398 	uffd_test_pass();
1399 }
1400 
1401 static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg)
1402 {
1403 	/* This should be done before source area is populated */
1404 	if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) {
1405 		/* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */
1406 		if (errno != EINVAL) {
1407 			if (errmsg)
1408 				*errmsg = "madvise(MADV_NOHUGEPAGE) failed";
1409 			return -errno;
1410 		}
1411 	}
1412 	return 0;
1413 }
1414 
1415 static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg)
1416 {
1417 	/* This should be done before source area is populated */
1418 	if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) {
1419 		if (errmsg) {
1420 			*errmsg = (errno == EINVAL) ?
1421 				"CONFIG_TRANSPARENT_HUGEPAGE is not set" :
1422 				"madvise(MADV_HUGEPAGE) failed";
1423 		}
1424 		return -errno;
1425 	}
1426 	return 0;
1427 }
1428 
1429 struct uffd_test_case_ops uffd_move_test_case_ops = {
1430 	.post_alloc = prevent_hugepages,
1431 };
1432 
1433 struct uffd_test_case_ops uffd_move_test_pmd_case_ops = {
1434 	.post_alloc = request_hugepages,
1435 };
1436 
1437 /*
1438  * Test the returned uffdio_register.ioctls with different register modes.
1439  * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
1440  */
1441 static void
1442 do_register_ioctls_test(uffd_global_test_opts_t *gopts,
1443 			uffd_test_args_t *args,
1444 			bool miss,
1445 			bool wp,
1446 			bool minor)
1447 {
1448 	uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE);
1449 	mem_type_t *mem_type = args->mem_type;
1450 	int ret;
1451 
1452 	ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size,
1453 					miss, wp, minor, &ioctls);
1454 
1455 	/*
1456 	 * Handle special cases of UFFDIO_REGISTER here where it should
1457 	 * just fail with -EINVAL first..
1458 	 *
1459 	 * Case 1: register MINOR on anon
1460 	 * Case 2: register with no mode selected
1461 	 */
1462 	if ((minor && (mem_type->mem_flag == MEM_ANON)) ||
1463 	    (!miss && !wp && !minor)) {
1464 		if (ret != -EINVAL)
1465 			err("register (miss=%d, wp=%d, minor=%d) failed "
1466 			    "with wrong errno=%d", miss, wp, minor, ret);
1467 		return;
1468 	}
1469 
1470 	/* UFFDIO_REGISTER should succeed, then check ioctls returned */
1471 	if (miss)
1472 		expected |= BIT_ULL(_UFFDIO_COPY);
1473 	if (wp)
1474 		expected |= BIT_ULL(_UFFDIO_WRITEPROTECT);
1475 	if (minor)
1476 		expected |= BIT_ULL(_UFFDIO_CONTINUE);
1477 
1478 	if ((ioctls & expected) != expected)
1479 		err("unexpected uffdio_register.ioctls "
1480 		    "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", "
1481 		    "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls);
1482 
1483 	if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
1484 		err("unregister");
1485 }
1486 
1487 static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
1488 {
1489 	int miss, wp, minor;
1490 
1491 	for (miss = 0; miss <= 1; miss++)
1492 		for (wp = 0; wp <= 1; wp++)
1493 			for (minor = 0; minor <= 1; minor++)
1494 				do_register_ioctls_test(gopts, args, miss, wp, minor);
1495 
1496 	uffd_test_pass();
1497 }
1498 
1499 uffd_test_case_t uffd_tests[] = {
1500 	{
1501 		/* Test returned uffdio_register.ioctls. */
1502 		.name = "register-ioctls",
1503 		.uffd_fn = uffd_register_ioctls_test,
1504 		.mem_targets = MEM_ALL,
1505 		.uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS |
1506 		UFFD_FEATURE_MISSING_SHMEM |
1507 		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1508 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1509 		UFFD_FEATURE_MINOR_HUGETLBFS |
1510 		UFFD_FEATURE_MINOR_SHMEM,
1511 	},
1512 	{
1513 		.name = "zeropage",
1514 		.uffd_fn = uffd_zeropage_test,
1515 		.mem_targets = MEM_ALL,
1516 		.uffd_feature_required = 0,
1517 	},
1518 	{
1519 		.name = "move",
1520 		.uffd_fn = uffd_move_test,
1521 		.mem_targets = MEM_ANON,
1522 		.uffd_feature_required = UFFD_FEATURE_MOVE,
1523 		.test_case_ops = &uffd_move_test_case_ops,
1524 	},
1525 	{
1526 		.name = "move-pmd",
1527 		.uffd_fn = uffd_move_pmd_test,
1528 		.mem_targets = MEM_ANON,
1529 		.uffd_feature_required = UFFD_FEATURE_MOVE,
1530 		.test_case_ops = &uffd_move_test_pmd_case_ops,
1531 	},
1532 	{
1533 		.name = "move-pmd-split",
1534 		.uffd_fn = uffd_move_pmd_split_test,
1535 		.mem_targets = MEM_ANON,
1536 		.uffd_feature_required = UFFD_FEATURE_MOVE,
1537 		.test_case_ops = &uffd_move_test_pmd_case_ops,
1538 	},
1539 	{
1540 		.name = "wp-fork",
1541 		.uffd_fn = uffd_wp_fork_test,
1542 		.mem_targets = MEM_ALL,
1543 		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1544 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1545 	},
1546 	{
1547 		.name = "wp-fork-with-event",
1548 		.uffd_fn = uffd_wp_fork_with_event_test,
1549 		.mem_targets = MEM_ALL,
1550 		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1551 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1552 		/* when set, child process should inherit uffd-wp bits */
1553 		UFFD_FEATURE_EVENT_FORK,
1554 	},
1555 	{
1556 		.name = "wp-fork-pin",
1557 		.uffd_fn = uffd_wp_fork_pin_test,
1558 		.mem_targets = MEM_ALL,
1559 		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1560 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1561 	},
1562 	{
1563 		.name = "wp-fork-pin-with-event",
1564 		.uffd_fn = uffd_wp_fork_pin_with_event_test,
1565 		.mem_targets = MEM_ALL,
1566 		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1567 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1568 		/* when set, child process should inherit uffd-wp bits */
1569 		UFFD_FEATURE_EVENT_FORK,
1570 	},
1571 	{
1572 		.name = "wp-unpopulated",
1573 		.uffd_fn = uffd_wp_unpopulated_test,
1574 		.mem_targets = MEM_ANON,
1575 		.uffd_feature_required =
1576 		UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED,
1577 	},
1578 	{
1579 		.name = "minor",
1580 		.uffd_fn = uffd_minor_test,
1581 		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
1582 		.uffd_feature_required =
1583 		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM,
1584 	},
1585 	{
1586 		.name = "minor-wp",
1587 		.uffd_fn = uffd_minor_wp_test,
1588 		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
1589 		.uffd_feature_required =
1590 		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
1591 		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1592 		/*
1593 		 * HACK: here we leveraged WP_UNPOPULATED to detect whether
1594 		 * minor mode supports wr-protect.  There's no feature flag
1595 		 * for it so this is the best we can test against.
1596 		 */
1597 		UFFD_FEATURE_WP_UNPOPULATED,
1598 	},
1599 	{
1600 		.name = "minor-collapse",
1601 		.uffd_fn = uffd_minor_collapse_test,
1602 		/* MADV_COLLAPSE only works with shmem */
1603 		.mem_targets = MEM_SHMEM,
1604 		/* We can't test MADV_COLLAPSE, so try our luck */
1605 		.uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM,
1606 	},
1607 	{
1608 		.name = "sigbus",
1609 		.uffd_fn = uffd_sigbus_test,
1610 		.mem_targets = MEM_ALL,
1611 		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
1612 		UFFD_FEATURE_EVENT_FORK,
1613 	},
1614 	{
1615 		.name = "sigbus-wp",
1616 		.uffd_fn = uffd_sigbus_wp_test,
1617 		.mem_targets = MEM_ALL,
1618 		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
1619 		UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1620 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1621 	},
1622 	{
1623 		.name = "events",
1624 		.uffd_fn = uffd_events_test,
1625 		.mem_targets = MEM_ALL,
1626 		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
1627 		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE,
1628 	},
1629 	{
1630 		.name = "events-wp",
1631 		.uffd_fn = uffd_events_wp_test,
1632 		.mem_targets = MEM_ALL,
1633 		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
1634 		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE |
1635 		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1636 		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1637 	},
1638 	{
1639 		.name = "poison",
1640 		.uffd_fn = uffd_poison_test,
1641 		.mem_targets = MEM_ALL,
1642 		.uffd_feature_required = UFFD_FEATURE_POISON,
1643 	},
1644 	{
1645 		.name = "mmap-changing",
1646 		.uffd_fn = uffd_mmap_changing_test,
1647 		/*
1648 		 * There's no point running this test over all mem types as
1649 		 * they share the same code paths.
1650 		 *
1651 		 * Choose shmem for simplicity, because (1) shmem supports
1652 		 * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is
1653 		 * almost always available (unlike hugetlb).  Here we
1654 		 * abused SHMEM for UFFDIO_MOVE, but the test we want to
1655 		 * cover doesn't yet need the correct memory type..
1656 		 */
1657 		.mem_targets = MEM_SHMEM,
1658 		/*
1659 		 * Any UFFD_FEATURE_EVENT_* should work to trigger the
1660 		 * race logically, but choose the simplest (REMOVE).
1661 		 *
1662 		 * Meanwhile, since we'll cover quite a few new ioctl()s
1663 		 * (CONTINUE, POISON, MOVE), skip this test for old kernels
1664 		 * by choosing all of them.
1665 		 */
1666 		.uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE |
1667 		UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON |
1668 		UFFD_FEATURE_MINOR_SHMEM,
1669 	},
1670 };
1671 
1672 static void usage(const char *prog)
1673 {
1674 	printf("usage: %s [-f TESTNAME]\n", prog);
1675 	puts("");
1676 	puts(" -f: test name to filter (e.g., event)");
1677 	puts(" -h: show the help msg");
1678 	puts(" -l: list tests only");
1679 	puts("");
1680 	exit(KSFT_FAIL);
1681 }
1682 
1683 static int uffd_count_tests(int n_tests, int n_mems, const char *test_filter)
1684 {
1685 	uffd_test_case_t *test;
1686 	int i, j, count = 0;
1687 
1688 	if (!test_filter)
1689 		count += 2;	/* test_uffd_api(false) + test_uffd_api(true) */
1690 
1691 	for (i = 0; i < n_tests; i++) {
1692 		test = &uffd_tests[i];
1693 		if (test_filter && !strstr(test->name, test_filter))
1694 			continue;
1695 		for (j = 0; j < n_mems; j++)
1696 			if (test->mem_targets & mem_types[j].mem_flag)
1697 				count++;
1698 	}
1699 
1700 	return count;
1701 }
1702 
1703 static unsigned long uffd_setup_hugetlb(void)
1704 {
1705 	unsigned long nr_hugepages, hp_size;
1706 
1707 	hugetlb_save_settings();
1708 	hp_size = default_huge_page_size();
1709 
1710 	if (!hp_size)
1711 		return 0;
1712 
1713 	/* need twice UFFD_TEST_MEM_SIZE, one for src area and one for dst */
1714 	nr_hugepages = 2 * MAX(UFFD_TEST_MEM_SIZE, hp_size * 2) / hp_size;
1715 	hugetlb_set_nr_default_pages(nr_hugepages);
1716 
1717 	if (hugetlb_free_default_pages() < nr_hugepages)
1718 		return 0;
1719 
1720 	return hp_size;
1721 }
1722 
1723 int main(int argc, char *argv[])
1724 {
1725 	int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
1726 	int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
1727 	const char *test_filter = NULL;
1728 	unsigned long hugepage_size;
1729 	bool list_only = false;
1730 	uffd_test_case_t *test;
1731 	mem_type_t *mem_type;
1732 	uffd_test_args_t args;
1733 	const char *errmsg;
1734 	int i, j, opt;
1735 
1736 	while ((opt = getopt(argc, argv, "f:hl")) != -1) {
1737 		switch (opt) {
1738 		case 'f':
1739 			test_filter = optarg;
1740 			break;
1741 		case 'l':
1742 			list_only = true;
1743 			break;
1744 		case 'h':
1745 		default:
1746 			/* Unknown */
1747 			usage(argv[0]);
1748 			break;
1749 		}
1750 	}
1751 
1752 	if (list_only) {
1753 		for (i = 0; i < n_tests; i++) {
1754 			test = &uffd_tests[i];
1755 			if (test_filter && !strstr(test->name, test_filter))
1756 				continue;
1757 			printf("%s\n", test->name);
1758 		}
1759 		return KSFT_PASS;
1760 	}
1761 
1762 	hugepage_size = uffd_setup_hugetlb();
1763 
1764 	ksft_print_header();
1765 	ksft_set_plan(uffd_count_tests(n_tests, n_mems, test_filter));
1766 
1767 	if (!test_filter) {
1768 		test_uffd_api(false);
1769 		test_uffd_api(true);
1770 	}
1771 
1772 	for (i = 0; i < n_tests; i++) {
1773 		test = &uffd_tests[i];
1774 		if (test_filter && !strstr(test->name, test_filter))
1775 			continue;
1776 		for (j = 0; j < n_mems; j++) {
1777 			mem_type = &mem_types[j];
1778 
1779 			/* Initialize global test options */
1780 			uffd_global_test_opts_t gopts = { 0 };
1781 
1782 			gopts.map_shared = mem_type->shared;
1783 			uffd_test_ops = mem_type->mem_ops;
1784 			uffd_test_case_ops = test->test_case_ops;
1785 
1786 			if (!(test->mem_targets & mem_type->mem_flag))
1787 				continue;
1788 
1789 			uffd_test_start("%s on %s", test->name, mem_type->name);
1790 			if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) {
1791 				gopts.page_size = hugepage_size;
1792 				if (gopts.page_size == 0) {
1793 					uffd_test_skip("not enough HugeTLB pages");
1794 					continue;
1795 				}
1796 			} else {
1797 				gopts.page_size = psize();
1798 			}
1799 
1800 			/* Ensure we have at least 2 pages */
1801 			gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
1802 				/ gopts.page_size;
1803 
1804 			gopts.nr_parallel = 1;
1805 
1806 			/* Initialize test arguments */
1807 			args.mem_type = mem_type;
1808 
1809 			if (!uffd_feature_supported(test)) {
1810 				uffd_test_skip("feature missing");
1811 				continue;
1812 			}
1813 			if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) {
1814 				uffd_test_skip(errmsg);
1815 				continue;
1816 			}
1817 			test->uffd_fn(&gopts, &args);
1818 			uffd_test_ctx_clear(&gopts);
1819 		}
1820 	}
1821 
1822 	ksft_finished();
1823 }
1824 
1825 #else /* __NR_userfaultfd */
1826 
1827 #warning "missing __NR_userfaultfd definition"
1828 
/* Fallback when __NR_userfaultfd is not defined: report the whole run as skipped. */
int main(void)
{
	ksft_print_header();
	ksft_exit_skip("missing __NR_userfaultfd definition\n");
}
1834 
1835 #endif /* __NR_userfaultfd */
1836