1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Userfaultfd unit tests.
4 *
5 * Copyright (C) 2015-2023 Red Hat, Inc.
6 */
7
8 #include "uffd-common.h"
9
10 #include "../../../../mm/gup_test.h"
11
12 #ifdef __NR_userfaultfd
13
/* The unit test doesn't need a large or random size, make it 32MB for now */
#define UFFD_TEST_MEM_SIZE (32UL << 20)

/*
 * One bit per kind of memory backing.  Each entry of mem_types[] carries
 * exactly one of these bits in its .mem_flag field.
 */
#define MEM_ANON BIT_ULL(0)
#define MEM_SHMEM BIT_ULL(1)
#define MEM_SHMEM_PRIVATE BIT_ULL(2)
#define MEM_HUGETLB BIT_ULL(3)
#define MEM_HUGETLB_PRIVATE BIT_ULL(4)

/* Union of every memory type bit defined above */
#define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \
	MEM_HUGETLB | MEM_HUGETLB_PRIVATE)
25
/*
 * Round x up to the next multiple of align_to and cast the result back to
 * the type of x.  The mask arithmetic is only correct when align_to is a
 * power of two.
 */
#define ALIGN_UP(x, align_to) \
	((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1)))

/* Larger of a and b.  NOTE: each argument may be evaluated twice. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
30
31 struct mem_type {
32 const char *name;
33 unsigned int mem_flag;
34 uffd_test_ops_t *mem_ops;
35 bool shared;
36 };
37 typedef struct mem_type mem_type_t;
38
/*
 * Table of all memory types.  The shared and private flavours of shmem
 * (and of hugetlb) use the same ops table and differ only in .mem_flag
 * and .shared.
 */
mem_type_t mem_types[] = {
	{
		.name = "anon",
		.mem_flag = MEM_ANON,
		.mem_ops = &anon_uffd_test_ops,
		.shared = false,
	},
	{
		.name = "shmem",
		.mem_flag = MEM_SHMEM,
		.mem_ops = &shmem_uffd_test_ops,
		.shared = true,
	},
	{
		.name = "shmem-private",
		.mem_flag = MEM_SHMEM_PRIVATE,
		.mem_ops = &shmem_uffd_test_ops,
		.shared = false,
	},
	{
		.name = "hugetlb",
		.mem_flag = MEM_HUGETLB,
		.mem_ops = &hugetlb_uffd_test_ops,
		.shared = true,
	},
	{
		.name = "hugetlb-private",
		.mem_flag = MEM_HUGETLB_PRIVATE,
		.mem_ops = &hugetlb_uffd_test_ops,
		.shared = false,
	},
};
71
72 /* Arguments to be passed over to each uffd unit test */
73 struct uffd_test_args {
74 mem_type_t *mem_type;
75 };
76 typedef struct uffd_test_args uffd_test_args_t;
77
/*
 * Signature of a unit test body.  It returns nothing; the tests in this
 * file report their outcome themselves through uffd_test_pass(),
 * uffd_test_fail() or uffd_test_skip().
 */
typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *);
80
/* Static description of one unit test case */
typedef struct {
	/* Printable name of the test */
	const char *name;
	/* Function implementing the test */
	uffd_test_fn uffd_fn;
	/* NOTE(review): presumably a mask of MEM_* bits the test applies to - confirm */
	unsigned int mem_targets;
	/* Feature bits that must all be available, see uffd_feature_supported() */
	uint64_t uffd_feature_required;
	/* NOTE(review): optional per-test-case hooks; semantics not visible here */
	uffd_test_case_ops_t *test_case_ops;
} uffd_test_case_t;
88
/* Print a one-line summary of the kselftest pass/skip/fail/total counters. */
static void uffd_test_report(void)
{
	printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n",
	       ksft_get_pass_cnt(), ksft_get_xskip_cnt(),
	       ksft_get_fail_cnt(), ksft_test_num());
}
97
/* Finish the current "Testing ..." line with "done" and count a pass. */
static void uffd_test_pass(void)
{
	/* puts() appends the newline itself */
	puts("done");
	ksft_inc_pass_cnt();
}
103
/*
 * Announce a test: prints "Testing <printf-style arguments>... " without a
 * trailing newline and flushes stdout, leaving the line open for the
 * result that uffd_test_pass()/uffd_test_fail()/uffd_test_skip() print.
 */
#define uffd_test_start(...) do { \
	printf("Testing "); \
	printf(__VA_ARGS__); \
	printf("... "); \
	fflush(stdout); \
} while (0)
110
/*
 * Report a failure: prints "failed [reason: <printf-style arguments>]" and
 * bumps the kselftest failure counter.  It does not return from the caller.
 */
#define uffd_test_fail(...) do { \
	printf("failed [reason: "); \
	printf(__VA_ARGS__); \
	printf("]\n"); \
	ksft_inc_fail_cnt(); \
} while (0)
117
/*
 * Report that the current test was skipped.
 *
 * @message: human readable reason, printed verbatim.
 */
static void uffd_test_skip(const char *message)
{
	fprintf(stdout, "skipped [reason: %s]\n", message);
	ksft_inc_xskip_cnt();
}
123
124 /*
125 * Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll
126 * return 1 even if some test failed as long as uffd supported, because in
127 * that case we still want to proceed with the rest uffd unit tests.
128 */
test_uffd_api(bool use_dev)129 static int test_uffd_api(bool use_dev)
130 {
131 struct uffdio_api uffdio_api;
132 int uffd;
133
134 uffd_test_start("UFFDIO_API (with %s)",
135 use_dev ? "/dev/userfaultfd" : "syscall");
136
137 if (use_dev)
138 uffd = uffd_open_dev(UFFD_FLAGS);
139 else
140 uffd = uffd_open_sys(UFFD_FLAGS);
141 if (uffd < 0) {
142 uffd_test_skip("cannot open userfaultfd handle");
143 return 0;
144 }
145
146 /* Test wrong UFFD_API */
147 uffdio_api.api = 0xab;
148 uffdio_api.features = 0;
149 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
150 uffd_test_fail("UFFDIO_API should fail with wrong api but didn't");
151 goto out;
152 }
153
154 /* Test wrong feature bit */
155 uffdio_api.api = UFFD_API;
156 uffdio_api.features = BIT_ULL(63);
157 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
158 uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't");
159 goto out;
160 }
161
162 /* Test normal UFFDIO_API */
163 uffdio_api.api = UFFD_API;
164 uffdio_api.features = 0;
165 if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
166 uffd_test_fail("UFFDIO_API should succeed but failed");
167 goto out;
168 }
169
170 /* Test double requests of UFFDIO_API with a random feature set */
171 uffdio_api.features = BIT_ULL(0);
172 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
173 uffd_test_fail("UFFDIO_API should reject initialized uffd");
174 goto out;
175 }
176
177 uffd_test_pass();
178 out:
179 close(uffd);
180 /* We have a valid uffd handle */
181 return 1;
182 }
183
184
/*
 * Tell whether every feature bit a test case requires is available.
 *
 * Returns false when the feature set cannot be queried or when at least
 * one bit of test->uffd_feature_required is missing from it.
 */
static bool uffd_feature_supported(uffd_test_case_t *test)
{
	uint64_t available;
	uint64_t missing;

	if (uffd_get_features(&available) != 0)
		return false;

	/* Required bits that are not part of the available set */
	missing = test->uffd_feature_required & ~available;
	return missing == 0;
}
195
/*
 * Open /proc/self/pagemap read-only.
 *
 * Returns the file descriptor; err() is invoked when the open fails.
 */
static int pagemap_open(void)
{
	int fd;

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd >= 0)
		return fd;

	err("open pagemap");
	return fd;
}
205
/*
 * Check that the PM_UFFD_WP bit of a pagemap entry matches the expected
 * state and call err() when it does not.
 *
 * @value: pagemap entry (evaluated again for the message on mismatch)
 * @wp:    expected state of the uffd-wp bit
 *
 * This is a macro rather than a function so that __LINE__ works in err().
 * Both arguments are parenthesized so that expression arguments keep their
 * own precedence.
 */
#define pagemap_check_wp(value, wp) do { \
	if (!!((value) & PM_UFFD_WP) != (wp)) \
		err("pagemap uffd-wp bit error: 0x%"PRIx64, (value)); \
} while (0)
211
/* Data shared between pagemap_test_fork() and fork_event_consumer() */
typedef struct {
	/* Global test options; the consumer thread reads uffd messages through it */
	uffd_global_test_opts_t *gopts;
	/* uffd received with the fork event; stays -1 until one arrives */
	int child_uffd;
} fork_event_args;
216
fork_event_consumer(void * data)217 static void *fork_event_consumer(void *data)
218 {
219 fork_event_args *args = data;
220 struct uffd_msg msg = { 0 };
221
222 args->gopts->ready_for_fork = true;
223
224 /* Read until a full msg received */
225 while (uffd_read_msg(args->gopts, &msg));
226
227 if (msg.event != UFFD_EVENT_FORK)
228 err("wrong message: %u\n", msg.event);
229
230 /* Just to be properly freed later */
231 args->child_uffd = msg.arg.fork.ufd;
232 return NULL;
233 }
234
/* State of one pin_pages()/unpin_pages() pair */
typedef struct {
	/* fd of /sys/kernel/debug/gup_test, opened by pin_pages() */
	int gup_fd;
	/* true between a successful pin_pages() and the matching unpin_pages() */
	bool pinned;
} pin_args;
239
240 /*
241 * Returns 0 if succeed, <0 for errors. pin_pages() needs to be paired
242 * with unpin_pages(). Currently it needs to be RO longterm pin to satisfy
243 * all needs of the test cases (e.g., trigger unshare, trigger fork() early
244 * CoW, etc.).
245 */
pin_pages(pin_args * args,void * buffer,size_t size)246 static int pin_pages(pin_args *args, void *buffer, size_t size)
247 {
248 struct pin_longterm_test test = {
249 .addr = (uintptr_t)buffer,
250 .size = size,
251 /* Read-only pins */
252 .flags = 0,
253 };
254
255 if (args->pinned)
256 err("already pinned");
257
258 args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
259 if (args->gup_fd < 0)
260 return -errno;
261
262 if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) {
263 /* Even if gup_test existed, can be an old gup_test / kernel */
264 close(args->gup_fd);
265 return -errno;
266 }
267 args->pinned = true;
268 return 0;
269 }
270
/*
 * Drop the pin taken by pin_pages() and close the gup_test fd.  Calling it
 * without a preceding successful pin_pages() is reported through err().
 */
static void unpin_pages(pin_args *args)
{
	int fd;

	if (!args->pinned)
		err("unpin without pin first");

	fd = args->gup_fd;
	if (ioctl(fd, PIN_LONGTERM_TEST_STOP) != 0)
		err("PIN_LONGTERM_TEST_STOP");

	args->pinned = false;
	close(fd);
}
280
/*
 * Fork a child that checks the uffd-wp bit of the first page of area_dst
 * in its own pagemap.
 *
 * @gopts:      global test options (uffd, area_dst, page_size, ...)
 * @with_event: when true, a helper thread consumes the fork event first
 *              and the child expects the uffd-wp bit to persist; when
 *              false the child expects the bit to be dropped
 * @test_pin:   when true, the child pins the page (RO longterm) before
 *              reading the pagemap entry
 *
 * Returns the child's wait status as filled in by waitpid(); 0 means the
 * child found the expected state.
 */
static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin)
{
	fork_event_args args = { .gopts = gopts, .child_uffd = -1 };
	pthread_t thread;
	pid_t child;
	uint64_t value;
	int fd, result;

	/* Prepare a thread to resolve EVENT_FORK */
	if (with_event) {
		gopts->ready_for_fork = false;
		if (pthread_create(&thread, NULL, fork_event_consumer, &args))
			err("pthread_create()");
		while (!gopts->ready_for_fork)
			; /* Wait for the consumer thread to start executing before forking */
	}

	child = fork();
	/* Without this, waitpid(-1) below would leave result uninitialized */
	if (child < 0)
		err("fork");

	if (!child) {
		/* Open the pagemap fd of the child itself */
		pin_args child_pin = {};

		fd = pagemap_open();

		if (test_pin && pin_pages(&child_pin, gopts->area_dst, gopts->page_size))
			/*
			 * Normally when reach here we have pinned in
			 * previous tests, so shouldn't fail anymore
			 */
			err("pin page failed in child");

		value = pagemap_get_entry(fd, gopts->area_dst);
		/*
		 * After fork(), we should handle uffd-wp bit differently:
		 *
		 * (1) when with EVENT_FORK, it should persist
		 * (2) when without EVENT_FORK, it should be dropped
		 */
		pagemap_check_wp(value, with_event);
		if (test_pin)
			unpin_pages(&child_pin);
		/* Succeed */
		exit(0);
	}

	if (waitpid(child, &result, 0) < 0)
		err("waitpid");

	if (with_event) {
		if (pthread_join(thread, NULL))
			err("pthread_join()");
		if (args.child_uffd < 0)
			err("Didn't receive child uffd");
		close(args.child_uffd);
	}

	return result;
}
337
/*
 * Check uffd-wp on unpopulated memory: write-protect, unprotect and zap a
 * page that was never faulted in, verifying the uffd-wp bit in
 * /proc/self/pagemap after each step.
 *
 * @gopts: global test options (uffd, area_dst, nr_pages, page_size)
 * @args:  unused
 */
static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	uint64_t value;
	int pagemap_fd;

	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
			  false, true, false))
		err("register failed");

	pagemap_fd = pagemap_open();

	/* Test applying pte marker to anon unpopulated */
	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
	pagemap_check_wp(value, true);

	/* Test unprotect on anon pte marker */
	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false);
	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
	pagemap_check_wp(value, false);

	/* Test zap on anon marker */
	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
	if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
		err("madvise(MADV_DONTNEED) failed");
	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
	pagemap_check_wp(value, false);

	/* Test fault in after marker removed */
	*gopts->area_dst = 1;
	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
	pagemap_check_wp(value, false);
	/* Drop it to make pte none again */
	if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
		err("madvise(MADV_DONTNEED) failed");

	/* Test read-zero-page upon pte marker */
	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
	*(volatile char *)gopts->area_dst;
	/* Drop it to make pte none again */
	if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
		err("madvise(MADV_DONTNEED) failed");

	/* Do not leak the pagemap fd across test cases */
	close(pagemap_fd);

	uffd_test_pass();
}
383
/*
 * Check how the uffd-wp bit of a page is carried across fork(), both while
 * the pte is present and after the page table entry has been zapped.
 *
 * @gopts:      global test options (uffd, area_dst, nr_pages, page_size)
 * @args:       supplies the memory type; shared mappings are zapped with
 *              MADV_DONTNEED, private ones with MADV_PAGEOUT
 * @with_event: forwarded to pagemap_test_fork(); selects whether the child
 *              is expected to keep (true) or lose (false) the uffd-wp bit
 */
static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args,
				     bool with_event)
{
	int pagemap_fd;
	uint64_t value;

	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
			  false, true, false))
		err("register failed");

	pagemap_fd = pagemap_open();

	/* Touch the page */
	*gopts->area_dst = 1;
	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
	pagemap_check_wp(value, true);
	if (pagemap_test_fork(gopts, with_event, false)) {
		uffd_test_fail("Detected %s uffd-wp bit in child in present pte",
			       with_event ? "missing" : "stale");
		goto out;
	}

	/*
	 * This is an attempt for zapping the pgtable so as to test the
	 * markers.
	 *
	 * For private mappings, PAGEOUT will only work on exclusive ptes
	 * (PM_MMAP_EXCLUSIVE) which we should satisfy.
	 *
	 * For shared, PAGEOUT may not work.  Use DONTNEED instead which
	 * plays a similar role of zapping (rather than freeing the page)
	 * to expose pte markers.
	 */
	if (args->mem_type->shared) {
		if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
			err("MADV_DONTNEED");
	} else {
		/*
		 * NOTE: ignore retval because private-hugetlb doesn't yet
		 * support swapping, so it could fail.
		 */
		madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT);
	}

	/* Uffd-wp should persist even swapped out */
	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
	pagemap_check_wp(value, true);
	if (pagemap_test_fork(gopts, with_event, false)) {
		uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte",
			       with_event ? "missing" : "stale");
		goto out;
	}

	/* Unprotect; this tests swap pte modifications */
	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false);
	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
	pagemap_check_wp(value, false);

	/* Fault in the page from disk */
	*gopts->area_dst = 2;
	value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
	pagemap_check_wp(value, false);
	uffd_test_pass();
out:
	if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size))
		err("unregister failed");
	close(pagemap_fd);
}
453
/* uffd-wp fork test with with_event disabled. */
static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool with_event = false;

	uffd_wp_fork_test_common(gopts, args, with_event);
}
458
/* uffd-wp fork test with with_event enabled. */
static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool with_event = true;

	uffd_wp_fork_test_common(gopts, args, with_event);
}
463
/*
 * Check the uffd-wp bit across fork() when the page is longterm pinned,
 * first pinning in the parent before fork() and then pinning in the child
 * after fork().
 *
 * @gopts:      global test options (uffd, area_dst, page_size)
 * @args:       unused
 * @with_event: forwarded to pagemap_test_fork()
 *
 * The test is skipped when the page cannot be pinned.
 */
static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts,
					 uffd_test_args_t *args,
					 bool with_event)
{
	int pagemap_fd;
	pin_args pin_args = {};

	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false))
		err("register failed");

	pagemap_fd = pagemap_open();

	/* Touch the page */
	*gopts->area_dst = 1;
	wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);

	/*
	 * 1. First pin, then fork().  This tests fork() special path when
	 * doing early CoW if the page is private.
	 */
	if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) {
		uffd_test_skip("Possibly CONFIG_GUP_TEST missing "
			       "or unprivileged");
		close(pagemap_fd);
		uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size);
		return;
	}

	if (pagemap_test_fork(gopts, with_event, false)) {
		uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()",
			       with_event ? "missing" : "stale");
		unpin_pages(&pin_args);
		goto out;
	}

	unpin_pages(&pin_args);

	/*
	 * 2. First fork(), then pin (in the child, where test_pin==true).
	 * This tests COR, aka, page unsharing on private memories.
	 */
	if (pagemap_test_fork(gopts, with_event, true)) {
		uffd_test_fail("Detected %s uffd-wp bit when RO pin",
			       with_event ? "missing" : "stale");
		goto out;
	}
	uffd_test_pass();
out:
	/* This is the unregister path, so say so when it fails */
	if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
		err("unregister failed");
	close(pagemap_fd);
}
516
/* uffd-wp fork + pin test with with_event disabled. */
static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool with_event = false;

	uffd_wp_fork_pin_test_common(gopts, args, with_event);
}
521
/* uffd-wp fork + pin test with with_event enabled. */
static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool with_event = true;

	uffd_wp_fork_pin_test_common(gopts, args, with_event);
}
526
/*
 * Verify that every byte of page i in the nr_pages-long area at @p equals
 * the bitwise complement of (i % 255); err() is called on the first
 * mismatch.
 */
static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p)
{
	unsigned long page, off;

	for (page = 0; page < gopts->nr_pages; ++page) {
		uint8_t *cur = (uint8_t *)(p + (page * gopts->page_size));
		uint8_t want = ~((uint8_t)(page % ((uint8_t)-1)));

		for (off = 0; off < gopts->page_size; off++) {
			if (cur[off] != want)
				err("unexpected page contents");
		}
	}
}
541
/*
 * Common body of the minor fault tests.
 *
 * @gopts:         global test options (uffd, area_dst, area_dst_alias, ...)
 * @test_collapse: additionally run MADV_COLLAPSE on the registered alias
 *                 and re-check the contents; the test is skipped when the
 *                 madvise fails
 * @test_wp:       register the alias with write-protect mode as well
 *
 * The test passes when the poll thread saw no missing faults and exactly
 * nr_pages minor faults.
 */
static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp)
{
	unsigned long p;
	pthread_t uffd_mon;
	/* Byte sent down the pipe; the value is arbitrary but must be defined */
	char c = 0;
	struct uffd_args args = { 0 };
	args.gopts = gopts;

	/*
	 * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing
	 * both do not make much sense.
	 */
	assert(!(test_collapse && test_wp));

	if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size,
			  /* NOTE! MADV_COLLAPSE may not work with uffd-wp */
			  false, test_wp, true))
		err("register failure");

	/*
	 * After registering with UFFD, populate the non-UFFD-registered side of
	 * the shared mapping.  This should *not* trigger any UFFD minor faults.
	 */
	for (p = 0; p < gopts->nr_pages; ++p)
		memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1),
		       gopts->page_size);

	args.apply_wp = test_wp;
	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
		err("uffd_poll_thread create");

	/*
	 * Read each of the pages back using the UFFD-registered mapping.  We
	 * expect that the first time we touch a page, it will result in a minor
	 * fault.  uffd_poll_thread will resolve the fault by bit-flipping the
	 * page's contents, and then issuing a CONTINUE ioctl.
	 */
	check_memory_contents(gopts, gopts->area_dst_alias);

	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
		err("pipe write");
	if (pthread_join(uffd_mon, NULL))
		err("join() failed");

	if (test_collapse) {
		if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size,
			    MADV_COLLAPSE)) {
			/* It's fine to fail for this one... */
			uffd_test_skip("MADV_COLLAPSE failed");
			return;
		}

		uffd_test_ops->check_pmd_mapping(gopts,
						 gopts->area_dst,
						 gopts->nr_pages * gopts->page_size /
						 read_pmd_pagesize());
		/*
		 * This won't cause uffd-fault - it purely just makes sure there
		 * was no corruption.
		 */
		check_memory_contents(gopts, gopts->area_dst_alias);
	}

	if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages)
		uffd_test_fail("stats check error");
	else
		uffd_test_pass();
}
610
/* Minor fault test without collapse and without write-protect. */
void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool test_collapse = false;
	const bool test_wp = false;

	uffd_minor_test_common(gopts, test_collapse, test_wp);
}
615
/* Minor fault test with write-protect enabled, no collapse. */
void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool test_collapse = false;
	const bool test_wp = true;

	uffd_minor_test_common(gopts, test_collapse, test_wp);
}
620
/* Minor fault test followed by MADV_COLLAPSE, no write-protect. */
void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool test_collapse = true;
	const bool test_wp = false;

	uffd_minor_test_common(gopts, test_collapse, test_wp);
}
625
/*
 * SIGBUS recovery state: sighndl() jumps to *sigbuf when sigbuf is
 * non-NULL.  The tests point sigbuf at jbuf before calling sigsetjmp().
 */
static sigjmp_buf jbuf, *sigbuf;
627
/*
 * Signal handler installed for SIGBUS.  It jumps back to the context saved
 * in *sigbuf, or aborts when no jump buffer has been set up.  Any other
 * signal is ignored.
 */
static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
{
	if (sig != SIGBUS)
		return;

	if (!sigbuf)
		abort();

	siglongjmp(*sigbuf, 1);
	/* Not expected to be reached; kept to mirror the abort-after-jump fallback */
	abort();
}
636
637 /*
638 * For non-cooperative userfaultfd test we fork() a process that will
639 * generate pagefaults, will mremap the area monitored by the
640 * userfaultfd and at last this process will release the monitored
641 * area.
642 * For the anonymous and shared memory the area is divided into two
643 * parts, the first part is accessed before mremap, and the second
644 * part is accessed after mremap. Since hugetlbfs does not support
645 * mremap, the entire monitored area is accessed in a single pass for
646 * HUGETLB_TEST.
647 * The release of the pages currently generates event for shmem and
648 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
649 * for hugetlb.
650 * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
651 * monitored area, generate pagefaults and test that signal is delivered.
652 * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
653 * test robustness use case - we release monitored area, fork a process
654 * that will generate pagefaults and verify signal is generated.
655 * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
656 * feature. Using monitor thread, verify no userfault events are generated.
657 */
faulting_process(uffd_global_test_opts_t * gopts,int signal_test,bool wp)658 static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp)
659 {
660 unsigned long nr, i;
661 unsigned long long count;
662 unsigned long split_nr_pages;
663 unsigned long lastnr;
664 struct sigaction act;
665 volatile unsigned long signalled = 0;
666
667 split_nr_pages = (gopts->nr_pages + 1) / 2;
668
669 if (signal_test) {
670 sigbuf = &jbuf;
671 memset(&act, 0, sizeof(act));
672 act.sa_sigaction = sighndl;
673 act.sa_flags = SA_SIGINFO;
674 if (sigaction(SIGBUS, &act, 0))
675 err("sigaction");
676 lastnr = (unsigned long)-1;
677 }
678
679 for (nr = 0; nr < split_nr_pages; nr++) {
680 volatile int steps = 1;
681 unsigned long offset = nr * gopts->page_size;
682
683 if (signal_test) {
684 if (sigsetjmp(*sigbuf, 1) != 0) {
685 if (steps == 1 && nr == lastnr)
686 err("Signal repeated");
687
688 lastnr = nr;
689 if (signal_test == 1) {
690 if (steps == 1) {
691 /* This is a MISSING request */
692 steps++;
693 if (copy_page(gopts, offset, wp))
694 signalled++;
695 } else {
696 /* This is a WP request */
697 assert(steps == 2);
698 wp_range(gopts->uffd,
699 (__u64)gopts->area_dst +
700 offset,
701 gopts->page_size, false);
702 }
703 } else {
704 signalled++;
705 continue;
706 }
707 }
708 }
709
710 count = *area_count(gopts->area_dst, nr, gopts);
711 if (count != gopts->count_verify[nr])
712 err("nr %lu memory corruption %llu %llu\n",
713 nr, count, gopts->count_verify[nr]);
714 /*
715 * Trigger write protection if there is by writing
716 * the same value back.
717 */
718 *area_count(gopts->area_dst, nr, gopts) = count;
719 }
720
721 if (signal_test)
722 return signalled != split_nr_pages;
723
724 gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size,
725 gopts->nr_pages * gopts->page_size,
726 MREMAP_MAYMOVE | MREMAP_FIXED,
727 gopts->area_src);
728 if (gopts->area_dst == MAP_FAILED)
729 err("mremap");
730 /* Reset area_src since we just clobbered it */
731 gopts->area_src = NULL;
732
733 for (; nr < gopts->nr_pages; nr++) {
734 count = *area_count(gopts->area_dst, nr, gopts);
735 if (count != gopts->count_verify[nr]) {
736 err("nr %lu memory corruption %llu %llu\n",
737 nr, count, gopts->count_verify[nr]);
738 }
739 /*
740 * Trigger write protection if there is by writing
741 * the same value back.
742 */
743 *area_count(gopts->area_dst, nr, gopts) = count;
744 }
745
746 uffd_test_ops->release_pages(gopts, gopts->area_dst);
747
748 for (nr = 0; nr < gopts->nr_pages; nr++)
749 for (i = 0; i < gopts->page_size; i++)
750 if (*(gopts->area_dst + nr * gopts->page_size + i) != 0)
751 err("page %lu offset %lu is not zero", nr, i);
752
753 return 0;
754 }
755
/*
 * Common body of the SIGBUS tests.
 *
 * Runs faulting_process() in signal mode 1 in this process, releases the
 * pages, then starts the poll thread and forks a child that runs
 * faulting_process() in signal mode 2.  The test fails when the value
 * handed back by the poll thread through pthread_join() is non-zero.
 *
 * @gopts: global test options
 * @wp:    also register and handle write-protect mode
 */
static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp)
{
	unsigned long userfaults;
	/* pthread_join() wants a void **; do not alias an unsigned long */
	void *thread_ret = NULL;
	pthread_t uffd_mon;
	pid_t pid;
	int status;
	/* Byte sent down the pipe; the value is arbitrary but must be defined */
	char c = 0;
	struct uffd_args args = { 0 };
	args.gopts = gopts;

	gopts->ready_for_fork = false;

	fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);

	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
			  true, wp, false))
		err("register failure");

	if (faulting_process(gopts, 1, wp))
		err("faulting process failed");

	uffd_test_ops->release_pages(gopts, gopts->area_dst);

	args.apply_wp = wp;
	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
		err("uffd_poll_thread create");

	while (!gopts->ready_for_fork)
		; /* Wait for the poll_thread to start executing before forking */

	pid = fork();
	if (pid < 0)
		err("fork");

	if (!pid)
		exit(faulting_process(gopts, 2, wp));

	waitpid(pid, &status, 0);
	if (status)
		err("faulting process failed");
	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
		err("pipe write");
	if (pthread_join(uffd_mon, &thread_ret))
		err("pthread_join()");
	userfaults = (unsigned long)thread_ret;

	if (userfaults)
		uffd_test_fail("Signal test failed, userfaults: %lu", userfaults);
	else
		uffd_test_pass();
}
806
/* SIGBUS test without write-protect. */
static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool wp = false;

	uffd_sigbus_test_common(gopts, wp);
}
811
/* SIGBUS test with write-protect. */
static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool wp = true;

	uffd_sigbus_test_common(gopts, wp);
}
816
/*
 * Common body of the events tests.
 *
 * Starts the poll thread, forks a child that runs faulting_process() in
 * non-signal mode, and passes when the poll thread counted exactly
 * nr_pages missing faults.
 *
 * @gopts: global test options
 * @wp:    also register and handle write-protect mode
 */
static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp)
{
	pthread_t uffd_mon;
	pid_t pid;
	int status;
	/* Byte sent down the pipe; the value is arbitrary but must be defined */
	char c = 0;
	struct uffd_args args = { 0 };
	args.gopts = gopts;

	gopts->ready_for_fork = false;

	fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);
	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
			  true, wp, false))
		err("register failure");

	args.apply_wp = wp;
	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
		err("uffd_poll_thread create");

	while (!gopts->ready_for_fork)
		; /* Wait for the poll_thread to start executing before forking */

	pid = fork();
	if (pid < 0)
		err("fork");

	if (!pid)
		exit(faulting_process(gopts, 0, wp));

	waitpid(pid, &status, 0);
	if (status)
		err("faulting process failed");
	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
		err("pipe write");
	if (pthread_join(uffd_mon, NULL))
		err("pthread_join()");

	if (args.missing_faults != gopts->nr_pages)
		uffd_test_fail("Fault counts wrong");
	else
		uffd_test_pass();
}
860
/* Events test without write-protect. */
static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool wp = false;

	uffd_events_test_common(gopts, wp);
}
865
/* Events test with write-protect. */
static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	const bool wp = true;

	uffd_events_test_common(gopts, wp);
}
870
/*
 * Issue UFFDIO_ZEROPAGE again after passing the range through the memory
 * type's alias_mapping() hook.  The only accepted outcome is a failing
 * ioctl that reports -EEXIST in the zeropage field; anything else,
 * including success, goes to err().
 */
static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts,
				  struct uffdio_zeropage *uffdio_zeropage)
{
	int failed;

	uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start,
				     uffdio_zeropage->range.len,
				     0);

	failed = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage) != 0;
	if (failed && uffdio_zeropage->zeropage == -EEXIST)
		return;

	err("UFFDIO_ZEROPAGE error: %"PRId64,
	    (int64_t)uffdio_zeropage->zeropage);
}
886
/*
 * Issue UFFDIO_ZEROPAGE on the first page of area_dst and validate the
 * outcome against what registration advertised.
 *
 * @has_zeropage: whether UFFDIO_ZEROPAGE is expected to be supported
 *
 * Returns true when the ioctl succeeded on a range that supports it, false
 * otherwise.  Unexpected outcomes are reported through err().
 */
static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage)
{
	struct uffdio_zeropage req = { 0 };
	__s64 res;
	int ret;

	req.range.start = (unsigned long) gopts->area_dst;
	req.range.len = gopts->page_size;
	req.mode = 0;

	ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &req);
	/* The real result is reported in the zeropage field */
	res = req.zeropage;

	if (ret) {
		if (has_zeropage)
			err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
		else if (res != -EINVAL)
			err("UFFDIO_ZEROPAGE not -EINVAL");
		return false;
	}

	if (!has_zeropage) {
		err("UFFDIO_ZEROPAGE succeeded");
		return false;
	}

	if (res != gopts->page_size)
		err("UFFDIO_ZEROPAGE unexpected size");
	else
		retry_uffdio_zeropage(gopts, &req);

	return true;
}
915
916 /*
917 * Registers a range with MISSING mode only for zeropage test. Return true
918 * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register()
919 * because we want to detect .ioctls along the way.
920 */
921 static bool
uffd_register_detect_zeropage(int uffd,void * addr,uint64_t len)922 uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len)
923 {
924 uint64_t ioctls = 0;
925
926 if (uffd_register_with_ioctls(uffd, addr, len, true,
927 false, false, &ioctls))
928 err("zeropage register fail");
929
930 return ioctls & (1 << _UFFDIO_ZEROPAGE);
931 }
932
933 /* exercise UFFDIO_ZEROPAGE */
uffd_zeropage_test(uffd_global_test_opts_t * gopts,uffd_test_args_t * args)934 static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
935 {
936 bool has_zeropage;
937 int i;
938
939 has_zeropage = uffd_register_detect_zeropage(gopts->uffd,
940 gopts->area_dst,
941 gopts->page_size);
942 if (gopts->area_dst_alias)
943 /* Ignore the retval; we already have it */
944 uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size);
945
946 if (do_uffdio_zeropage(gopts, has_zeropage))
947 for (i = 0; i < gopts->page_size; i++)
948 if (gopts->area_dst[i] != 0)
949 err("data non-zero at offset %d\n", i);
950
951 if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
952 err("unregister");
953
954 if (gopts->area_dst_alias && uffd_unregister(gopts->uffd,
955 gopts->area_dst_alias,
956 gopts->page_size))
957 err("unregister");
958
959 uffd_test_pass();
960 }
961
uffd_register_poison(int uffd,void * addr,uint64_t len)962 static void uffd_register_poison(int uffd, void *addr, uint64_t len)
963 {
964 uint64_t ioctls = 0;
965 uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON);
966
967 if (uffd_register_with_ioctls(uffd, addr, len, true,
968 false, false, &ioctls))
969 err("poison register fail");
970
971 if ((ioctls & expected) != expected)
972 err("registered area doesn't support COPY and POISON ioctls");
973 }
974
/*
 * Poison one page of area_dst with UFFDIO_POISON.
 *
 * @offset: byte offset of the page inside area_dst
 *
 * err() is called when the ioctl fails or reports a size other than one
 * page in its updated field.
 */
static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset)
{
	struct uffdio_poison req = {
		.range = {
			.start = (unsigned long) gopts->area_dst + offset,
			.len = gopts->page_size,
		},
		.mode = 0,
	};
	__s64 res;
	int ret;

	ret = ioctl(gopts->uffd, UFFDIO_POISON, &req);
	res = req.updated;

	if (ret) {
		err("UFFDIO_POISON error: %"PRId64, (int64_t)res);
	} else if (res != gopts->page_size) {
		err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res);
	}
}
992
/*
 * Fault handler used by the poison test.  Only plain page fault messages
 * are accepted (no WP or MINOR flag).  The faulting address is rounded
 * down to its page; odd pages get a page copied in, even pages are
 * poisoned.
 */
static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts,
				     struct uffd_msg *msg,
				     struct uffd_args *args)
{
	unsigned long offset;
	bool odd_page;

	if (msg->event != UFFD_EVENT_PAGEFAULT)
		err("unexpected msg event %u", msg->event);

	if (msg->arg.pagefault.flags &
	    (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR))
		err("unexpected fault type %llu", msg->arg.pagefault.flags);

	offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst;
	offset &= ~(gopts->page_size-1);

	/* Odd pages -> copy zeroed page; even pages -> poison. */
	odd_page = (offset & gopts->page_size) != 0;
	if (!odd_page)
		do_uffdio_poison(gopts, offset);
	else
		copy_page(gopts, offset, false);
}
1015
/* Enough pages to cover both the odd and the even case, with minimal duplication */
1017 #define UFFD_POISON_TEST_NPAGES 4
1018
/*
 * UFFDIO_POISON test.
 *
 * Registers the first UFFD_POISON_TEST_NPAGES pages of area_dst through
 * uffd_register_poison(), zeroes the same amount of area_src, and starts
 * uffd_poll_thread with uffd_poison_handle_fault as the fault handler.
 * The main thread then reads every byte of each registered page with a
 * SIGBUS handler installed: a page either reads back as all zeroes or the
 * access raises SIGBUS, which is counted.  The test expects exactly half of
 * the pages to raise SIGBUS, matching the handler, which poisons the
 * even-numbered pages.  @targs is unused.
 *
 * The test is skipped when fewer than UFFD_POISON_TEST_NPAGES pages are
 * available.
 */
static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
{
	pthread_t uffd_mon;
	char c;
	struct uffd_args args = { 0 };
	struct sigaction act = { 0 };
	unsigned long nr_sigbus = 0;
	unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES;

	if (gopts->nr_pages < poison_pages) {
		uffd_test_skip("Too less pages for POISON test");
		return;
	}

	args.gopts = gopts;

	/* Switch the uffd to non-blocking mode; the result is not checked. */
	fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);

	uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size);
	memset(gopts->area_src, 0, poison_pages * gopts->page_size);

	args.handle_fault = uffd_poison_handle_fault;
	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
		err("uffd_poll_thread create");

	/*
	 * Publish the jump buffer and install the SIGBUS handler.
	 * NOTE(review): the handler is left installed when the test returns.
	 */
	sigbuf = &jbuf;
	act.sa_sigaction = sighndl;
	act.sa_flags = SA_SIGINFO;
	if (sigaction(SIGBUS, &act, 0))
		err("sigaction");

	for (nr = 0; nr < poison_pages; ++nr) {
		unsigned long offset = nr * gopts->page_size;
		const char *bytes = (const char *) gopts->area_dst + offset;
		const char *i;

		/* Re-armed for every page so a SIGBUS resumes at this page. */
		if (sigsetjmp(*sigbuf, 1)) {
			/*
			 * Access below triggered a SIGBUS, which was caught by
			 * sighndl, which then jumped here. Count this SIGBUS,
			 * and move on to next page.
			 */
			++nr_sigbus;
			continue;
		}

		/* A page that did not raise SIGBUS must read back as zeroes. */
		for (i = bytes; i < bytes + gopts->page_size; ++i) {
			if (*i)
				err("nonzero byte in area_dst (%p) at %p: %u",
				    gopts->area_dst, i, *i);
		}
	}

	/*
	 * Write one byte (its value is never initialized) to the pipe, then
	 * wait for the monitor thread -- presumably the write is what tells
	 * uffd_poll_thread to exit; confirm against its implementation.
	 */
	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
		err("pipe write");
	if (pthread_join(uffd_mon, NULL))
		err("pthread_join()");

	if (nr_sigbus != poison_pages / 2)
		err("expected to receive %lu SIGBUS, actually received %lu",
		    poison_pages / 2, nr_sigbus);

	uffd_test_pass();
}
1083
1084 static void
uffd_move_handle_fault_common(uffd_global_test_opts_t * gopts,struct uffd_msg * msg,struct uffd_args * args,unsigned long len)1085 uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts,
1086 struct uffd_msg *msg,
1087 struct uffd_args *args,
1088 unsigned long len)
1089 {
1090 unsigned long offset;
1091
1092 if (msg->event != UFFD_EVENT_PAGEFAULT)
1093 err("unexpected msg event %u", msg->event);
1094
1095 if (msg->arg.pagefault.flags &
1096 (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE))
1097 err("unexpected fault type %llu", msg->arg.pagefault.flags);
1098
1099 offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst;
1100 offset &= ~(len-1);
1101
1102 if (move_page(gopts, offset, len))
1103 args->missing_faults++;
1104 }
1105
/*
 * Fault handler for the base-page move test: forwards to
 * uffd_move_handle_fault_common() with gopts->page_size as the move length.
 */
static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg,
				   struct uffd_args *args)
{
	uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size);
}
1111
/*
 * Fault handler for the PMD-sized move tests: forwards to
 * uffd_move_handle_fault_common() with the value returned by
 * read_pmd_pagesize() as the move length (queried on every fault).
 */
static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg,
				       struct uffd_args *args)
{
	uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize());
}
1117
1118 static void
uffd_move_test_common(uffd_global_test_opts_t * gopts,uffd_test_args_t * targs,unsigned long chunk_size,void (* handle_fault)(struct uffd_global_test_opts * gopts,struct uffd_msg * msg,struct uffd_args * args))1119 uffd_move_test_common(uffd_global_test_opts_t *gopts,
1120 uffd_test_args_t *targs,
1121 unsigned long chunk_size,
1122 void (*handle_fault)(struct uffd_global_test_opts *gopts,
1123 struct uffd_msg *msg, struct uffd_args *args)
1124 )
1125 {
1126 unsigned long nr;
1127 pthread_t uffd_mon;
1128 char c;
1129 unsigned long long count;
1130 struct uffd_args args = { 0 };
1131 char *orig_area_src = NULL, *orig_area_dst = NULL;
1132 unsigned long step_size, step_count;
1133 unsigned long src_offs = 0;
1134 unsigned long dst_offs = 0;
1135
1136 args.gopts = gopts;
1137
1138 /* Prevent source pages from being mapped more than once */
1139 if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK))
1140 err("madvise(MADV_DONTFORK) failure");
1141
1142 if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
1143 true, false, false))
1144 err("register failure");
1145
1146 args.handle_fault = handle_fault;
1147 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
1148 err("uffd_poll_thread create");
1149
1150 step_size = chunk_size / gopts->page_size;
1151 step_count = gopts->nr_pages / step_size;
1152
1153 if (chunk_size > gopts->page_size) {
1154 char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size);
1155 char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size);
1156
1157 if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) {
1158 src_offs = (aligned_src - gopts->area_src) / gopts->page_size;
1159 dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size;
1160 step_count--;
1161 }
1162 orig_area_src = gopts->area_src;
1163 orig_area_dst = gopts->area_dst;
1164 gopts->area_src = aligned_src;
1165 gopts->area_dst = aligned_dst;
1166 }
1167
1168 /*
1169 * Read each of the pages back using the UFFD-registered mapping. We
1170 * expect that the first time we touch a page, it will result in a missing
1171 * fault. uffd_poll_thread will resolve the fault by moving source
1172 * page to destination.
1173 */
1174 for (nr = 0; nr < step_count * step_size; nr += step_size) {
1175 unsigned long i;
1176
1177 /* Check area_src content */
1178 for (i = 0; i < step_size; i++) {
1179 count = *area_count(gopts->area_src, nr + i, gopts);
1180 if (count != gopts->count_verify[src_offs + nr + i])
1181 err("nr %lu source memory invalid %llu %llu\n",
1182 nr + i, count, gopts->count_verify[src_offs + nr + i]);
1183 }
1184
1185 /* Faulting into area_dst should move the page or the huge page */
1186 for (i = 0; i < step_size; i++) {
1187 count = *area_count(gopts->area_dst, nr + i, gopts);
1188 if (count != gopts->count_verify[dst_offs + nr + i])
1189 err("nr %lu memory corruption %llu %llu\n",
1190 nr, count, gopts->count_verify[dst_offs + nr + i]);
1191 }
1192
1193 /* Re-check area_src content which should be empty */
1194 for (i = 0; i < step_size; i++) {
1195 count = *area_count(gopts->area_src, nr + i, gopts);
1196 if (count != 0)
1197 err("nr %lu move failed %llu %llu\n",
1198 nr, count, gopts->count_verify[src_offs + nr + i]);
1199 }
1200 }
1201 if (chunk_size > gopts->page_size) {
1202 gopts->area_src = orig_area_src;
1203 gopts->area_dst = orig_area_dst;
1204 }
1205
1206 if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
1207 err("pipe write");
1208 if (pthread_join(uffd_mon, NULL))
1209 err("join() failed");
1210
1211 if (args.missing_faults != step_count || args.minor_faults != 0)
1212 uffd_test_fail("stats check error");
1213 else
1214 uffd_test_pass();
1215 }
1216
/*
 * UFFDIO_MOVE test at base-page granularity: runs uffd_move_test_common()
 * with a chunk size of gopts->page_size and the per-page fault handler.
 */
static void uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
{
	uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault);
}
1221
/*
 * UFFDIO_MOVE test using chunks of read_pmd_pagesize() bytes.  The whole
 * destination area is first marked MADV_HUGEPAGE (failure is reported
 * through err()), then uffd_move_test_common() is run with the PMD-sized
 * fault handler.
 */
static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
{
	if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE))
		err("madvise(MADV_HUGEPAGE) failure");
	uffd_move_test_common(gopts, targs, read_pmd_pagesize(),
			      uffd_move_pmd_handle_fault);
}
1229
/*
 * Same as uffd_move_pmd_test() except that the destination area is marked
 * MADV_NOHUGEPAGE before the PMD-sized moves are performed -- presumably so
 * that a huge source page has to be split on move, as the test name
 * suggests; confirm against the kernel's UFFDIO_MOVE implementation.
 */
static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
{
	if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE))
		err("madvise(MADV_NOHUGEPAGE) failure");
	uffd_move_test_common(gopts, targs, read_pmd_pagesize(),
			      uffd_move_pmd_handle_fault);
}
1237
/*
 * Validate the outcome of one uffd ioctl that was expected to be refused
 * with EAGAIN.
 *
 * @name:   ioctl name used in the failure message.
 * @ret:    return value of ioctl(); must be -1.
 * @error:  errno sampled after the call; must be EAGAIN.
 * @result: result field the ioctl writes back into its argument struct
 *          (e.g. copy.copy for UFFDIO_COPY); must be -EAGAIN.
 *
 * The checks run in that order and the first one that fails is reported
 * through uffd_test_fail().  Returns true only when all three hold.
 */
static bool
uffdio_verify_results(const char *name, int ret, int error, long result)
{
	bool ok = false;

	if (ret != -1) {
		uffd_test_fail("%s should have returned -1", name);
	} else if (error != EAGAIN) {
		uffd_test_fail("%s should have errno==EAGAIN", name);
	} else if (result != -EAGAIN) {
		uffd_test_fail("%s should have been updated for -EAGAIN",
			       name);
	} else {
		ok = true;
	}

	return ok;
}
1264
/*
 * Generate uffdio_mmap_changing_test_<name>(fd): it issues one @ioctl_name
 * ioctl on @fd with a struct uffdio_<name> whose @field is pre-set to 1
 * (all other members are zero-initialized) and passes the ioctl return
 * value, errno and the final value of @field to uffdio_verify_results().
 *
 * The initial value of @field only has to differ from -EAGAIN (1 is used):
 * finding -EAGAIN in it afterwards then proves that the kernel updated the
 * field when it rejected the request.
 */
#define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field)		\
	static bool uffdio_mmap_changing_test_##name(int fd)		\
	{								\
		int ret;						\
		struct uffdio_##name args = {				\
			.field = 1,					\
		};							\
		ret = ioctl(fd, ioctl_name, &args);			\
		return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \
	}

/* One generated checker per ioctl, named after the first macro argument. */
DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage)
DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy)
DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move)
DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated)
DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped)
1287
/* Coarse thread state as reported by thread_state_get(). */
typedef enum {
	/* We actually do not care about any state except UNINTERRUPTIBLE.. */
	THR_STATE_UNKNOWN = 0,
	/* The "State:" line of /proc/<tid>/status reported 'D'. */
	THR_STATE_UNINTERRUPTIBLE,
} thread_state;
1293
/* Arguments handed to uffd_mmap_changing_thread(). */
typedef struct {
	/* Global test options; the thread uses area_dst and page_size. */
	uffd_global_test_opts_t *gopts;
	/* Slot (must start as 0) where the thread stores its gettid() value. */
	volatile pid_t *pid;
} mmap_changing_thread_args;
1298
/* Sleep for 1000 microseconds; used as the back-off step in polling loops. */
static void sleep_short(void)
{
	usleep(1000);
}
1303
thread_state_get(pid_t tid)1304 static thread_state thread_state_get(pid_t tid)
1305 {
1306 const char *header = "State:\t";
1307 char tmp[256], *p, c;
1308 FILE *fp;
1309
1310 snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid);
1311 fp = fopen(tmp, "r");
1312
1313 if (!fp)
1314 return THR_STATE_UNKNOWN;
1315
1316 while (fgets(tmp, sizeof(tmp), fp)) {
1317 p = strstr(tmp, header);
1318 if (p) {
1319 /* For example, "State:\tD (disk sleep)" */
1320 c = *(p + sizeof(header) - 1);
1321 return c == 'D' ?
1322 THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN;
1323 }
1324 }
1325
1326 return THR_STATE_UNKNOWN;
1327 }
1328
/*
 * Poll thread @tid until thread_state_get() reports @state.  Every poll,
 * including the one that finally matches, is followed by a sleep_short().
 * There is no timeout.
 */
static void thread_state_until(pid_t tid, thread_state state)
{
	for (;;) {
		thread_state cur = thread_state_get(tid);

		sleep_short();
		if (cur == state)
			break;
	}
}
1338
uffd_mmap_changing_thread(void * opaque)1339 static void *uffd_mmap_changing_thread(void *opaque)
1340 {
1341 mmap_changing_thread_args *args = opaque;
1342 uffd_global_test_opts_t *gopts = args->gopts;
1343 volatile pid_t *pid = args->pid;
1344 int ret;
1345
1346 /* Unfortunately, it's only fetch-able from the thread itself.. */
1347 assert(*pid == 0);
1348 *pid = syscall(SYS_gettid);
1349
1350 /* Inject an event, this will hang solid until the event read */
1351 ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE);
1352 if (ret)
1353 err("madvise(MADV_REMOVE) failed");
1354
1355 return NULL;
1356 }
1357
/*
 * Call uffd_read_msg() repeatedly until it returns zero; the message that
 * was read is discarded.  (Presumably a non-zero return means "nothing
 * read yet, retry" -- confirm against uffd_read_msg().)
 */
static void uffd_consume_message(uffd_global_test_opts_t *gopts)
{
	struct uffd_msg msg = { 0 };

	for (;;) {
		if (!uffd_read_msg(gopts, &msg))
			break;
	}
}
1364
/*
 * Check that uffd ioctls are refused with EAGAIN while an event is pending.
 *
 * The whole of area_dst is registered in missing mode, then a helper thread
 * issues madvise(MADV_REMOVE) on it.  Once that thread is seen in
 * uninterruptible sleep, the COPY, ZEROPAGE, MOVE, POISON and CONTINUE
 * checkers generated by DEFINE_MMAP_CHANGING_TEST are run in that order,
 * stopping at the first one that fails (each reports its own failure).
 *
 * Whatever the outcome, the pending event is then read so the helper thread
 * can finish, and the thread is joined.  uffd_test_pass() is called only
 * when every checker succeeded.  @targs is unused.
 */
static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
{
	/*
	 * This stores the real PID (which can be different from how tid is
	 * defined..) for the child thread, 0 means not initialized.
	 */
	pid_t pid = 0;
	pthread_t tid;
	int ret;
	bool all_ok;
	mmap_changing_thread_args args = { gopts, &pid };

	if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
			  true, false, false))
		err("uffd_register() failed");

	/* Create a thread to generate the racy event */
	ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args);
	if (ret)
		err("pthread_create() failed");

	/*
	 * Wait until the thread setup the pid.  The access itself has to go
	 * through a volatile lvalue to be re-read from memory on every
	 * iteration; casting the already-loaded value would not do that.
	 */
	while (!*(volatile pid_t *)&pid)
		sleep_short();

	/* Wait until the thread hangs at REMOVE event */
	thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE);

	/* Short-circuit evaluation keeps the order and stops at first failure. */
	all_ok = uffdio_mmap_changing_test_copy(gopts->uffd) &&
		 uffdio_mmap_changing_test_zeropage(gopts->uffd) &&
		 uffdio_mmap_changing_test_move(gopts->uffd) &&
		 uffdio_mmap_changing_test_poison(gopts->uffd) &&
		 uffdio_mmap_changing_test_continue(gopts->uffd);

	/*
	 * Recycle everything, on failure too, so the helper thread is never
	 * left behind unjoined.  Start by reading the event so as to kick the
	 * thread roll again..
	 */
	uffd_consume_message(gopts);

	ret = pthread_join(tid, NULL);
	assert(ret == 0);

	if (all_ok)
		uffd_test_pass();
}
1421
/*
 * post_alloc hook: mark the whole source area MADV_NOHUGEPAGE.
 *
 * Returns 0 on success, and also when madvise() fails with EINVAL.  Any
 * other failure returns -errno and, when @errmsg is non-NULL, stores a
 * static description in *errmsg.
 */
static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg)
{
	/* This should be done before source area is populated */
	if (!madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE))
		return 0;

	/* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */
	if (errno == EINVAL)
		return 0;

	if (errmsg)
		*errmsg = "madvise(MADV_NOHUGEPAGE) failed";

	return -errno;
}
1435
/*
 * post_alloc hook: mark the whole source area MADV_HUGEPAGE.
 *
 * Returns 0 on success.  On failure returns -errno and, when @errmsg is
 * non-NULL, stores a static description in *errmsg; EINVAL is described as
 * CONFIG_TRANSPARENT_HUGEPAGE not being set.
 */
static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg)
{
	/* This should be done before source area is populated */
	if (!madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE))
		return 0;

	if (errmsg) {
		if (errno == EINVAL)
			*errmsg = "CONFIG_TRANSPARENT_HUGEPAGE is not set";
		else
			*errmsg = "madvise(MADV_HUGEPAGE) failed";
	}

	return -errno;
}
1449
/* Per-test hooks for "move": keep the source area out of huge pages. */
struct uffd_test_case_ops uffd_move_test_case_ops = {
	.post_alloc = prevent_hugepages,
};
1453
/* Per-test hooks for the "move-pmd*" tests: request huge pages for the source area. */
struct uffd_test_case_ops uffd_move_test_pmd_case_ops = {
	.post_alloc = request_hugepages,
};
1457
1458 /*
1459 * Test the returned uffdio_register.ioctls with different register modes.
1460 * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
1461 */
1462 static void
do_register_ioctls_test(uffd_global_test_opts_t * gopts,uffd_test_args_t * args,bool miss,bool wp,bool minor)1463 do_register_ioctls_test(uffd_global_test_opts_t *gopts,
1464 uffd_test_args_t *args,
1465 bool miss,
1466 bool wp,
1467 bool minor)
1468 {
1469 uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE);
1470 mem_type_t *mem_type = args->mem_type;
1471 int ret;
1472
1473 ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size,
1474 miss, wp, minor, &ioctls);
1475
1476 /*
1477 * Handle special cases of UFFDIO_REGISTER here where it should
1478 * just fail with -EINVAL first..
1479 *
1480 * Case 1: register MINOR on anon
1481 * Case 2: register with no mode selected
1482 */
1483 if ((minor && (mem_type->mem_flag == MEM_ANON)) ||
1484 (!miss && !wp && !minor)) {
1485 if (ret != -EINVAL)
1486 err("register (miss=%d, wp=%d, minor=%d) failed "
1487 "with wrong errno=%d", miss, wp, minor, ret);
1488 return;
1489 }
1490
1491 /* UFFDIO_REGISTER should succeed, then check ioctls returned */
1492 if (miss)
1493 expected |= BIT_ULL(_UFFDIO_COPY);
1494 if (wp)
1495 expected |= BIT_ULL(_UFFDIO_WRITEPROTECT);
1496 if (minor)
1497 expected |= BIT_ULL(_UFFDIO_CONTINUE);
1498
1499 if ((ioctls & expected) != expected)
1500 err("unexpected uffdio_register.ioctls "
1501 "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", "
1502 "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls);
1503
1504 if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
1505 err("unregister");
1506 }
1507
/*
 * Run do_register_ioctls_test() for all eight combinations of the
 * (miss, wp, minor) register modes, then mark the test as passed.
 */
static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
{
	unsigned int modes;

	/*
	 * Bit 2 selects miss, bit 1 wp and bit 0 minor, so counting upwards
	 * varies minor fastest and miss slowest.
	 */
	for (modes = 0; modes < 8; modes++)
		do_register_ioctls_test(gopts, args,
					(modes >> 2) & 1,
					(modes >> 1) & 1,
					modes & 1);

	uffd_test_pass();
}
1519
/*
 * Table of all unit tests, walked in order by main().  For each entry:
 *
 *   .name                   name printed for the test and matched (as a
 *                           substring) against the -f filter
 *   .uffd_fn                function implementing the test
 *   .mem_targets            mask of MEM_* types the test is run on
 *   .uffd_feature_required  UFFD_FEATURE_* bits the test needs; the test is
 *                           skipped when they are not supported
 *   .test_case_ops          optional per-test hooks (e.g. post_alloc)
 */
uffd_test_case_t uffd_tests[] = {
	{
		/* Test returned uffdio_register.ioctls. */
		.name = "register-ioctls",
		.uffd_fn = uffd_register_ioctls_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS |
		UFFD_FEATURE_MISSING_SHMEM |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
		UFFD_FEATURE_MINOR_HUGETLBFS |
		UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		.name = "zeropage",
		.uffd_fn = uffd_zeropage_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = 0,
	},
	{
		/* UFFDIO_MOVE one base page at a time, source kept out of THP. */
		.name = "move",
		.uffd_fn = uffd_move_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required = UFFD_FEATURE_MOVE,
		.test_case_ops = &uffd_move_test_case_ops,
	},
	{
		/* UFFDIO_MOVE in PMD-sized chunks, destination MADV_HUGEPAGE. */
		.name = "move-pmd",
		.uffd_fn = uffd_move_pmd_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required = UFFD_FEATURE_MOVE,
		.test_case_ops = &uffd_move_test_pmd_case_ops,
	},
	{
		/* UFFDIO_MOVE in PMD-sized chunks, destination MADV_NOHUGEPAGE. */
		.name = "move-pmd-split",
		.uffd_fn = uffd_move_pmd_split_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required = UFFD_FEATURE_MOVE,
		.test_case_ops = &uffd_move_test_pmd_case_ops,
	},
	{
		.name = "wp-fork",
		.uffd_fn = uffd_wp_fork_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
	{
		.name = "wp-fork-with-event",
		.uffd_fn = uffd_wp_fork_with_event_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
		/* when set, child process should inherit uffd-wp bits */
		UFFD_FEATURE_EVENT_FORK,
	},
	{
		.name = "wp-fork-pin",
		.uffd_fn = uffd_wp_fork_pin_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
	{
		.name = "wp-fork-pin-with-event",
		.uffd_fn = uffd_wp_fork_pin_with_event_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
		/* when set, child process should inherit uffd-wp bits */
		UFFD_FEATURE_EVENT_FORK,
	},
	{
		.name = "wp-unpopulated",
		.uffd_fn = uffd_wp_unpopulated_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required =
		UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED,
	},
	{
		.name = "minor",
		.uffd_fn = uffd_minor_test,
		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
		.uffd_feature_required =
		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		.name = "minor-wp",
		.uffd_fn = uffd_minor_wp_test,
		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
		.uffd_feature_required =
		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		/*
		 * HACK: here we leveraged WP_UNPOPULATED to detect whether
		 * minor mode supports wr-protect.  There's no feature flag
		 * for it so this is the best we can test against.
		 */
		UFFD_FEATURE_WP_UNPOPULATED,
	},
	{
		.name = "minor-collapse",
		.uffd_fn = uffd_minor_collapse_test,
		/* MADV_COLLAPSE only works with shmem */
		.mem_targets = MEM_SHMEM,
		/* We can't test MADV_COLLAPSE, so try our luck */
		.uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		.name = "sigbus",
		.uffd_fn = uffd_sigbus_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
		UFFD_FEATURE_EVENT_FORK,
	},
	{
		.name = "sigbus-wp",
		.uffd_fn = uffd_sigbus_wp_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
		UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
	{
		.name = "events",
		.uffd_fn = uffd_events_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE,
	},
	{
		.name = "events-wp",
		.uffd_fn = uffd_events_wp_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
	{
		/* UFFDIO_POISON: poisoned pages must raise SIGBUS on access. */
		.name = "poison",
		.uffd_fn = uffd_poison_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_POISON,
	},
	{
		.name = "mmap-changing",
		.uffd_fn = uffd_mmap_changing_test,
		/*
		 * There's no point running this test over all mem types as
		 * they share the same code paths.
		 *
		 * Choose shmem for simplicity, because (1) shmem supports
		 * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is
		 * almost always available (unlike hugetlb).  Here we
		 * abused SHMEM for UFFDIO_MOVE, but the test we want to
		 * cover doesn't yet need the correct memory type..
		 */
		.mem_targets = MEM_SHMEM,
		/*
		 * Any UFFD_FEATURE_EVENT_* should work to trigger the
		 * race logically, but choose the simplest (REMOVE).
		 *
		 * Meanwhile, since we'll cover quite a few new ioctl()s
		 * (CONTINUE, POISON, MOVE), skip this test for old kernels
		 * by choosing all of them.
		 */
		.uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE |
		UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON |
		UFFD_FEATURE_MINOR_SHMEM,
	},
};
1692
usage(const char * prog)1693 static void usage(const char *prog)
1694 {
1695 printf("usage: %s [-f TESTNAME]\n", prog);
1696 puts("");
1697 puts(" -f: test name to filter (e.g., event)");
1698 puts(" -h: show the help msg");
1699 puts(" -l: list tests only");
1700 puts("");
1701 exit(KSFT_FAIL);
1702 }
1703
main(int argc,char * argv[])1704 int main(int argc, char *argv[])
1705 {
1706 int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
1707 int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
1708 const char *test_filter = NULL;
1709 bool list_only = false;
1710 uffd_test_case_t *test;
1711 mem_type_t *mem_type;
1712 uffd_test_args_t args;
1713 const char *errmsg;
1714 int has_uffd, opt;
1715 int i, j;
1716
1717 while ((opt = getopt(argc, argv, "f:hl")) != -1) {
1718 switch (opt) {
1719 case 'f':
1720 test_filter = optarg;
1721 break;
1722 case 'l':
1723 list_only = true;
1724 break;
1725 case 'h':
1726 default:
1727 /* Unknown */
1728 usage(argv[0]);
1729 break;
1730 }
1731 }
1732
1733 if (!test_filter && !list_only) {
1734 has_uffd = test_uffd_api(false);
1735 has_uffd |= test_uffd_api(true);
1736
1737 if (!has_uffd) {
1738 printf("Userfaultfd not supported or unprivileged, skip all tests\n");
1739 exit(KSFT_SKIP);
1740 }
1741 }
1742
1743 for (i = 0; i < n_tests; i++) {
1744 test = &uffd_tests[i];
1745 if (test_filter && !strstr(test->name, test_filter))
1746 continue;
1747 if (list_only) {
1748 printf("%s\n", test->name);
1749 continue;
1750 }
1751 for (j = 0; j < n_mems; j++) {
1752 mem_type = &mem_types[j];
1753
1754 /* Initialize global test options */
1755 uffd_global_test_opts_t gopts = { 0 };
1756
1757 gopts.map_shared = mem_type->shared;
1758 uffd_test_ops = mem_type->mem_ops;
1759 uffd_test_case_ops = test->test_case_ops;
1760
1761 if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
1762 gopts.page_size = default_huge_page_size();
1763 else
1764 gopts.page_size = psize();
1765
1766 /* Ensure we have at least 2 pages */
1767 gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
1768 / gopts.page_size;
1769
1770 gopts.nr_parallel = 1;
1771
1772 /* Initialize test arguments */
1773 args.mem_type = mem_type;
1774
1775 if (!(test->mem_targets & mem_type->mem_flag))
1776 continue;
1777
1778 uffd_test_start("%s on %s", test->name, mem_type->name);
1779 if ((mem_type->mem_flag == MEM_HUGETLB ||
1780 mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
1781 (default_huge_page_size() == 0)) {
1782 uffd_test_skip("huge page size is 0, feature missing?");
1783 continue;
1784 }
1785 if (!uffd_feature_supported(test)) {
1786 uffd_test_skip("feature missing");
1787 continue;
1788 }
1789 if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) {
1790 uffd_test_skip(errmsg);
1791 continue;
1792 }
1793 test->uffd_fn(&gopts, &args);
1794 uffd_test_ctx_clear(&gopts);
1795 }
1796 }
1797
1798 if (!list_only)
1799 uffd_test_report();
1800
1801 return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
1802 }
1803
1804 #else /* __NR_userfaultfd */
1805
1806 #warning "missing __NR_userfaultfd definition"
1807
main(void)1808 int main(void)
1809 {
1810 printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
1811 return KSFT_SKIP;
1812 }
1813
1814 #endif /* __NR_userfaultfd */
1815