// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
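/*
 * Build/run sketch (paths assumed, not part of this file): typically built
 * via the mm selftests Makefile, e.g.
 *	make -C tools/testing/selftests/mm cow
 * and run as ./cow. Requires /proc/self/pagemap; the R/O longterm pin tests
 * additionally need CONFIG_GUP_TEST (/sys/kernel/debug/gup_test) and are
 * skipped otherwise.
 */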
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"
#include "thp_settings.h"

static size_t pagesize;
static int pagemap_fd;
static size_t pmdsize;
static int nr_thpsizes;
static size_t thpsizes[20];
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;

static int detect_thp_sizes(size_t sizes[], int max)
{
	int count = 0;
	unsigned long orders;
	size_t kb;
	int i;

	/* thp not supported at all. */
	if (!pmdsize)
		return 0;

	orders = 1UL << sz2ord(pmdsize, pagesize);
	orders |= thp_supported_orders();

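	/*
	 * Each set bit i in "orders" denotes a supported THP size of
	 * pagesize << i; walk and clear them from lowest to highest.
	 */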
	for (i = 0; orders && count < max; i++) {
		if (!(orders & (1UL << i)))
			continue;
		orders &= ~(1UL << i);
		kb = (pagesize >> 10) << i;
		sizes[count++] = kb * 1024;
		ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
	}

	return count;
}

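/*
 * pagemap_is_swapped() (vm_util.h) inspects the page's 64-bit entry in
 * /proc/self/pagemap, where bit 62 flags a swapped-out page; the range only
 * counts as swapped if every page in it is.
 */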
static bool range_is_swapped(void *addr, size_t size)
{
	for (; size; addr += pagesize, size -= pagesize)
		if (!pagemap_is_swapped(pagemap_fd, addr))
			return false;
	return true;
}

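/*
 * One pipe per direction for a one-byte handshake around fork(): the child
 * signals readiness on child_ready[1] and the parent releases it via
 * parent_ready[1], e.g. (the pattern used throughout this file):
 *
 *	write(comm_pipes->child_ready[1], "0", 1);
 *	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
 *		;
 */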
struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};

static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
	if (pipe(comm_pipes->child_ready) < 0) {
		ksft_perror("pipe() failed");
		return -errno;
	}
	if (pipe(comm_pipes->parent_ready) < 0) {
		ksft_perror("pipe() failed");
		close(comm_pipes->child_ready[0]);
		close(comm_pipes->child_ready[1]);
		return -errno;
	}

	return 0;
}

static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
	close(comm_pipes->child_ready[0]);
	close(comm_pipes->child_ready[1]);
	close(comm_pipes->parent_ready[0]);
	close(comm_pipes->parent_ready[1]);
}

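/*
 * Child side of the basic COW test: snapshot the memory, tell the parent
 * we're ready, wait until it modified the pages, then report whether our
 * (supposedly COW-protected) view still matches the snapshot.
 */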
static int child_memcmp_fn(char *mem, size_t size,
			   struct comm_pipes *comm_pipes)
{
	char *old = malloc(size);
	char buf;

	/* Backup the original content. */
	memcpy(old, mem, size);

	/* Wait until the parent modified the page. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values. */
	return memcmp(old, mem, size);
}

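/*
 * Like child_memcmp_fn(), but take a R/O GUP pin via vmsplice() and unmap
 * the pages first: once the parent wrote to them, the pipe must still yield
 * the old content, otherwise the pinned pages were improperly COW-shared.
 */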
static int child_vmsplice_memcmp_fn(char *mem, size_t size,
				    struct comm_pipes *comm_pipes)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred;
	char *old, *new;
	int fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	/* Backup the original content. */
	memcpy(old, mem, size);

	if (pipe(fds) < 0)
		return -errno;

	/* Trigger a read-only pin. */
	transferred = vmsplice(fds[1], &iov, 1, 0);
	if (transferred < 0)
		return -errno;
	if (transferred == 0)
		return -EINVAL;

	/* Unmap it from our page tables. */
	if (munmap(mem, size) < 0)
		return -errno;

	/* Wait until the parent modified it. */
	write(comm_pipes->child_ready[1], "0", 1);
	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
		;

	/* See if we still read the old values via the pipe. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0)
			return -errno;
	}

	return memcmp(old, new, transferred);
}

typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn, bool xfail)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		exit(fn(mem, size, &comm_pipes));
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}

		ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret) {
		log_test_result(KSFT_PASS);
	} else if (xfail) {
		/*
		 * With hugetlb, some vmsplice() tests are currently expected to
		 * fail because (a) harder to fix and (b) nobody really cares.
		 * Flag them as expected failure for now.
		 */
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_XFAIL);
	} else {
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_FAIL);
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
}

static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
}

static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
			      is_hugetlb);
}

static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
					    bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
			      is_hugetlb);
}

static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork, bool xfail)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred = 0;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);

	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_perror("pipe() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_perror("vmsplice() failed");
			log_test_result(KSFT_FAIL);
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_perror("vmsplice() failed");
			log_test_result(KSFT_FAIL);
			wait(&ret);
			goto close_pipe;
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_perror("munmap() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_perror("wait() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_perror("read() failed");
			log_test_result(KSFT_FAIL);
			goto close_pipe;
		}
	}

	if (!memcmp(old, new, transferred)) {
		log_test_result(KSFT_PASS);
	} else if (xfail) {
		/*
		 * With hugetlb, some vmsplice() tests are currently expected to
		 * fail because (a) harder to fix and (b) nobody really cares.
		 * Flag them as expected failure for now.
		 */
		ksft_print_msg("Leak from child into parent\n");
		log_test_result(KSFT_XFAIL);
	} else {
		ksft_print_msg("Leak from child into parent\n");
		log_test_result(KSFT_FAIL);
	}
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}

static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_vmsplice_in_parent(mem, size, true, is_hugetlb);
}

static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_vmsplice_in_parent(mem, size, false, is_hugetlb);
}

#ifdef LOCAL_CONFIG_HAVE_LIBURING
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	char *buf, *tmp;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_perror("tmpfile() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd >= 0);

	tmp = malloc(size);
	if (!tmp) {
		ksft_print_msg("malloc() failed\n");
		log_test_result(KSFT_FAIL);
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_print_msg("io_uring_queue_init() failed\n");
		log_test_result(KSFT_SKIP);
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_print_msg("io_uring_register_buffers() failed\n");
		log_test_result(KSFT_SKIP);
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		}

		clear_softdirty();
		ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_print_msg("io_uring_get_sqe() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_print_msg("io_uring_submit() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_print_msg("io_uring_wait_cqe() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}

	if (cqe->res != size) {
		ksft_print_msg("write_fixed failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_perror("pread() failed");
			log_test_result(KSFT_FAIL);
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	if (!memcmp(mem, tmp, size)) {
		log_test_result(KSFT_PASS);
	} else {
		ksft_print_msg("Longterm R/W pin is not reliable\n");
		log_test_result(KSFT_FAIL);
	}

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
{
	do_test_iouring(mem, size, false);
}

static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_iouring(mem, size, true);
}

#endif /* LOCAL_CONFIG_HAVE_LIBURING */

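/*
 * Variants of the R/O longterm pin test: pin while the page is plain
 * exclusive, while it is COW-shared with a child, after such sharing ended
 * (previously shared), or while it is exclusive but mapped R/O.
 */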
enum ro_pin_test {
	RO_PIN_TEST,
	RO_PIN_TEST_SHARED,
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	RO_PIN_TEST_RO_EXCLUSIVE,
};

static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_print_msg("gup_test not available\n");
		log_test_result(KSFT_SKIP);
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_perror("malloc() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		if (errno == EINVAL)
			ret = KSFT_SKIP;
		else
			ret = KSFT_FAIL;
		ksft_perror("PIN_LONGTERM_TEST_START failed");
		log_test_result(ret);
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret) {
		ksft_perror("PIN_LONGTERM_TEST_READ failed");
		log_test_result(KSFT_FAIL);
	} else {
		if (!memcmp(mem, tmp, size)) {
			log_test_result(KSFT_PASS);
		} else {
			ksft_print_msg("Longterm R/O pin is not reliable\n");
			log_test_result(KSFT_FAIL);
		}
	}

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_perror("PIN_LONGTERM_TEST_STOP failed");
wait:
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_perror("wait() failed");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}

static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
						     bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
					bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
					     bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}

typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);

static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore a failure if MADV_NOHUGEPAGE is not supported by this kernel. */
	if (ret && errno != EINVAL) {
		ksft_perror("MADV_NOHUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 1, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
	}

	fn(mem, pagesize, false);
munmap:
	munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
	log_test_start("%s ... with base page", desc);
	do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	log_test_start("%s ... with swapped out base page", desc);
	do_run_with_base_page(fn, true);
}

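/*
 * Ways a THP can be presented to a test: PMD-mapped, PTE-mapped, reduced to
 * a single remaining PTE, partially mremap()'ed, or partially shared with a
 * child, with swapped-out variants where that makes sense.
 */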
enum thp_run {
	THP_RUN_PMD,
	THP_RUN_PMD_SWAPOUT,
	THP_RUN_PTE,
	THP_RUN_PTE_SWAPOUT,
	THP_RUN_SINGLE_PTE,
	THP_RUN_SINGLE_PTE_SWAPOUT,
	THP_RUN_PARTIAL_MREMAP,
	THP_RUN_PARTIAL_SHARED,
};

static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	/* We need a THP-aligned memory area: round up to the next boundary. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("MADV_HUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if
	 * we get the last sub-page populated automatically.
	 */
	mem[0] = 1;
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_print_msg("Did not get a THP populated\n");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	memset(mem, 1, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		assert(thpsize == pmdsize);
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O. This is a noop if the THP is not pmdsize (and
		 * therefore already PTE-mapped).
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_perror("MADV_DONTNEED failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_perror("mmap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_perror("mremap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * having been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size, false);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}

static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD, size);
}

static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with PTE-mapped THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE, size);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially shared THP (%zu kB)",
		       desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}

static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	log_test_start("%s ... with hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

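	/*
	 * mmap() expects log2 of the hugetlb page size encoded in the flag
	 * bits starting at MAP_HUGE_SHIFT; __builtin_ctzll() of the
	 * power-of-two size yields exactly that (e.g. 21 for 2 MiB).
	 */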
	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* Populate a huge page. */
	memset(mem, 1, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize, true);
munmap:
	munmap(mem, hugetlbsize);
}

struct test_case {
	const char *desc;
	test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we fail to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we fail to break COW, the child observes modifications by the
	 * parent. This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child,
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we fail to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we fail to break COW, the parent observes modifications
	 * by the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};

static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	for (i = 0; i < nr_thpsizes; i++) {
		size_t size = thpsizes[i];
		struct thp_settings settings = *thp_current_settings();

		settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER;
		settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS;
		thp_push_settings(&settings);

		if (size == pmdsize) {
			run_with_thp(test_case->fn, test_case->desc, size);
			run_with_thp_swap(test_case->fn, test_case->desc, size);
		}

		run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
		run_with_partial_shared_thp(test_case->fn, test_case->desc, size);

		thp_pop_settings();
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}

static void run_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
		run_anon_test_case(&anon_test_cases[i]);
}

static int tests_per_anon_test_case(void)
{
	int tests = 2 + nr_hugetlbsizes;

	tests += 6 * nr_thpsizes;
	if (pmdsize)
		tests += 2;
	return tests;
}

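/*
 * MADV_COLLAPSE scenarios for a PTE-mapped THP: collapse before any
 * COW-sharing happened, or after COW-sharing all of it, only the lower half,
 * or only the upper half with a child.
 */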
enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,
	ANON_THP_COLLAPSE_FULLY_SHARED,
	ANON_THP_COLLAPSE_LOWER_SHARED,
	ANON_THP_COLLAPSE_UPPER_SHARED,
};

static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret) {
		log_test_result(KSFT_PASS);
	} else {
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_FAIL);
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}

static void test_anon_thp_collapse_unshared(char *mem, size_t size,
					    bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
	 *
	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
	 * collapse") might easily get COW handling wrong when not collapsing
	 * exclusivity information properly.
	 */
	{
		"Basic COW after fork() when collapsing before fork()",
		test_anon_thp_collapse_unshared,
	},
	/* Basic COW test, but collapse after COW-sharing a full THP. */
	{
		"Basic COW after fork() when collapsing after fork() (fully shared)",
		test_anon_thp_collapse_fully_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the lower half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (lower shared)",
		test_anon_thp_collapse_lower_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the upper half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (upper shared)",
		test_anon_thp_collapse_upper_shared,
	},
};

static void run_anon_thp_test_cases(void)
{
	int i;

	if (!pmdsize)
		return;

	ksft_print_msg("[INFO] Anonymous THP tests\n");

	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
		struct test_case const *test_case = &anon_thp_test_cases[i];

		log_test_start("%s", test_case->desc);
		do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
	}
}

static int tests_per_anon_thp_test_case(void)
{
	return pmdsize ? 1 : 0;
}

typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);

static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	if (!memcmp(smem, old, size)) {
		log_test_result(KSFT_PASS);
	} else {
		ksft_print_msg("Other mapping modified\n");
		log_test_result(KSFT_FAIL);
	}
	free(old);
}

static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}

static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}

static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem;

	log_test_start("%s ... with shared zeropage", desc);

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Read from the page to populate the shared zeropage. */
	FORCE_READ(mem);
	FORCE_READ(smem);

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
}

static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem;
	size_t mmap_size;
	int ret;

	log_test_start("%s ... with huge zeropage", desc);

	if (!has_huge_zeropage) {
		ksft_print_msg("Huge zeropage not enabled\n");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * pmdsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));

	ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("madvise()");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}
	ret = madvise(smem, pmdsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("madvise()");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	FORCE_READ(mem);
	FORCE_READ(smem);
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		ksft_print_msg("Did not get THPs populated\n");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}

	fn(mem, smem, pmdsize);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}

static void run_with_memfd(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem;
	int fd;

	log_test_start("%s ... with memfd", desc);

	fd = memfd_create("test", 0);
	if (fd < 0) {
		ksft_perror("memfd_create() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_perror("fallocate() failed");
		log_test_result(KSFT_FAIL);
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Fault the page in. */
	FORCE_READ(mem);
	FORCE_READ(smem);

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	close(fd);
}

static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem;
	FILE *file;
	int fd;

	log_test_start("%s ... with tmpfile", desc);

	file = tmpfile();
	if (!file) {
		ksft_perror("tmpfile() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	fd = fileno(file);
	if (fd < 0) {
		ksft_perror("fileno() failed");
		log_test_result(KSFT_SKIP);
		goto close;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_perror("fallocate() failed");
		log_test_result(KSFT_FAIL);
		goto close;
	}

	/* Create a private mapping of the file. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Fault the page in. */
	FORCE_READ(mem);
	FORCE_READ(smem);

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	fclose(file);
}

static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem;
	int fd;

	log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

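	/*
	 * As with mmap()'s MAP_HUGE_SHIFT above, memfd_create() encodes the
	 * desired hugetlb page size as log2 in the flag bits at
	 * MFD_HUGE_SHIFT.
	 */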
	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_perror("memfd_create() failed");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Fault the page in. */
	FORCE_READ(mem);
	FORCE_READ(smem);

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}

struct non_anon_test_case {
	const char *desc;
	non_anon_test_fn fn;
};

/*
 * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
 * the shared zeropage(s), pagecache pages, ...
 */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we fail to break COW, changes are
	 * visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};

static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
{
	int i;

	run_with_zeropage(test_case->fn, test_case->desc);
	run_with_memfd(test_case->fn, test_case->desc);
	run_with_tmpfile(test_case->fn, test_case->desc);
	if (pmdsize)
		run_with_huge_zeropage(test_case->fn, test_case->desc);
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
				       hugetlbsizes[i]);
}

static void run_non_anon_test_cases(void)
{
	int i;

	ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");

	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
		run_non_anon_test_case(&non_anon_test_cases[i]);
}

static int tests_per_non_anon_test_case(void)
{
	int tests = 3 + nr_hugetlbsizes;

	if (pmdsize)
		tests += 1;
	return tests;
}

int main(int argc, char **argv)
{
	struct thp_settings default_settings;

	ksft_print_header();

	pagesize = getpagesize();
	pmdsize = read_pmd_pagesize();
	if (pmdsize) {
		/* Only if THP is supported. */
		thp_read_settings(&default_settings);
		default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT;
		thp_save_settings();
		thp_push_settings(&default_settings);

		ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
			       pmdsize / 1024);
		nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
	}
	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
						    ARRAY_SIZE(hugetlbsizes));
	has_huge_zeropage = detect_huge_zeropage();

	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	if (pmdsize) {
		/* Only if THP is supported. */
		thp_restore_settings();
	}

	ksft_finished();
}