xref: /linux/tools/testing/selftests/mm/cow.c (revision 3349e275067f94ffb4141989aed9cbae7409429b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * COW (Copy On Write) tests.
4  *
5  * Copyright 2022, Red Hat, Inc.
6  *
7  * Author(s): David Hildenbrand <david@redhat.com>
8  */
9 #define _GNU_SOURCE
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include <unistd.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <assert.h>
18 #include <linux/mman.h>
19 #include <sys/mman.h>
20 #include <sys/ioctl.h>
21 #include <sys/wait.h>
22 #include <linux/memfd.h>
23 
24 #include "local_config.h"
25 #ifdef LOCAL_CONFIG_HAVE_LIBURING
26 #include <liburing.h>
27 #endif /* LOCAL_CONFIG_HAVE_LIBURING */
28 
29 #include "../../../../mm/gup_test.h"
30 #include "../kselftest.h"
31 #include "vm_util.h"
32 #include "thp_settings.h"
33 
/* System base page size; initialized outside this chunk (presumably in main). */
static size_t pagesize;
/* fd used with the pagemap_is_swapped()/pagemap_is_populated() helpers. */
static int pagemap_fd;
/* PMD-sized THP size in bytes; 0 if THP is not supported at all. */
static size_t pmdsize;
/* Number of valid entries in thpsizes[]. */
static int nr_thpsizes;
/* Detected THP sizes in bytes, ascending; filled by detect_thp_sizes(). */
static size_t thpsizes[20];
/* Number of valid entries in hugetlbsizes[]. */
static int nr_hugetlbsizes;
/* Supported hugetlb sizes in bytes; filled outside this chunk. */
static size_t hugetlbsizes[10];
/* fd for the gup_test debugfs interface; < 0 if unavailable (see do_test_ro_pin()). */
static int gup_fd;
/* True if the huge zeropage is enabled; set by detect_huge_zeropage(). */
static bool has_huge_zeropage;
43 
sz2ord(size_t size)44 static int sz2ord(size_t size)
45 {
46 	return __builtin_ctzll(size / pagesize);
47 }
48 
detect_thp_sizes(size_t sizes[],int max)49 static int detect_thp_sizes(size_t sizes[], int max)
50 {
51 	int count = 0;
52 	unsigned long orders;
53 	size_t kb;
54 	int i;
55 
56 	/* thp not supported at all. */
57 	if (!pmdsize)
58 		return 0;
59 
60 	orders = 1UL << sz2ord(pmdsize);
61 	orders |= thp_supported_orders();
62 
63 	for (i = 0; orders && count < max; i++) {
64 		if (!(orders & (1UL << i)))
65 			continue;
66 		orders &= ~(1UL << i);
67 		kb = (pagesize >> 10) << i;
68 		sizes[count++] = kb * 1024;
69 		ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
70 	}
71 
72 	return count;
73 }
74 
detect_huge_zeropage(void)75 static void detect_huge_zeropage(void)
76 {
77 	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
78 		      O_RDONLY);
79 	size_t enabled = 0;
80 	char buf[15];
81 	int ret;
82 
83 	if (fd < 0)
84 		return;
85 
86 	ret = pread(fd, buf, sizeof(buf), 0);
87 	if (ret > 0 && ret < sizeof(buf)) {
88 		buf[ret] = 0;
89 
90 		enabled = strtoul(buf, NULL, 10);
91 		if (enabled == 1) {
92 			has_huge_zeropage = true;
93 			ksft_print_msg("[INFO] huge zeropage is enabled\n");
94 		}
95 	}
96 
97 	close(fd);
98 }
99 
range_is_swapped(void * addr,size_t size)100 static bool range_is_swapped(void *addr, size_t size)
101 {
102 	for (; size; addr += pagesize, size -= pagesize)
103 		if (!pagemap_is_swapped(pagemap_fd, addr))
104 			return false;
105 	return true;
106 }
107 
/*
 * Pipe pair used to handshake between parent and child around fork()-based
 * tests: the child signals readiness via child_ready, the parent tells the
 * child to proceed via parent_ready.
 */
struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};
112 
setup_comm_pipes(struct comm_pipes * comm_pipes)113 static int setup_comm_pipes(struct comm_pipes *comm_pipes)
114 {
115 	if (pipe(comm_pipes->child_ready) < 0) {
116 		ksft_perror("pipe()");
117 		return -errno;
118 	}
119 	if (pipe(comm_pipes->parent_ready) < 0) {
120 		ksft_perror("pipe()");
121 		close(comm_pipes->child_ready[0]);
122 		close(comm_pipes->child_ready[1]);
123 		return -errno;
124 	}
125 
126 	return 0;
127 }
128 
close_comm_pipes(struct comm_pipes * comm_pipes)129 static void close_comm_pipes(struct comm_pipes *comm_pipes)
130 {
131 	close(comm_pipes->child_ready[0]);
132 	close(comm_pipes->child_ready[1]);
133 	close(comm_pipes->parent_ready[0]);
134 	close(comm_pipes->parent_ready[1]);
135 }
136 
child_memcmp_fn(char * mem,size_t size,struct comm_pipes * comm_pipes)137 static int child_memcmp_fn(char *mem, size_t size,
138 			   struct comm_pipes *comm_pipes)
139 {
140 	char *old = malloc(size);
141 	char buf;
142 
143 	/* Backup the original content. */
144 	memcpy(old, mem, size);
145 
146 	/* Wait until the parent modified the page. */
147 	write(comm_pipes->child_ready[1], "0", 1);
148 	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
149 		;
150 
151 	/* See if we still read the old values. */
152 	return memcmp(old, mem, size);
153 }
154 
child_vmsplice_memcmp_fn(char * mem,size_t size,struct comm_pipes * comm_pipes)155 static int child_vmsplice_memcmp_fn(char *mem, size_t size,
156 				    struct comm_pipes *comm_pipes)
157 {
158 	struct iovec iov = {
159 		.iov_base = mem,
160 		.iov_len = size,
161 	};
162 	ssize_t cur, total, transferred;
163 	char *old, *new;
164 	int fds[2];
165 	char buf;
166 
167 	old = malloc(size);
168 	new = malloc(size);
169 
170 	/* Backup the original content. */
171 	memcpy(old, mem, size);
172 
173 	if (pipe(fds) < 0)
174 		return -errno;
175 
176 	/* Trigger a read-only pin. */
177 	transferred = vmsplice(fds[1], &iov, 1, 0);
178 	if (transferred < 0)
179 		return -errno;
180 	if (transferred == 0)
181 		return -EINVAL;
182 
183 	/* Unmap it from our page tables. */
184 	if (munmap(mem, size) < 0)
185 		return -errno;
186 
187 	/* Wait until the parent modified it. */
188 	write(comm_pipes->child_ready[1], "0", 1);
189 	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
190 		;
191 
192 	/* See if we still read the old values via the pipe. */
193 	for (total = 0; total < transferred; total += cur) {
194 		cur = read(fds[0], new + total, transferred - total);
195 		if (cur < 0)
196 			return -errno;
197 	}
198 
199 	return memcmp(old, new, transferred);
200 }
201 
/* Function run in the forked child; its return value becomes the child's exit status (0 == success). */
typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
203 
/*
 * Fork a child that runs @fn against @mem, modify @mem in the parent, and
 * let the child verify (via its exit status) that it still observes the old
 * content — i.e., that COW was broken properly on the parent's write.
 *
 * With @do_mprotect, an mprotect(PROT_READ) + mprotect(PROT_READ|PROT_WRITE)
 * cycle is performed in the parent first, to exercise mprotect()
 * optimizations that may map pages writable without a write fault.
 * With @xfail, a failing child is logged as an expected failure.
 */
static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
		child_fn fn, bool xfail)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		/* Child: run the check; the result is reported via exit(). */
		exit(fn(mem, size, &comm_pipes));
	}

	/* Wait until the child snapshotted the content and is ready. */
	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			/* Unblock the child so it can exit before cleanup. */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}

		ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Collect the child's verdict from its exit status. */
	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret) {
		log_test_result(KSFT_PASS);
	} else if (xfail) {
		/*
		 * With hugetlb, some vmsplice() tests are currently expected to
		 * fail because (a) harder to fix and (b) nobody really cares.
		 * Flag them as expected failure for now.
		 */
		log_test_result(KSFT_XFAIL);
	} else {
		log_test_result(KSFT_FAIL);
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
278 
/* Basic COW: modify in the parent; the child must keep seeing old content. */
static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
}

/* Same as above, with the mprotect() R/O->R/W cycle in the parent first. */
static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
}

/* vmsplice() + unmap in the child; xfail on hugetlb (known limitation). */
static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
			      is_hugetlb);
}

/* As above, with the mprotect() cycle in the parent; xfail on hugetlb. */
static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
		bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
			      is_hugetlb);
}
301 
/*
 * vmsplice() (R/O pin) the range in the parent — either before or after
 * fork(), per @before_fork — let the child modify the range, then unmap it
 * in the parent and verify that the content read back via the pipe still
 * matches the original snapshot (i.e., the pin was not affected by the
 * child's COW write). With @xfail, a mismatch is logged as an expected
 * failure (hugetlb).
 */
static void do_test_vmsplice_in_parent(char *mem, size_t size,
				       bool before_fork, bool xfail)
{
	struct iovec iov = {
		.iov_base = mem,
		.iov_len = size,
	};
	ssize_t cur, total, transferred = 0;
	struct comm_pipes comm_pipes;
	char *old, *new;
	int ret, fds[2];
	char buf;

	old = malloc(size);
	new = malloc(size);
	/* Bail out before dereferencing NULL in memcpy(); free(NULL) is fine. */
	if (!old || !new) {
		ksft_print_msg("malloc() failed\n");
		log_test_result(KSFT_FAIL);
		goto free;
	}

	/* Backup the original content. */
	memcpy(old, mem, size);

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		goto free;
	}

	if (pipe(fds) < 0) {
		ksft_perror("pipe() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}

	if (before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_print_msg("vmsplice() failed\n");
			log_test_result(KSFT_FAIL);
			goto close_pipe;
		}
	}

	ret = fork();
	if (ret < 0) {
		/* ksft_perror() appends its own newline; keep msgs consistent. */
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	} else if (!ret) {
		write(comm_pipes.child_ready[1], "0", 1);
		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
			;
		/* Modify page content in the child. */
		memset(mem, 0xff, size);
		exit(0);
	}

	if (!before_fork) {
		transferred = vmsplice(fds[1], &iov, 1, 0);
		if (transferred <= 0) {
			ksft_perror("vmsplice() failed");
			log_test_result(KSFT_FAIL);
			wait(&ret);
			goto close_pipe;
		}
	}

	/* Wait for the child, then drop our own mapping of the range. */
	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;
	if (munmap(mem, size) < 0) {
		ksft_perror("munmap() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	}
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Wait until the child is done writing. */
	wait(&ret);
	if (!WIFEXITED(ret)) {
		ksft_perror("wait() failed");
		log_test_result(KSFT_FAIL);
		goto close_pipe;
	}

	/* See if we still read the old values. */
	for (total = 0; total < transferred; total += cur) {
		cur = read(fds[0], new + total, transferred - total);
		if (cur < 0) {
			ksft_perror("read() failed");
			log_test_result(KSFT_FAIL);
			goto close_pipe;
		}
	}

	if (!memcmp(old, new, transferred)) {
		log_test_result(KSFT_PASS);
	} else if (xfail) {
		/*
		 * With hugetlb, some vmsplice() tests are currently expected to
		 * fail because (a) harder to fix and (b) nobody really cares.
		 * Flag them as expected failure for now.
		 */
		log_test_result(KSFT_XFAIL);
	} else {
		log_test_result(KSFT_FAIL);
	}
close_pipe:
	close(fds[0]);
	close(fds[1]);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free:
	free(old);
	free(new);
}
413 
/* Pin via vmsplice() before fork(); child writes must not leak into the pin. */
static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_vmsplice_in_parent(mem, size, true, is_hugetlb);
}

/* Pin via vmsplice() after fork(); child writes must not leak into the pin. */
static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_vmsplice_in_parent(mem, size, false, is_hugetlb);
}
423 
424 #ifdef LOCAL_CONFIG_HAVE_LIBURING
/*
 * Register @mem as an io_uring fixed buffer (FOLL_WRITE | FOLL_PIN |
 * FOLL_LONGTERM), force the range to become R/O-mapped or shared (either by
 * keeping a forked child alive — @use_fork — or via an mprotect()/softdirty
 * cycle), modify the range, write it out through the fixed-buffer pin, and
 * verify that the data written through the pin matches the current memory
 * content (i.e., the pin still targets the page we modified).
 */
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	/* NOTE(review): buf is a pointer used only as a 1-byte read target
	 * (&buf) for pipe handshakes — a plain char would do; harmless. */
	char *buf, *tmp;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	file = tmpfile();
	if (!file) {
		ksft_perror("tmpfile() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd);

	tmp = malloc(size);
	if (!tmp) {
		ksft_print_msg("malloc() failed\n");
		log_test_result(KSFT_FAIL);
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_print_msg("io_uring_queue_init() failed\n");
		log_test_result(KSFT_SKIP);
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_print_msg("io_uring_register_buffers() failed\n");
		log_test_result(KSFT_SKIP);
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		}

		clear_softdirty();
		ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_print_msg("io_uring_get_sqe() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_print_msg("io_uring_submit() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_print_msg("io_uring_wait_cqe() failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}

	/* A short or failed write means the pin did not cover the range. */
	if (cqe->res != size) {
		ksft_print_msg("write_fixed failed\n");
		log_test_result(KSFT_FAIL);
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_print_msg("pread() failed\n");
			log_test_result(KSFT_FAIL);
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	if (!memcmp(mem, tmp, size))
		log_test_result(KSFT_PASS);
	else
		log_test_result(KSFT_FAIL);

quit_child:
	if (use_fork) {
		/* Unblock the child so it can exit, then reap it. */
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
594 
/* io_uring fixed-buffer pin against an R/O-mapped exclusive page. */
static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
{
	do_test_iouring(mem, size, false);
}

/* io_uring fixed-buffer pin with a child keeping the pages shared. */
static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_iouring(mem, size, true);
}
604 
605 #endif /* LOCAL_CONFIG_HAVE_LIBURING */
606 
/*
 * Variants of the R/O-pin test; see do_test_ro_pin() for how each variant
 * prepares the mapping before taking the pin.
 */
enum ro_pin_test {
	RO_PIN_TEST,			/* Pin the range as-is. */
	RO_PIN_TEST_SHARED,		/* Pages shared with a live child. */
	RO_PIN_TEST_PREVIOUSLY_SHARED,	/* Shared once; child already quit. */
	RO_PIN_TEST_RO_EXCLUSIVE,	/* Exclusive pages mapped R/O. */
};
613 
/*
 * Take a longterm R/O pin on @mem via the gup_test debugfs interface
 * (optionally with GUP-fast, per @fast), after preparing the mapping
 * according to @test (shared with a child, previously shared, or exclusive
 * but mapped R/O). The pin should trigger unsharing: after modifying the
 * page, reading back through the pin must observe the modification.
 * Skips if gup_test is unavailable or the ioctl is unsupported.
 */
static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_print_msg("gup_test not available\n");
		log_test_result(KSFT_SKIP);
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_print_msg("malloc() failed\n");
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		goto free_tmp;
	}

	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		/* EINVAL means the kernel lacks support -> skip, not fail. */
		if (errno == EINVAL)
			ret = KSFT_SKIP;
		else
			ret = KSFT_FAIL;
		ksft_perror("PIN_LONGTERM_TEST_START failed");
		log_test_result(ret);
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret) {
		ksft_perror("PIN_LONGTERM_TEST_READ failed");
		log_test_result(KSFT_FAIL);
	} else {
		if (!memcmp(mem, tmp, size))
			log_test_result(KSFT_PASS);
		else
			log_test_result(KSFT_FAIL);
	}

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_perror("PIN_LONGTERM_TEST_STOP failed");
wait:
	switch (test) {
	case RO_PIN_TEST_SHARED:
		/* The child is still alive in this variant: unblock and reap. */
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_perror("wait() failed");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}
753 
/*
 * The test_ro_*pin_* wrappers below select the mapping-preparation variant
 * and whether GUP-fast is used; see do_test_ro_pin().
 */
static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
		bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
		bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
		bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
		bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}
787 
/* A test to run against a prepared memory range; hugetlb tells it whether the range is hugetlb-backed. */
typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);
789 
/*
 * Run @fn against a single, freshly populated anonymous base page
 * (THP explicitly disabled via MADV_NOHUGEPAGE). With @swapout, the page is
 * swapped out first; skips if swap is not available.
 */
static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if not around on a kernel. */
	if (ret && errno != EINVAL) {
		ksft_perror("MADV_NOHUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 1, pagesize);

	if (swapout) {
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
	}

	fn(mem, pagesize, false);
munmap:
	munmap(mem, pagesize);
}
827 
/* Run the test on a regular anonymous base page. */
static void run_with_base_page(test_fn fn, const char *desc)
{
	log_test_start("%s ... with base page", desc);
	do_run_with_base_page(fn, false);
}

/* Run the test on a swapped-out anonymous base page. */
static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	log_test_start("%s ... with swapped out base page", desc);
	do_run_with_base_page(fn, true);
}
839 
/*
 * THP layout variants a test can be run against; see do_run_with_thp() for
 * how each variant is prepared.
 */
enum thp_run {
	THP_RUN_PMD,			/* PMD-mapped THP. */
	THP_RUN_PMD_SWAPOUT,		/* PMD-mapped THP, then swapped out. */
	THP_RUN_PTE,			/* THP remapped via PTEs. */
	THP_RUN_PTE_SWAPOUT,		/* PTE-mapped THP, then swapped out. */
	THP_RUN_SINGLE_PTE,		/* Single remaining PTE of a THP. */
	THP_RUN_SINGLE_PTE_SWAPOUT,	/* Single PTE, then swapped out. */
	THP_RUN_PARTIAL_MREMAP,		/* Half the THP mremap()'ed away. */
	THP_RUN_PARTIAL_SHARED,		/* Only part of the THP ever shared. */
};
850 
/*
 * Allocate and populate a THP of @thpsize, transform it into the layout
 * selected by @thp_run (PTE-mapped, partially discarded, partially
 * mremap()'ed, partially shared, and/or swapped out), then run @fn on the
 * resulting range. Skips if a THP cannot be populated or swapout does not
 * take effect.
 */
static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("MADV_HUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if
	 * we get the last sub-page populated automatically.
	 */
	mem[0] = 1;
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_print_msg("Did not get a THP populated\n");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	memset(mem, 1, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		assert(thpsize == pmdsize);
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O. This is a noop if the THP is not pmdsize (and
		 * therefore already PTE-mapped).
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_perror("MADV_DONTNEED failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_perror("mmap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_perror("mremap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * have been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size, false);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}
1005 
/*
 * The run_with_*thp*() helpers below prepare a specific THP layout via
 * do_run_with_thp() and log which variant is being tested.
 */
static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD, size);
}

static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with PTE-mapped THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE, size);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially shared THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}
1061 
/*
 * Run @fn against a freshly populated hugetlb page of @hugetlbsize. A second
 * (dummy) huge page is reserved temporarily to make sure COW/unsharing has a
 * spare page available; skips if not enough free huge pages exist.
 */
static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	log_test_start("%s ... with hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

	/* Encode the huge page size for mmap() (MAP_HUGE_SHIFT encoding). */
	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* Populate an huge page. */
	memset(mem, 1, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize, true);
munmap:
	munmap(mem, hugetlbsize);
}
1098 
/* One COW test: a human-readable description plus its implementation. */
struct test_case {
	const char *desc;	/* printed as part of the test name */
	test_fn fn;		/* invoked once per memory-type variant */
};
1103 
1104 /*
1105  * Test cases that are specific to anonymous pages: pages in private mappings
1106  * that may get shared via COW during fork().
1107  */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we fail to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we fail to break COW, the child observes modifications by the parent.
	 * This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child,
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we fail to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we fail to break COW, the parent observes modifications by
	 * the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};
1225 
/*
 * Run one anonymous-memory test case in every supported configuration:
 * base pages, every detected THP size (swapped/PTE-mapped/partial variants),
 * and every detected hugetlb size.
 */
static void run_anon_test_case(struct test_case const *test_case)
{
	int i;

	run_with_base_page(test_case->fn, test_case->desc);
	run_with_base_page_swap(test_case->fn, test_case->desc);
	for (i = 0; i < nr_thpsizes; i++) {
		size_t size = thpsizes[i];
		struct thp_settings settings = *thp_current_settings();

		/*
		 * Force allocation of exactly this THP size: disable PMD-sized
		 * THPs and enable only the size under test, then activate the
		 * temporary settings (popped again below).
		 */
		settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER;
		settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
		thp_push_settings(&settings);

		/* PMD-mapped runs only make sense for the PMD size itself. */
		if (size == pmdsize) {
			run_with_thp(test_case->fn, test_case->desc, size);
			run_with_thp_swap(test_case->fn, test_case->desc, size);
		}

		run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
		run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
		run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
		run_with_partial_shared_thp(test_case->fn, test_case->desc, size);

		thp_pop_settings();
	}
	for (i = 0; i < nr_hugetlbsizes; i++)
		run_with_hugetlb(test_case->fn, test_case->desc,
				 hugetlbsizes[i]);
}
1258 
run_anon_test_cases(void)1259 static void run_anon_test_cases(void)
1260 {
1261 	int i;
1262 
1263 	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");
1264 
1265 	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
1266 		run_anon_test_case(&anon_test_cases[i]);
1267 }
1268 
tests_per_anon_test_case(void)1269 static int tests_per_anon_test_case(void)
1270 {
1271 	int tests = 2 + nr_hugetlbsizes;
1272 
1273 	tests += 6 * nr_thpsizes;
1274 	if (pmdsize)
1275 		tests += 2;
1276 	return tests;
1277 }
1278 
/* How MADV_COLLAPSE interacts with COW-sharing in the collapse tests. */
enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,	/* collapse before fork() */
	ANON_THP_COLLAPSE_FULLY_SHARED,	/* collapse after fully COW-sharing */
	ANON_THP_COLLAPSE_LOWER_SHARED,	/* only the lower half is COW-shared */
	ANON_THP_COLLAPSE_UPPER_SHARED,	/* only the upper half is COW-shared */
};
1285 
/*
 * COW test around MADV_COLLAPSE: PTE-map a THP, optionally COW-share (part
 * of) it with a child, collapse it back to a PMD mapping, then modify it in
 * the parent and let the child verify it does not observe the modification.
 * Parent and child synchronize via @comm_pipes.
 */
static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}

	/* Set up the sharing scenario before fork(). */
	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		/* Child: compare (the shared part of) the memory and exit. */
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	/* Parent: wait until the child signals it is ready. */
	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	/* Child exit status 0 means it did not observe our modification. */
	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret)
		log_test_result(KSFT_PASS);
	else
		log_test_result(KSFT_FAIL);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
1427 
test_anon_thp_collapse_unshared(char * mem,size_t size,bool is_hugetlb)1428 static void test_anon_thp_collapse_unshared(char *mem, size_t size,
1429 		bool is_hugetlb)
1430 {
1431 	assert(!is_hugetlb);
1432 	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
1433 }
1434 
test_anon_thp_collapse_fully_shared(char * mem,size_t size,bool is_hugetlb)1435 static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
1436 		bool is_hugetlb)
1437 {
1438 	assert(!is_hugetlb);
1439 	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
1440 }
1441 
test_anon_thp_collapse_lower_shared(char * mem,size_t size,bool is_hugetlb)1442 static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
1443 		bool is_hugetlb)
1444 {
1445 	assert(!is_hugetlb);
1446 	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
1447 }
1448 
test_anon_thp_collapse_upper_shared(char * mem,size_t size,bool is_hugetlb)1449 static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
1450 		bool is_hugetlb)
1451 {
1452 	assert(!is_hugetlb);
1453 	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
1454 }
1455 
1456 /*
1457  * Test cases that are specific to anonymous THP: pages in private mappings
1458  * that may get shared via COW during fork().
1459  */
static const struct test_case anon_thp_test_cases[] = {
	/*
	 * Basic COW test for fork() without any GUP when collapsing a THP
	 * before fork().
	 *
	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
	 * collapse") might easily get COW handling wrong when not collapsing
	 * exclusivity information properly.
	 */
	{
		"Basic COW after fork() when collapsing before fork()",
		test_anon_thp_collapse_unshared,
	},
	/* Basic COW test, but collapse after COW-sharing a full THP. */
	{
		"Basic COW after fork() when collapsing after fork() (fully shared)",
		test_anon_thp_collapse_fully_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the lower half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (lower shared)",
		test_anon_thp_collapse_lower_shared,
	},
	/*
	 * Basic COW test, but collapse after COW-sharing the upper half of a
	 * THP.
	 */
	{
		"Basic COW after fork() when collapsing after fork() (upper shared)",
		test_anon_thp_collapse_upper_shared,
	},
};
1495 
run_anon_thp_test_cases(void)1496 static void run_anon_thp_test_cases(void)
1497 {
1498 	int i;
1499 
1500 	if (!pmdsize)
1501 		return;
1502 
1503 	ksft_print_msg("[INFO] Anonymous THP tests\n");
1504 
1505 	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
1506 		struct test_case const *test_case = &anon_thp_test_cases[i];
1507 
1508 		log_test_start("%s", test_case->desc);
1509 		do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
1510 	}
1511 }
1512 
tests_per_anon_thp_test_case(void)1513 static int tests_per_anon_thp_test_case(void)
1514 {
1515 	return pmdsize ? 1 : 0;
1516 }
1517 
/* Non-anonymous test: R/W private mapping @mem plus R/O alias mapping @smem. */
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
1519 
test_cow(char * mem,const char * smem,size_t size)1520 static void test_cow(char *mem, const char *smem, size_t size)
1521 {
1522 	char *old = malloc(size);
1523 
1524 	/* Backup the original content. */
1525 	memcpy(old, smem, size);
1526 
1527 	/* Modify the page. */
1528 	memset(mem, 0xff, size);
1529 
1530 	/* See if we still read the old values via the other mapping. */
1531 	if (!memcmp(smem, old, size))
1532 		log_test_result(KSFT_PASS);
1533 	else
1534 		log_test_result(KSFT_FAIL);
1535 	free(old);
1536 }
1537 
test_ro_pin(char * mem,const char * smem,size_t size)1538 static void test_ro_pin(char *mem, const char *smem, size_t size)
1539 {
1540 	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
1541 }
1542 
test_ro_fast_pin(char * mem,const char * smem,size_t size)1543 static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
1544 {
1545 	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
1546 }
1547 
/*
 * Exercise @fn with both mappings backed by the shared zeropage: a private
 * R/W mapping that was only read from, and a separate R/O mapping.
 */
static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;

	log_test_start("%s ... with shared zeropage", desc);

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Read from the page to populate the shared zeropage. */
	tmp = *mem + *smem;
	/* Keep the reads alive: stop the compiler from eliding them. */
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
}
1579 
/*
 * Exercise @fn with both mappings backed by the huge (PMD-sized) shared
 * zeropage. Requires the huge zeropage to be enabled and THP support.
 */
static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
	size_t mmap_size;
	int ret;

	log_test_start("%s ... with huge zeropage", desc);

	if (!has_huge_zeropage) {
		ksft_print_msg("Huge zeropage not enabled\n");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * pmdsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));

	ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
	if (ret != 0) {
		ksft_perror("madvise()");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}
	/* ret is known to be 0 here; plain assignment instead of "|=". */
	ret = madvise(smem, pmdsize, MADV_HUGEPAGE);
	if (ret != 0) {
		ksft_perror("madvise()");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		/*
		 * Finish the test started via log_test_start() through
		 * log_test_result(), like every other path in this file,
		 * instead of reporting directly via ksft_test_result_skip().
		 */
		ksft_print_msg("Did not get THPs populated\n");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}

	fn(mem, smem, pmdsize);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}
1647 
/*
 * Exercise @fn on a pagecache page of a memfd: a private R/W mapping plus a
 * shared R/O mapping of the same (zero-filled) file page.
 */
static void run_with_memfd(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	int fd;

	log_test_start("%s ... with memfd", desc);

	fd = memfd_create("test", 0);
	if (fd < 0) {
		ksft_perror("memfd_create() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_perror("fallocate() failed");
		log_test_result(KSFT_FAIL);
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	/* Keep the reads alive: stop the compiler from eliding them. */
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	close(fd);
}
1695 
/*
 * Exercise @fn on a pagecache page of a tmpfile: a private R/W mapping plus
 * a shared R/O mapping of the same (zero-filled) file page.
 */
static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, tmp;
	FILE *file;
	int fd;

	log_test_start("%s ... with tmpfile", desc);

	file = tmpfile();
	if (!file) {
		ksft_perror("tmpfile() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	fd = fileno(file);
	if (fd < 0) {
		ksft_perror("fileno() failed");
		log_test_result(KSFT_SKIP);
		/* Was "return", which leaked the FILE; close it instead. */
		goto close;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, pagesize)) {
		ksft_perror("fallocate() failed");
		log_test_result(KSFT_FAIL);
		goto close;
	}

	/* Create a private mapping of the tmpfile. */
	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto close;
	}
	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, pagesize);
munmap:
	munmap(mem, pagesize);
	if (smem != MAP_FAILED)
		munmap(smem, pagesize);
close:
	fclose(file);
}
1751 
/*
 * Exercise @fn on a hugetlb page of a MFD_HUGETLB memfd: a private R/W
 * mapping plus a shared R/O mapping of the same (zero-filled) huge page.
 */
static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
				   size_t hugetlbsize)
{
	int flags = MFD_HUGETLB;
	char *mem, *smem, tmp;
	int fd;

	log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

	/* Encode log2(hugetlbsize) into the memfd flags (MFD_HUGE_* ABI). */
	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;

	fd = memfd_create("test", flags);
	if (fd < 0) {
		ksft_perror("memfd_create() failed");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* File consists of a single page filled with zeroes. */
	if (fallocate(fd, 0, 0, hugetlbsize)) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto close;
	}

	/* Create a private mapping of the memfd. */
	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
		   0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto close;
	}
	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
	if (smem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/* Fault the page in. */
	tmp = *mem + *smem;
	/* Keep the reads alive: stop the compiler from eliding them. */
	asm volatile("" : "+r" (tmp));

	fn(mem, smem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
	if (smem != MAP_FAILED)
		munmap(smem, hugetlbsize);
close:
	close(fd);
}
1805 
/* One non-anonymous COW test: description plus its implementation. */
struct non_anon_test_case {
	const char *desc;	/* printed as part of the test name */
	non_anon_test_fn fn;	/* invoked once per backing-memory variant */
};
1810 
1811 /*
1812  * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
1814  * the shared zeropage(s), pagecache pages, ...
1815  */
static const struct non_anon_test_case non_anon_test_cases[] = {
	/*
	 * Basic COW test without any GUP. If we fail to break COW, changes are
	 * visible via other private/shared mappings.
	 */
	{
		"Basic COW",
		test_cow,
	},
	/*
	 * Take a R/O longterm pin. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O longterm GUP pin",
		test_ro_pin,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O longterm GUP-fast pin",
		test_ro_fast_pin,
	},
};
1839 
run_non_anon_test_case(struct non_anon_test_case const * test_case)1840 static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
1841 {
1842 	int i;
1843 
1844 	run_with_zeropage(test_case->fn, test_case->desc);
1845 	run_with_memfd(test_case->fn, test_case->desc);
1846 	run_with_tmpfile(test_case->fn, test_case->desc);
1847 	if (pmdsize)
1848 		run_with_huge_zeropage(test_case->fn, test_case->desc);
1849 	for (i = 0; i < nr_hugetlbsizes; i++)
1850 		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
1851 				       hugetlbsizes[i]);
1852 }
1853 
run_non_anon_test_cases(void)1854 static void run_non_anon_test_cases(void)
1855 {
1856 	int i;
1857 
1858 	ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");
1859 
1860 	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
1861 		run_non_anon_test_case(&non_anon_test_cases[i]);
1862 }
1863 
tests_per_non_anon_test_case(void)1864 static int tests_per_non_anon_test_case(void)
1865 {
1866 	int tests = 3 + nr_hugetlbsizes;
1867 
1868 	if (pmdsize)
1869 		tests += 1;
1870 	return tests;
1871 }
1872 
int main(int argc, char **argv)
{
	struct thp_settings default_settings;

	ksft_print_header();

	/* Probe the system configuration the individual tests depend on. */
	pagesize = getpagesize();
	pmdsize = read_pmd_pagesize();
	if (pmdsize) {
		/* Only if THP is supported. */
		thp_read_settings(&default_settings);
		default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT;
		/* Save the system settings; restored before exiting below. */
		thp_save_settings();
		thp_push_settings(&default_settings);

		ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
			       pmdsize / 1024);
		nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
	}
	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
						    ARRAY_SIZE(hugetlbsizes));
	detect_huge_zeropage();

	/* The plan must match exactly what the runners below will report. */
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	/* gup_fd may legitimately fail to open; tests check it themselves. */
	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	if (pmdsize) {
		/* Only if THP is supported. */
		thp_restore_settings();
	}

	ksft_finished();
}
1916