xref: /linux/tools/testing/selftests/mm/cow.c (revision 81b1e3f91d77564611ab10d2c61774cf6a46ec78)
1baa489faSSeongJae Park // SPDX-License-Identifier: GPL-2.0-only
2baa489faSSeongJae Park /*
3baa489faSSeongJae Park  * COW (Copy On Write) tests.
4baa489faSSeongJae Park  *
5baa489faSSeongJae Park  * Copyright 2022, Red Hat, Inc.
6baa489faSSeongJae Park  *
7baa489faSSeongJae Park  * Author(s): David Hildenbrand <david@redhat.com>
8baa489faSSeongJae Park  */
9baa489faSSeongJae Park #define _GNU_SOURCE
10baa489faSSeongJae Park #include <stdlib.h>
11baa489faSSeongJae Park #include <string.h>
12baa489faSSeongJae Park #include <stdbool.h>
13baa489faSSeongJae Park #include <stdint.h>
14baa489faSSeongJae Park #include <unistd.h>
15baa489faSSeongJae Park #include <errno.h>
16baa489faSSeongJae Park #include <fcntl.h>
17baa489faSSeongJae Park #include <assert.h>
18baa489faSSeongJae Park #include <sys/mman.h>
19baa489faSSeongJae Park #include <sys/ioctl.h>
20baa489faSSeongJae Park #include <sys/wait.h>
21baa489faSSeongJae Park #include <linux/memfd.h>
22baa489faSSeongJae Park 
23baa489faSSeongJae Park #include "local_config.h"
24baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING
25baa489faSSeongJae Park #include <liburing.h>
26baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */
27baa489faSSeongJae Park 
28baa489faSSeongJae Park #include "../../../../mm/gup_test.h"
29baa489faSSeongJae Park #include "../kselftest.h"
30baa489faSSeongJae Park #include "vm_util.h"
31baa489faSSeongJae Park 
/*
 * Fallback values for the madvise() advice constants used by this file,
 * defined only when the headers included above do not provide them.
 */
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25
#endif
38baa489faSSeongJae Park 
/* Stride used when walking a range page by page (see range_is_swapped()). */
static size_t pagesize;
/* File descriptor handed to pagemap_is_swapped(). */
static int pagemap_fd;
/* NOTE(review): presumably the THP size in bytes; set outside this excerpt. */
static size_t thpsize;
/* NOTE(review): presumably the number of valid entries in hugetlbsizes[]. */
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
/*
 * File descriptor used for the PIN_LONGTERM_TEST_* ioctls; a negative value
 * makes the R/O pin tests report "gup_test not available" and skip.
 */
static int gup_fd;
/* Set by detect_huge_zeropage() when the sysfs knob reads as 1. */
static bool has_huge_zeropage;
46baa489faSSeongJae Park 
47baa489faSSeongJae Park static void detect_huge_zeropage(void)
48baa489faSSeongJae Park {
49baa489faSSeongJae Park 	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
50baa489faSSeongJae Park 		      O_RDONLY);
51baa489faSSeongJae Park 	size_t enabled = 0;
52baa489faSSeongJae Park 	char buf[15];
53baa489faSSeongJae Park 	int ret;
54baa489faSSeongJae Park 
55baa489faSSeongJae Park 	if (fd < 0)
56baa489faSSeongJae Park 		return;
57baa489faSSeongJae Park 
58baa489faSSeongJae Park 	ret = pread(fd, buf, sizeof(buf), 0);
59baa489faSSeongJae Park 	if (ret > 0 && ret < sizeof(buf)) {
60baa489faSSeongJae Park 		buf[ret] = 0;
61baa489faSSeongJae Park 
62baa489faSSeongJae Park 		enabled = strtoul(buf, NULL, 10);
63baa489faSSeongJae Park 		if (enabled == 1) {
64baa489faSSeongJae Park 			has_huge_zeropage = true;
65baa489faSSeongJae Park 			ksft_print_msg("[INFO] huge zeropage is enabled\n");
66baa489faSSeongJae Park 		}
67baa489faSSeongJae Park 	}
68baa489faSSeongJae Park 
69baa489faSSeongJae Park 	close(fd);
70baa489faSSeongJae Park }
71baa489faSSeongJae Park 
72baa489faSSeongJae Park static bool range_is_swapped(void *addr, size_t size)
73baa489faSSeongJae Park {
74baa489faSSeongJae Park 	for (; size; addr += pagesize, size -= pagesize)
75baa489faSSeongJae Park 		if (!pagemap_is_swapped(pagemap_fd, addr))
76baa489faSSeongJae Park 			return false;
77baa489faSSeongJae Park 	return true;
78baa489faSSeongJae Park }
79baa489faSSeongJae Park 
/*
 * Two pipes used to pass one-byte "ready" tokens between a test process and
 * the child it forks. Each array is filled in by pipe(): index 0 is the read
 * end, index 1 the write end.
 */
struct comm_pipes {
	int child_ready[2];	/* written by the child, read by the parent */
	int parent_ready[2];	/* written by the parent, read by the child */
};
84baa489faSSeongJae Park 
85baa489faSSeongJae Park static int setup_comm_pipes(struct comm_pipes *comm_pipes)
86baa489faSSeongJae Park {
87baa489faSSeongJae Park 	if (pipe(comm_pipes->child_ready) < 0)
88baa489faSSeongJae Park 		return -errno;
89baa489faSSeongJae Park 	if (pipe(comm_pipes->parent_ready) < 0) {
90baa489faSSeongJae Park 		close(comm_pipes->child_ready[0]);
91baa489faSSeongJae Park 		close(comm_pipes->child_ready[1]);
92baa489faSSeongJae Park 		return -errno;
93baa489faSSeongJae Park 	}
94baa489faSSeongJae Park 
95baa489faSSeongJae Park 	return 0;
96baa489faSSeongJae Park }
97baa489faSSeongJae Park 
98baa489faSSeongJae Park static void close_comm_pipes(struct comm_pipes *comm_pipes)
99baa489faSSeongJae Park {
100baa489faSSeongJae Park 	close(comm_pipes->child_ready[0]);
101baa489faSSeongJae Park 	close(comm_pipes->child_ready[1]);
102baa489faSSeongJae Park 	close(comm_pipes->parent_ready[0]);
103baa489faSSeongJae Park 	close(comm_pipes->parent_ready[1]);
104baa489faSSeongJae Park }
105baa489faSSeongJae Park 
106baa489faSSeongJae Park static int child_memcmp_fn(char *mem, size_t size,
107baa489faSSeongJae Park 			   struct comm_pipes *comm_pipes)
108baa489faSSeongJae Park {
109baa489faSSeongJae Park 	char *old = malloc(size);
110baa489faSSeongJae Park 	char buf;
111baa489faSSeongJae Park 
112baa489faSSeongJae Park 	/* Backup the original content. */
113baa489faSSeongJae Park 	memcpy(old, mem, size);
114baa489faSSeongJae Park 
115baa489faSSeongJae Park 	/* Wait until the parent modified the page. */
116baa489faSSeongJae Park 	write(comm_pipes->child_ready[1], "0", 1);
117baa489faSSeongJae Park 	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
118baa489faSSeongJae Park 		;
119baa489faSSeongJae Park 
120baa489faSSeongJae Park 	/* See if we still read the old values. */
121baa489faSSeongJae Park 	return memcmp(old, mem, size);
122baa489faSSeongJae Park }
123baa489faSSeongJae Park 
124baa489faSSeongJae Park static int child_vmsplice_memcmp_fn(char *mem, size_t size,
125baa489faSSeongJae Park 				    struct comm_pipes *comm_pipes)
126baa489faSSeongJae Park {
127baa489faSSeongJae Park 	struct iovec iov = {
128baa489faSSeongJae Park 		.iov_base = mem,
129baa489faSSeongJae Park 		.iov_len = size,
130baa489faSSeongJae Park 	};
131baa489faSSeongJae Park 	ssize_t cur, total, transferred;
132baa489faSSeongJae Park 	char *old, *new;
133baa489faSSeongJae Park 	int fds[2];
134baa489faSSeongJae Park 	char buf;
135baa489faSSeongJae Park 
136baa489faSSeongJae Park 	old = malloc(size);
137baa489faSSeongJae Park 	new = malloc(size);
138baa489faSSeongJae Park 
139baa489faSSeongJae Park 	/* Backup the original content. */
140baa489faSSeongJae Park 	memcpy(old, mem, size);
141baa489faSSeongJae Park 
142baa489faSSeongJae Park 	if (pipe(fds) < 0)
143baa489faSSeongJae Park 		return -errno;
144baa489faSSeongJae Park 
145baa489faSSeongJae Park 	/* Trigger a read-only pin. */
146baa489faSSeongJae Park 	transferred = vmsplice(fds[1], &iov, 1, 0);
147baa489faSSeongJae Park 	if (transferred < 0)
148baa489faSSeongJae Park 		return -errno;
149baa489faSSeongJae Park 	if (transferred == 0)
150baa489faSSeongJae Park 		return -EINVAL;
151baa489faSSeongJae Park 
152baa489faSSeongJae Park 	/* Unmap it from our page tables. */
153baa489faSSeongJae Park 	if (munmap(mem, size) < 0)
154baa489faSSeongJae Park 		return -errno;
155baa489faSSeongJae Park 
156baa489faSSeongJae Park 	/* Wait until the parent modified it. */
157baa489faSSeongJae Park 	write(comm_pipes->child_ready[1], "0", 1);
158baa489faSSeongJae Park 	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
159baa489faSSeongJae Park 		;
160baa489faSSeongJae Park 
161baa489faSSeongJae Park 	/* See if we still read the old values via the pipe. */
162baa489faSSeongJae Park 	for (total = 0; total < transferred; total += cur) {
163baa489faSSeongJae Park 		cur = read(fds[0], new + total, transferred - total);
164baa489faSSeongJae Park 		if (cur < 0)
165baa489faSSeongJae Park 			return -errno;
166baa489faSSeongJae Park 	}
167baa489faSSeongJae Park 
168baa489faSSeongJae Park 	return memcmp(old, new, transferred);
169baa489faSSeongJae Park }
170baa489faSSeongJae Park 
/*
 * Routine run in the forked child by do_test_cow_in_parent(); its return
 * value is passed to exit() and thereby becomes the child's exit status.
 */
typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
172baa489faSSeongJae Park 
173baa489faSSeongJae Park static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
174baa489faSSeongJae Park 				  child_fn fn)
175baa489faSSeongJae Park {
176baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
177baa489faSSeongJae Park 	char buf;
178baa489faSSeongJae Park 	int ret;
179baa489faSSeongJae Park 
180baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
181baa489faSSeongJae Park 	if (ret) {
182baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
183baa489faSSeongJae Park 		return;
184baa489faSSeongJae Park 	}
185baa489faSSeongJae Park 
186baa489faSSeongJae Park 	ret = fork();
187baa489faSSeongJae Park 	if (ret < 0) {
188baa489faSSeongJae Park 		ksft_test_result_fail("fork() failed\n");
189baa489faSSeongJae Park 		goto close_comm_pipes;
190baa489faSSeongJae Park 	} else if (!ret) {
191baa489faSSeongJae Park 		exit(fn(mem, size, &comm_pipes));
192baa489faSSeongJae Park 	}
193baa489faSSeongJae Park 
194baa489faSSeongJae Park 	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
195baa489faSSeongJae Park 		;
196baa489faSSeongJae Park 
197baa489faSSeongJae Park 	if (do_mprotect) {
198baa489faSSeongJae Park 		/*
199baa489faSSeongJae Park 		 * mprotect() optimizations might try avoiding
200baa489faSSeongJae Park 		 * write-faults by directly mapping pages writable.
201baa489faSSeongJae Park 		 */
202baa489faSSeongJae Park 		ret = mprotect(mem, size, PROT_READ);
203baa489faSSeongJae Park 		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
204baa489faSSeongJae Park 		if (ret) {
205baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
206baa489faSSeongJae Park 			write(comm_pipes.parent_ready[1], "0", 1);
207baa489faSSeongJae Park 			wait(&ret);
208baa489faSSeongJae Park 			goto close_comm_pipes;
209baa489faSSeongJae Park 		}
210baa489faSSeongJae Park 	}
211baa489faSSeongJae Park 
212baa489faSSeongJae Park 	/* Modify the page. */
213baa489faSSeongJae Park 	memset(mem, 0xff, size);
214baa489faSSeongJae Park 	write(comm_pipes.parent_ready[1], "0", 1);
215baa489faSSeongJae Park 
216baa489faSSeongJae Park 	wait(&ret);
217baa489faSSeongJae Park 	if (WIFEXITED(ret))
218baa489faSSeongJae Park 		ret = WEXITSTATUS(ret);
219baa489faSSeongJae Park 	else
220baa489faSSeongJae Park 		ret = -EINVAL;
221baa489faSSeongJae Park 
222baa489faSSeongJae Park 	ksft_test_result(!ret, "No leak from parent into child\n");
223baa489faSSeongJae Park close_comm_pipes:
224baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
225baa489faSSeongJae Park }
226baa489faSSeongJae Park 
227baa489faSSeongJae Park static void test_cow_in_parent(char *mem, size_t size)
228baa489faSSeongJae Park {
229baa489faSSeongJae Park 	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
230baa489faSSeongJae Park }
231baa489faSSeongJae Park 
232baa489faSSeongJae Park static void test_cow_in_parent_mprotect(char *mem, size_t size)
233baa489faSSeongJae Park {
234baa489faSSeongJae Park 	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
235baa489faSSeongJae Park }
236baa489faSSeongJae Park 
237baa489faSSeongJae Park static void test_vmsplice_in_child(char *mem, size_t size)
238baa489faSSeongJae Park {
239baa489faSSeongJae Park 	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
240baa489faSSeongJae Park }
241baa489faSSeongJae Park 
242baa489faSSeongJae Park static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
243baa489faSSeongJae Park {
244baa489faSSeongJae Park 	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
245baa489faSSeongJae Park }
246baa489faSSeongJae Park 
247baa489faSSeongJae Park static void do_test_vmsplice_in_parent(char *mem, size_t size,
248baa489faSSeongJae Park 				       bool before_fork)
249baa489faSSeongJae Park {
250baa489faSSeongJae Park 	struct iovec iov = {
251baa489faSSeongJae Park 		.iov_base = mem,
252baa489faSSeongJae Park 		.iov_len = size,
253baa489faSSeongJae Park 	};
254baa489faSSeongJae Park 	ssize_t cur, total, transferred;
255baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
256baa489faSSeongJae Park 	char *old, *new;
257baa489faSSeongJae Park 	int ret, fds[2];
258baa489faSSeongJae Park 	char buf;
259baa489faSSeongJae Park 
260baa489faSSeongJae Park 	old = malloc(size);
261baa489faSSeongJae Park 	new = malloc(size);
262baa489faSSeongJae Park 
263baa489faSSeongJae Park 	memcpy(old, mem, size);
264baa489faSSeongJae Park 
265baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
266baa489faSSeongJae Park 	if (ret) {
267baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
268baa489faSSeongJae Park 		goto free;
269baa489faSSeongJae Park 	}
270baa489faSSeongJae Park 
271baa489faSSeongJae Park 	if (pipe(fds) < 0) {
272baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
273baa489faSSeongJae Park 		goto close_comm_pipes;
274baa489faSSeongJae Park 	}
275baa489faSSeongJae Park 
276baa489faSSeongJae Park 	if (before_fork) {
277baa489faSSeongJae Park 		transferred = vmsplice(fds[1], &iov, 1, 0);
278baa489faSSeongJae Park 		if (transferred <= 0) {
279baa489faSSeongJae Park 			ksft_test_result_fail("vmsplice() failed\n");
280baa489faSSeongJae Park 			goto close_pipe;
281baa489faSSeongJae Park 		}
282baa489faSSeongJae Park 	}
283baa489faSSeongJae Park 
284baa489faSSeongJae Park 	ret = fork();
285baa489faSSeongJae Park 	if (ret < 0) {
286baa489faSSeongJae Park 		ksft_test_result_fail("fork() failed\n");
287baa489faSSeongJae Park 		goto close_pipe;
288baa489faSSeongJae Park 	} else if (!ret) {
289baa489faSSeongJae Park 		write(comm_pipes.child_ready[1], "0", 1);
290baa489faSSeongJae Park 		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
291baa489faSSeongJae Park 			;
292baa489faSSeongJae Park 		/* Modify page content in the child. */
293baa489faSSeongJae Park 		memset(mem, 0xff, size);
294baa489faSSeongJae Park 		exit(0);
295baa489faSSeongJae Park 	}
296baa489faSSeongJae Park 
297baa489faSSeongJae Park 	if (!before_fork) {
298baa489faSSeongJae Park 		transferred = vmsplice(fds[1], &iov, 1, 0);
299baa489faSSeongJae Park 		if (transferred <= 0) {
300baa489faSSeongJae Park 			ksft_test_result_fail("vmsplice() failed\n");
301baa489faSSeongJae Park 			wait(&ret);
302baa489faSSeongJae Park 			goto close_pipe;
303baa489faSSeongJae Park 		}
304baa489faSSeongJae Park 	}
305baa489faSSeongJae Park 
306baa489faSSeongJae Park 	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
307baa489faSSeongJae Park 		;
308baa489faSSeongJae Park 	if (munmap(mem, size) < 0) {
309baa489faSSeongJae Park 		ksft_test_result_fail("munmap() failed\n");
310baa489faSSeongJae Park 		goto close_pipe;
311baa489faSSeongJae Park 	}
312baa489faSSeongJae Park 	write(comm_pipes.parent_ready[1], "0", 1);
313baa489faSSeongJae Park 
314baa489faSSeongJae Park 	/* Wait until the child is done writing. */
315baa489faSSeongJae Park 	wait(&ret);
316baa489faSSeongJae Park 	if (!WIFEXITED(ret)) {
317baa489faSSeongJae Park 		ksft_test_result_fail("wait() failed\n");
318baa489faSSeongJae Park 		goto close_pipe;
319baa489faSSeongJae Park 	}
320baa489faSSeongJae Park 
321baa489faSSeongJae Park 	/* See if we still read the old values. */
322baa489faSSeongJae Park 	for (total = 0; total < transferred; total += cur) {
323baa489faSSeongJae Park 		cur = read(fds[0], new + total, transferred - total);
324baa489faSSeongJae Park 		if (cur < 0) {
325baa489faSSeongJae Park 			ksft_test_result_fail("read() failed\n");
326baa489faSSeongJae Park 			goto close_pipe;
327baa489faSSeongJae Park 		}
328baa489faSSeongJae Park 	}
329baa489faSSeongJae Park 
330baa489faSSeongJae Park 	ksft_test_result(!memcmp(old, new, transferred),
331baa489faSSeongJae Park 			 "No leak from child into parent\n");
332baa489faSSeongJae Park close_pipe:
333baa489faSSeongJae Park 	close(fds[0]);
334baa489faSSeongJae Park 	close(fds[1]);
335baa489faSSeongJae Park close_comm_pipes:
336baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
337baa489faSSeongJae Park free:
338baa489faSSeongJae Park 	free(old);
339baa489faSSeongJae Park 	free(new);
340baa489faSSeongJae Park }
341baa489faSSeongJae Park 
/* Parent takes the vmsplice() reference first and forks afterwards. */
static void test_vmsplice_before_fork(char *mem, size_t size)
{
	const bool splice_before_fork = true;

	do_test_vmsplice_in_parent(mem, size, splice_before_fork);
}
346baa489faSSeongJae Park 
/* Parent forks first and takes the vmsplice() reference afterwards. */
static void test_vmsplice_after_fork(char *mem, size_t size)
{
	const bool splice_before_fork = false;

	do_test_vmsplice_in_parent(mem, size, splice_before_fork);
}
351baa489faSSeongJae Park 
352baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING
353baa489faSSeongJae Park static void do_test_iouring(char *mem, size_t size, bool use_fork)
354baa489faSSeongJae Park {
355baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
356baa489faSSeongJae Park 	struct io_uring_cqe *cqe;
357baa489faSSeongJae Park 	struct io_uring_sqe *sqe;
358baa489faSSeongJae Park 	struct io_uring ring;
359baa489faSSeongJae Park 	ssize_t cur, total;
360baa489faSSeongJae Park 	struct iovec iov;
361baa489faSSeongJae Park 	char *buf, *tmp;
362baa489faSSeongJae Park 	int ret, fd;
363baa489faSSeongJae Park 	FILE *file;
364baa489faSSeongJae Park 
365baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
366baa489faSSeongJae Park 	if (ret) {
367baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
368baa489faSSeongJae Park 		return;
369baa489faSSeongJae Park 	}
370baa489faSSeongJae Park 
371baa489faSSeongJae Park 	file = tmpfile();
372baa489faSSeongJae Park 	if (!file) {
373baa489faSSeongJae Park 		ksft_test_result_fail("tmpfile() failed\n");
374baa489faSSeongJae Park 		goto close_comm_pipes;
375baa489faSSeongJae Park 	}
376baa489faSSeongJae Park 	fd = fileno(file);
377baa489faSSeongJae Park 	assert(fd);
378baa489faSSeongJae Park 
379baa489faSSeongJae Park 	tmp = malloc(size);
380baa489faSSeongJae Park 	if (!tmp) {
381baa489faSSeongJae Park 		ksft_test_result_fail("malloc() failed\n");
382baa489faSSeongJae Park 		goto close_file;
383baa489faSSeongJae Park 	}
384baa489faSSeongJae Park 
385baa489faSSeongJae Park 	/* Skip on errors, as we might just lack kernel support. */
386baa489faSSeongJae Park 	ret = io_uring_queue_init(1, &ring, 0);
387baa489faSSeongJae Park 	if (ret < 0) {
388baa489faSSeongJae Park 		ksft_test_result_skip("io_uring_queue_init() failed\n");
389baa489faSSeongJae Park 		goto free_tmp;
390baa489faSSeongJae Park 	}
391baa489faSSeongJae Park 
392baa489faSSeongJae Park 	/*
393baa489faSSeongJae Park 	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
394baa489faSSeongJae Park 	 * | FOLL_LONGTERM the range.
395baa489faSSeongJae Park 	 *
396baa489faSSeongJae Park 	 * Skip on errors, as we might just lack kernel support or might not
397baa489faSSeongJae Park 	 * have sufficient MEMLOCK permissions.
398baa489faSSeongJae Park 	 */
399baa489faSSeongJae Park 	iov.iov_base = mem;
400baa489faSSeongJae Park 	iov.iov_len = size;
401baa489faSSeongJae Park 	ret = io_uring_register_buffers(&ring, &iov, 1);
402baa489faSSeongJae Park 	if (ret) {
403baa489faSSeongJae Park 		ksft_test_result_skip("io_uring_register_buffers() failed\n");
404baa489faSSeongJae Park 		goto queue_exit;
405baa489faSSeongJae Park 	}
406baa489faSSeongJae Park 
407baa489faSSeongJae Park 	if (use_fork) {
408baa489faSSeongJae Park 		/*
409baa489faSSeongJae Park 		 * fork() and keep the child alive until we're done. Note that
410baa489faSSeongJae Park 		 * we expect the pinned page to not get shared with the child.
411baa489faSSeongJae Park 		 */
412baa489faSSeongJae Park 		ret = fork();
413baa489faSSeongJae Park 		if (ret < 0) {
414baa489faSSeongJae Park 			ksft_test_result_fail("fork() failed\n");
415baa489faSSeongJae Park 			goto unregister_buffers;
416baa489faSSeongJae Park 		} else if (!ret) {
417baa489faSSeongJae Park 			write(comm_pipes.child_ready[1], "0", 1);
418baa489faSSeongJae Park 			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
419baa489faSSeongJae Park 				;
420baa489faSSeongJae Park 			exit(0);
421baa489faSSeongJae Park 		}
422baa489faSSeongJae Park 
423baa489faSSeongJae Park 		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
424baa489faSSeongJae Park 			;
425baa489faSSeongJae Park 	} else {
426baa489faSSeongJae Park 		/*
427baa489faSSeongJae Park 		 * Map the page R/O into the page table. Enable softdirty
428baa489faSSeongJae Park 		 * tracking to stop the page from getting mapped R/W immediately
429baa489faSSeongJae Park 		 * again by mprotect() optimizations. Note that we don't have an
430baa489faSSeongJae Park 		 * easy way to test if that worked (the pagemap does not export
431baa489faSSeongJae Park 		 * if the page is mapped R/O vs. R/W).
432baa489faSSeongJae Park 		 */
433baa489faSSeongJae Park 		ret = mprotect(mem, size, PROT_READ);
434baa489faSSeongJae Park 		clear_softdirty();
435baa489faSSeongJae Park 		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
436baa489faSSeongJae Park 		if (ret) {
437baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
438baa489faSSeongJae Park 			goto unregister_buffers;
439baa489faSSeongJae Park 		}
440baa489faSSeongJae Park 	}
441baa489faSSeongJae Park 
442baa489faSSeongJae Park 	/*
443baa489faSSeongJae Park 	 * Modify the page and write page content as observed by the fixed
444baa489faSSeongJae Park 	 * buffer pin to the file so we can verify it.
445baa489faSSeongJae Park 	 */
446baa489faSSeongJae Park 	memset(mem, 0xff, size);
447baa489faSSeongJae Park 	sqe = io_uring_get_sqe(&ring);
448baa489faSSeongJae Park 	if (!sqe) {
449baa489faSSeongJae Park 		ksft_test_result_fail("io_uring_get_sqe() failed\n");
450baa489faSSeongJae Park 		goto quit_child;
451baa489faSSeongJae Park 	}
452baa489faSSeongJae Park 	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
453baa489faSSeongJae Park 
454baa489faSSeongJae Park 	ret = io_uring_submit(&ring);
455baa489faSSeongJae Park 	if (ret < 0) {
456baa489faSSeongJae Park 		ksft_test_result_fail("io_uring_submit() failed\n");
457baa489faSSeongJae Park 		goto quit_child;
458baa489faSSeongJae Park 	}
459baa489faSSeongJae Park 
460baa489faSSeongJae Park 	ret = io_uring_wait_cqe(&ring, &cqe);
461baa489faSSeongJae Park 	if (ret < 0) {
462baa489faSSeongJae Park 		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
463baa489faSSeongJae Park 		goto quit_child;
464baa489faSSeongJae Park 	}
465baa489faSSeongJae Park 
466baa489faSSeongJae Park 	if (cqe->res != size) {
467baa489faSSeongJae Park 		ksft_test_result_fail("write_fixed failed\n");
468baa489faSSeongJae Park 		goto quit_child;
469baa489faSSeongJae Park 	}
470baa489faSSeongJae Park 	io_uring_cqe_seen(&ring, cqe);
471baa489faSSeongJae Park 
472baa489faSSeongJae Park 	/* Read back the file content to the temporary buffer. */
473baa489faSSeongJae Park 	total = 0;
474baa489faSSeongJae Park 	while (total < size) {
475baa489faSSeongJae Park 		cur = pread(fd, tmp + total, size - total, total);
476baa489faSSeongJae Park 		if (cur < 0) {
477baa489faSSeongJae Park 			ksft_test_result_fail("pread() failed\n");
478baa489faSSeongJae Park 			goto quit_child;
479baa489faSSeongJae Park 		}
480baa489faSSeongJae Park 		total += cur;
481baa489faSSeongJae Park 	}
482baa489faSSeongJae Park 
483baa489faSSeongJae Park 	/* Finally, check if we read what we expected. */
484baa489faSSeongJae Park 	ksft_test_result(!memcmp(mem, tmp, size),
485baa489faSSeongJae Park 			 "Longterm R/W pin is reliable\n");
486baa489faSSeongJae Park 
487baa489faSSeongJae Park quit_child:
488baa489faSSeongJae Park 	if (use_fork) {
489baa489faSSeongJae Park 		write(comm_pipes.parent_ready[1], "0", 1);
490baa489faSSeongJae Park 		wait(&ret);
491baa489faSSeongJae Park 	}
492baa489faSSeongJae Park unregister_buffers:
493baa489faSSeongJae Park 	io_uring_unregister_buffers(&ring);
494baa489faSSeongJae Park queue_exit:
495baa489faSSeongJae Park 	io_uring_queue_exit(&ring);
496baa489faSSeongJae Park free_tmp:
497baa489faSSeongJae Park 	free(tmp);
498baa489faSSeongJae Park close_file:
499baa489faSSeongJae Park 	fclose(file);
500baa489faSSeongJae Park close_comm_pipes:
501baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
502baa489faSSeongJae Park }
503baa489faSSeongJae Park 
/* io_uring fixed-buffer test without fork(): uses the mprotect() variant. */
static void test_iouring_ro(char *mem, size_t size)
{
	const bool with_fork = false;

	do_test_iouring(mem, size, with_fork);
}
508baa489faSSeongJae Park 
/* io_uring fixed-buffer test with a child forked after registration. */
static void test_iouring_fork(char *mem, size_t size)
{
	const bool with_fork = true;

	do_test_iouring(mem, size, with_fork);
}
513baa489faSSeongJae Park 
514baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */
515baa489faSSeongJae Park 
/* How do_test_ro_pin() prepares the range before taking the R/O pin. */
enum ro_pin_test {
	/* No preparation. */
	RO_PIN_TEST,
	/* fork() a child that stays alive until the pin test is done. */
	RO_PIN_TEST_SHARED,
	/* fork() a child and let it exit again before taking the pin. */
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	/* Toggle the range R/O and back via mprotect(), no child involved. */
	RO_PIN_TEST_RO_EXCLUSIVE,
};
522baa489faSSeongJae Park 
523baa489faSSeongJae Park static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
524baa489faSSeongJae Park 			   bool fast)
525baa489faSSeongJae Park {
526baa489faSSeongJae Park 	struct pin_longterm_test args;
527baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
528baa489faSSeongJae Park 	char *tmp, buf;
529baa489faSSeongJae Park 	__u64 tmp_val;
530baa489faSSeongJae Park 	int ret;
531baa489faSSeongJae Park 
532baa489faSSeongJae Park 	if (gup_fd < 0) {
533baa489faSSeongJae Park 		ksft_test_result_skip("gup_test not available\n");
534baa489faSSeongJae Park 		return;
535baa489faSSeongJae Park 	}
536baa489faSSeongJae Park 
537baa489faSSeongJae Park 	tmp = malloc(size);
538baa489faSSeongJae Park 	if (!tmp) {
539baa489faSSeongJae Park 		ksft_test_result_fail("malloc() failed\n");
540baa489faSSeongJae Park 		return;
541baa489faSSeongJae Park 	}
542baa489faSSeongJae Park 
543baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
544baa489faSSeongJae Park 	if (ret) {
545baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
546baa489faSSeongJae Park 		goto free_tmp;
547baa489faSSeongJae Park 	}
548baa489faSSeongJae Park 
549baa489faSSeongJae Park 	switch (test) {
550baa489faSSeongJae Park 	case RO_PIN_TEST:
551baa489faSSeongJae Park 		break;
552baa489faSSeongJae Park 	case RO_PIN_TEST_SHARED:
553baa489faSSeongJae Park 	case RO_PIN_TEST_PREVIOUSLY_SHARED:
554baa489faSSeongJae Park 		/*
555baa489faSSeongJae Park 		 * Share the pages with our child. As the pages are not pinned,
556baa489faSSeongJae Park 		 * this should just work.
557baa489faSSeongJae Park 		 */
558baa489faSSeongJae Park 		ret = fork();
559baa489faSSeongJae Park 		if (ret < 0) {
560baa489faSSeongJae Park 			ksft_test_result_fail("fork() failed\n");
561baa489faSSeongJae Park 			goto close_comm_pipes;
562baa489faSSeongJae Park 		} else if (!ret) {
563baa489faSSeongJae Park 			write(comm_pipes.child_ready[1], "0", 1);
564baa489faSSeongJae Park 			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
565baa489faSSeongJae Park 				;
566baa489faSSeongJae Park 			exit(0);
567baa489faSSeongJae Park 		}
568baa489faSSeongJae Park 
569baa489faSSeongJae Park 		/* Wait until our child is ready. */
570baa489faSSeongJae Park 		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
571baa489faSSeongJae Park 			;
572baa489faSSeongJae Park 
573baa489faSSeongJae Park 		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
574baa489faSSeongJae Park 			/*
575baa489faSSeongJae Park 			 * Tell the child to quit now and wait until it quit.
576baa489faSSeongJae Park 			 * The pages should now be mapped R/O into our page
577baa489faSSeongJae Park 			 * tables, but they are no longer shared.
578baa489faSSeongJae Park 			 */
579baa489faSSeongJae Park 			write(comm_pipes.parent_ready[1], "0", 1);
580baa489faSSeongJae Park 			wait(&ret);
581baa489faSSeongJae Park 			if (!WIFEXITED(ret))
582baa489faSSeongJae Park 				ksft_print_msg("[INFO] wait() failed\n");
583baa489faSSeongJae Park 		}
584baa489faSSeongJae Park 		break;
585baa489faSSeongJae Park 	case RO_PIN_TEST_RO_EXCLUSIVE:
586baa489faSSeongJae Park 		/*
587baa489faSSeongJae Park 		 * Map the page R/O into the page table. Enable softdirty
588baa489faSSeongJae Park 		 * tracking to stop the page from getting mapped R/W immediately
589baa489faSSeongJae Park 		 * again by mprotect() optimizations. Note that we don't have an
590baa489faSSeongJae Park 		 * easy way to test if that worked (the pagemap does not export
591baa489faSSeongJae Park 		 * if the page is mapped R/O vs. R/W).
592baa489faSSeongJae Park 		 */
593baa489faSSeongJae Park 		ret = mprotect(mem, size, PROT_READ);
594baa489faSSeongJae Park 		clear_softdirty();
595baa489faSSeongJae Park 		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
596baa489faSSeongJae Park 		if (ret) {
597baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
598baa489faSSeongJae Park 			goto close_comm_pipes;
599baa489faSSeongJae Park 		}
600baa489faSSeongJae Park 		break;
601baa489faSSeongJae Park 	default:
602baa489faSSeongJae Park 		assert(false);
603baa489faSSeongJae Park 	}
604baa489faSSeongJae Park 
605baa489faSSeongJae Park 	/* Take a R/O pin. This should trigger unsharing. */
606baa489faSSeongJae Park 	args.addr = (__u64)(uintptr_t)mem;
607baa489faSSeongJae Park 	args.size = size;
608baa489faSSeongJae Park 	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
609baa489faSSeongJae Park 	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
610baa489faSSeongJae Park 	if (ret) {
611baa489faSSeongJae Park 		if (errno == EINVAL)
612baa489faSSeongJae Park 			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
613baa489faSSeongJae Park 		else
614baa489faSSeongJae Park 			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
615baa489faSSeongJae Park 		goto wait;
616baa489faSSeongJae Park 	}
617baa489faSSeongJae Park 
618baa489faSSeongJae Park 	/* Modify the page. */
619baa489faSSeongJae Park 	memset(mem, 0xff, size);
620baa489faSSeongJae Park 
621baa489faSSeongJae Park 	/*
622baa489faSSeongJae Park 	 * Read back the content via the pin to the temporary buffer and
623baa489faSSeongJae Park 	 * test if we observed the modification.
624baa489faSSeongJae Park 	 */
625baa489faSSeongJae Park 	tmp_val = (__u64)(uintptr_t)tmp;
626baa489faSSeongJae Park 	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
627baa489faSSeongJae Park 	if (ret)
628baa489faSSeongJae Park 		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
629baa489faSSeongJae Park 	else
630baa489faSSeongJae Park 		ksft_test_result(!memcmp(mem, tmp, size),
631baa489faSSeongJae Park 				 "Longterm R/O pin is reliable\n");
632baa489faSSeongJae Park 
633baa489faSSeongJae Park 	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
634baa489faSSeongJae Park 	if (ret)
635baa489faSSeongJae Park 		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
636baa489faSSeongJae Park wait:
637baa489faSSeongJae Park 	switch (test) {
638baa489faSSeongJae Park 	case RO_PIN_TEST_SHARED:
639baa489faSSeongJae Park 		write(comm_pipes.parent_ready[1], "0", 1);
640baa489faSSeongJae Park 		wait(&ret);
641baa489faSSeongJae Park 		if (!WIFEXITED(ret))
642baa489faSSeongJae Park 			ksft_print_msg("[INFO] wait() failed\n");
643baa489faSSeongJae Park 		break;
644baa489faSSeongJae Park 	default:
645baa489faSSeongJae Park 		break;
646baa489faSSeongJae Park 	}
647baa489faSSeongJae Park close_comm_pipes:
648baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
649baa489faSSeongJae Park free_tmp:
650baa489faSSeongJae Park 	free(tmp);
651baa489faSSeongJae Park }
652baa489faSSeongJae Park 
653baa489faSSeongJae Park static void test_ro_pin_on_shared(char *mem, size_t size)
654baa489faSSeongJae Park {
655baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
656baa489faSSeongJae Park }
657baa489faSSeongJae Park 
658baa489faSSeongJae Park static void test_ro_fast_pin_on_shared(char *mem, size_t size)
659baa489faSSeongJae Park {
660baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
661baa489faSSeongJae Park }
662baa489faSSeongJae Park 
663baa489faSSeongJae Park static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
664baa489faSSeongJae Park {
665baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
666baa489faSSeongJae Park }
667baa489faSSeongJae Park 
668baa489faSSeongJae Park static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
669baa489faSSeongJae Park {
670baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
671baa489faSSeongJae Park }
672baa489faSSeongJae Park 
673baa489faSSeongJae Park static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
674baa489faSSeongJae Park {
675baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
676baa489faSSeongJae Park }
677baa489faSSeongJae Park 
678baa489faSSeongJae Park static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
679baa489faSSeongJae Park {
680baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
681baa489faSSeongJae Park }
682baa489faSSeongJae Park 
/*
 * Signature shared by the test functions in this file: each one receives the
 * start address of the memory area prepared by a run_with_*() helper and the
 * size of that area in bytes.
 */
typedef void (*test_fn)(char *mem, size_t size);
684baa489faSSeongJae Park 
685baa489faSSeongJae Park static void do_run_with_base_page(test_fn fn, bool swapout)
686baa489faSSeongJae Park {
687baa489faSSeongJae Park 	char *mem;
688baa489faSSeongJae Park 	int ret;
689baa489faSSeongJae Park 
690baa489faSSeongJae Park 	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
691baa489faSSeongJae Park 		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
692baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
693baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
694baa489faSSeongJae Park 		return;
695baa489faSSeongJae Park 	}
696baa489faSSeongJae Park 
697baa489faSSeongJae Park 	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
698baa489faSSeongJae Park 	/* Ignore if not around on a kernel. */
699baa489faSSeongJae Park 	if (ret && errno != EINVAL) {
700baa489faSSeongJae Park 		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
701baa489faSSeongJae Park 		goto munmap;
702baa489faSSeongJae Park 	}
703baa489faSSeongJae Park 
704baa489faSSeongJae Park 	/* Populate a base page. */
705baa489faSSeongJae Park 	memset(mem, 0, pagesize);
706baa489faSSeongJae Park 
707baa489faSSeongJae Park 	if (swapout) {
708baa489faSSeongJae Park 		madvise(mem, pagesize, MADV_PAGEOUT);
709baa489faSSeongJae Park 		if (!pagemap_is_swapped(pagemap_fd, mem)) {
710baa489faSSeongJae Park 			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
711baa489faSSeongJae Park 			goto munmap;
712baa489faSSeongJae Park 		}
713baa489faSSeongJae Park 	}
714baa489faSSeongJae Park 
715baa489faSSeongJae Park 	fn(mem, pagesize);
716baa489faSSeongJae Park munmap:
717baa489faSSeongJae Park 	munmap(mem, pagesize);
718baa489faSSeongJae Park }
719baa489faSSeongJae Park 
720baa489faSSeongJae Park static void run_with_base_page(test_fn fn, const char *desc)
721baa489faSSeongJae Park {
722baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with base page\n", desc);
723baa489faSSeongJae Park 	do_run_with_base_page(fn, false);
724baa489faSSeongJae Park }
725baa489faSSeongJae Park 
726baa489faSSeongJae Park static void run_with_base_page_swap(test_fn fn, const char *desc)
727baa489faSSeongJae Park {
728baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
729baa489faSSeongJae Park 	do_run_with_base_page(fn, true);
730baa489faSSeongJae Park }
731baa489faSSeongJae Park 
/* How do_run_with_thp() prepares the THP before invoking the test. */
enum thp_run {
	/* Use the populated THP as is. */
	THP_RUN_PMD = 0,
	/* As THP_RUN_PMD, then MADV_PAGEOUT the area. */
	THP_RUN_PMD_SWAPOUT,
	/* PTE-map the THP via a temporary mprotect() of one subpage. */
	THP_RUN_PTE,
	/* As THP_RUN_PTE, then MADV_PAGEOUT the area. */
	THP_RUN_PTE_SWAPOUT,
	/* MADV_DONTNEED everything except the first subpage. */
	THP_RUN_SINGLE_PTE,
	/* As THP_RUN_SINGLE_PTE, then MADV_PAGEOUT the remaining subpage. */
	THP_RUN_SINGLE_PTE_SWAPOUT,
	/* mremap() the upper half of the THP to another location. */
	THP_RUN_PARTIAL_MREMAP,
	/* fork() a child that only inherits the first subpage, then reap it. */
	THP_RUN_PARTIAL_SHARED,
};
742baa489faSSeongJae Park 
743baa489faSSeongJae Park static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
744baa489faSSeongJae Park {
745baa489faSSeongJae Park 	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
746baa489faSSeongJae Park 	size_t size, mmap_size, mremap_size;
747baa489faSSeongJae Park 	int ret;
748baa489faSSeongJae Park 
749baa489faSSeongJae Park 	/* For alignment purposes, we need twice the thp size. */
750baa489faSSeongJae Park 	mmap_size = 2 * thpsize;
751baa489faSSeongJae Park 	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
752baa489faSSeongJae Park 			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
753baa489faSSeongJae Park 	if (mmap_mem == MAP_FAILED) {
754baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
755baa489faSSeongJae Park 		return;
756baa489faSSeongJae Park 	}
757baa489faSSeongJae Park 
758baa489faSSeongJae Park 	/* We need a THP-aligned memory area. */
759baa489faSSeongJae Park 	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
760baa489faSSeongJae Park 
761baa489faSSeongJae Park 	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
762baa489faSSeongJae Park 	if (ret) {
763baa489faSSeongJae Park 		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
764baa489faSSeongJae Park 		goto munmap;
765baa489faSSeongJae Park 	}
766baa489faSSeongJae Park 
767baa489faSSeongJae Park 	/*
768baa489faSSeongJae Park 	 * Try to populate a THP. Touch the first sub-page and test if we get
769baa489faSSeongJae Park 	 * another sub-page populated automatically.
770baa489faSSeongJae Park 	 */
771baa489faSSeongJae Park 	mem[0] = 0;
772baa489faSSeongJae Park 	if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
773baa489faSSeongJae Park 		ksft_test_result_skip("Did not get a THP populated\n");
774baa489faSSeongJae Park 		goto munmap;
775baa489faSSeongJae Park 	}
776baa489faSSeongJae Park 	memset(mem, 0, thpsize);
777baa489faSSeongJae Park 
778baa489faSSeongJae Park 	size = thpsize;
779baa489faSSeongJae Park 	switch (thp_run) {
780baa489faSSeongJae Park 	case THP_RUN_PMD:
781baa489faSSeongJae Park 	case THP_RUN_PMD_SWAPOUT:
782baa489faSSeongJae Park 		break;
783baa489faSSeongJae Park 	case THP_RUN_PTE:
784baa489faSSeongJae Park 	case THP_RUN_PTE_SWAPOUT:
785baa489faSSeongJae Park 		/*
786baa489faSSeongJae Park 		 * Trigger PTE-mapping the THP by temporarily mapping a single
787baa489faSSeongJae Park 		 * subpage R/O.
788baa489faSSeongJae Park 		 */
789baa489faSSeongJae Park 		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
790baa489faSSeongJae Park 		if (ret) {
791baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
792baa489faSSeongJae Park 			goto munmap;
793baa489faSSeongJae Park 		}
794baa489faSSeongJae Park 		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
795baa489faSSeongJae Park 		if (ret) {
796baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
797baa489faSSeongJae Park 			goto munmap;
798baa489faSSeongJae Park 		}
799baa489faSSeongJae Park 		break;
800baa489faSSeongJae Park 	case THP_RUN_SINGLE_PTE:
801baa489faSSeongJae Park 	case THP_RUN_SINGLE_PTE_SWAPOUT:
802baa489faSSeongJae Park 		/*
803baa489faSSeongJae Park 		 * Discard all but a single subpage of that PTE-mapped THP. What
804baa489faSSeongJae Park 		 * remains is a single PTE mapping a single subpage.
805baa489faSSeongJae Park 		 */
806baa489faSSeongJae Park 		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
807baa489faSSeongJae Park 		if (ret) {
808baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DONTNEED failed\n");
809baa489faSSeongJae Park 			goto munmap;
810baa489faSSeongJae Park 		}
811baa489faSSeongJae Park 		size = pagesize;
812baa489faSSeongJae Park 		break;
813baa489faSSeongJae Park 	case THP_RUN_PARTIAL_MREMAP:
814baa489faSSeongJae Park 		/*
815baa489faSSeongJae Park 		 * Remap half of the THP. We need some new memory location
816baa489faSSeongJae Park 		 * for that.
817baa489faSSeongJae Park 		 */
818baa489faSSeongJae Park 		mremap_size = thpsize / 2;
819baa489faSSeongJae Park 		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
820baa489faSSeongJae Park 				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
821baa489faSSeongJae Park 		if (mem == MAP_FAILED) {
822baa489faSSeongJae Park 			ksft_test_result_fail("mmap() failed\n");
823baa489faSSeongJae Park 			goto munmap;
824baa489faSSeongJae Park 		}
825baa489faSSeongJae Park 		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
826baa489faSSeongJae Park 			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
827baa489faSSeongJae Park 		if (tmp != mremap_mem) {
828baa489faSSeongJae Park 			ksft_test_result_fail("mremap() failed\n");
829baa489faSSeongJae Park 			goto munmap;
830baa489faSSeongJae Park 		}
831baa489faSSeongJae Park 		size = mremap_size;
832baa489faSSeongJae Park 		break;
833baa489faSSeongJae Park 	case THP_RUN_PARTIAL_SHARED:
834baa489faSSeongJae Park 		/*
835baa489faSSeongJae Park 		 * Share the first page of the THP with a child and quit the
836baa489faSSeongJae Park 		 * child. This will result in some parts of the THP never
837baa489faSSeongJae Park 		 * have been shared.
838baa489faSSeongJae Park 		 */
839baa489faSSeongJae Park 		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
840baa489faSSeongJae Park 		if (ret) {
841baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DONTFORK failed\n");
842baa489faSSeongJae Park 			goto munmap;
843baa489faSSeongJae Park 		}
844baa489faSSeongJae Park 		ret = fork();
845baa489faSSeongJae Park 		if (ret < 0) {
846baa489faSSeongJae Park 			ksft_test_result_fail("fork() failed\n");
847baa489faSSeongJae Park 			goto munmap;
848baa489faSSeongJae Park 		} else if (!ret) {
849baa489faSSeongJae Park 			exit(0);
850baa489faSSeongJae Park 		}
851baa489faSSeongJae Park 		wait(&ret);
852baa489faSSeongJae Park 		/* Allow for sharing all pages again. */
853baa489faSSeongJae Park 		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
854baa489faSSeongJae Park 		if (ret) {
855baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DOFORK failed\n");
856baa489faSSeongJae Park 			goto munmap;
857baa489faSSeongJae Park 		}
858baa489faSSeongJae Park 		break;
859baa489faSSeongJae Park 	default:
860baa489faSSeongJae Park 		assert(false);
861baa489faSSeongJae Park 	}
862baa489faSSeongJae Park 
863baa489faSSeongJae Park 	switch (thp_run) {
864baa489faSSeongJae Park 	case THP_RUN_PMD_SWAPOUT:
865baa489faSSeongJae Park 	case THP_RUN_PTE_SWAPOUT:
866baa489faSSeongJae Park 	case THP_RUN_SINGLE_PTE_SWAPOUT:
867baa489faSSeongJae Park 		madvise(mem, size, MADV_PAGEOUT);
868baa489faSSeongJae Park 		if (!range_is_swapped(mem, size)) {
869baa489faSSeongJae Park 			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
870baa489faSSeongJae Park 			goto munmap;
871baa489faSSeongJae Park 		}
872baa489faSSeongJae Park 		break;
873baa489faSSeongJae Park 	default:
874baa489faSSeongJae Park 		break;
875baa489faSSeongJae Park 	}
876baa489faSSeongJae Park 
877baa489faSSeongJae Park 	fn(mem, size);
878baa489faSSeongJae Park munmap:
879baa489faSSeongJae Park 	munmap(mmap_mem, mmap_size);
880baa489faSSeongJae Park 	if (mremap_mem != MAP_FAILED)
881baa489faSSeongJae Park 		munmap(mremap_mem, mremap_size);
882baa489faSSeongJae Park }
883baa489faSSeongJae Park 
884baa489faSSeongJae Park static void run_with_thp(test_fn fn, const char *desc)
885baa489faSSeongJae Park {
886baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with THP\n", desc);
887baa489faSSeongJae Park 	do_run_with_thp(fn, THP_RUN_PMD);
888baa489faSSeongJae Park }
889baa489faSSeongJae Park 
890baa489faSSeongJae Park static void run_with_thp_swap(test_fn fn, const char *desc)
891baa489faSSeongJae Park {
892baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
893baa489faSSeongJae Park 	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
894baa489faSSeongJae Park }
895baa489faSSeongJae Park 
896baa489faSSeongJae Park static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
897baa489faSSeongJae Park {
898baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
899baa489faSSeongJae Park 	do_run_with_thp(fn, THP_RUN_PTE);
900baa489faSSeongJae Park }
901baa489faSSeongJae Park 
902baa489faSSeongJae Park static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
903baa489faSSeongJae Park {
904baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
905baa489faSSeongJae Park 	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
906baa489faSSeongJae Park }
907baa489faSSeongJae Park 
908baa489faSSeongJae Park static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
909baa489faSSeongJae Park {
910baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
911baa489faSSeongJae Park 	do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
912baa489faSSeongJae Park }
913baa489faSSeongJae Park 
914baa489faSSeongJae Park static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
915baa489faSSeongJae Park {
916baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
917baa489faSSeongJae Park 	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
918baa489faSSeongJae Park }
919baa489faSSeongJae Park 
920baa489faSSeongJae Park static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
921baa489faSSeongJae Park {
922baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
923baa489faSSeongJae Park 	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
924baa489faSSeongJae Park }
925baa489faSSeongJae Park 
926baa489faSSeongJae Park static void run_with_partial_shared_thp(test_fn fn, const char *desc)
927baa489faSSeongJae Park {
928baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
929baa489faSSeongJae Park 	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
930baa489faSSeongJae Park }
931baa489faSSeongJae Park 
932baa489faSSeongJae Park static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
933baa489faSSeongJae Park {
934baa489faSSeongJae Park 	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
935baa489faSSeongJae Park 	char *mem, *dummy;
936baa489faSSeongJae Park 
937baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
938baa489faSSeongJae Park 		       hugetlbsize / 1024);
939baa489faSSeongJae Park 
940baa489faSSeongJae Park 	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
941baa489faSSeongJae Park 
942baa489faSSeongJae Park 	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
943baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
944baa489faSSeongJae Park 		ksft_test_result_skip("need more free huge pages\n");
945baa489faSSeongJae Park 		return;
946baa489faSSeongJae Park 	}
947baa489faSSeongJae Park 
948baa489faSSeongJae Park 	/* Populate an huge page. */
949baa489faSSeongJae Park 	memset(mem, 0, hugetlbsize);
950baa489faSSeongJae Park 
951baa489faSSeongJae Park 	/*
952baa489faSSeongJae Park 	 * We need a total of two hugetlb pages to handle COW/unsharing
953baa489faSSeongJae Park 	 * properly, otherwise we might get zapped by a SIGBUS.
954baa489faSSeongJae Park 	 */
955baa489faSSeongJae Park 	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
956baa489faSSeongJae Park 	if (dummy == MAP_FAILED) {
957baa489faSSeongJae Park 		ksft_test_result_skip("need more free huge pages\n");
958baa489faSSeongJae Park 		goto munmap;
959baa489faSSeongJae Park 	}
960baa489faSSeongJae Park 	munmap(dummy, hugetlbsize);
961baa489faSSeongJae Park 
962baa489faSSeongJae Park 	fn(mem, hugetlbsize);
963baa489faSSeongJae Park munmap:
964baa489faSSeongJae Park 	munmap(mem, hugetlbsize);
965baa489faSSeongJae Park }
966baa489faSSeongJae Park 
/* One entry of a test table: a description and the function to run. */
struct test_case {
	/* Description inserted into the "[RUN] ..." messages. */
	const char *desc;
	/* Test function, invoked once per memory configuration. */
	test_fn fn;
};
971baa489faSSeongJae Park 
972baa489faSSeongJae Park /*
973baa489faSSeongJae Park  * Test cases that are specific to anonymous pages: pages in private mappings
974baa489faSSeongJae Park  * that may get shared via COW during fork().
975baa489faSSeongJae Park  */
976baa489faSSeongJae Park static const struct test_case anon_test_cases[] = {
977baa489faSSeongJae Park 	/*
978baa489faSSeongJae Park 	 * Basic COW tests for fork() without any GUP. If we miss to break COW,
979baa489faSSeongJae Park 	 * either the child can observe modifications by the parent or the
980baa489faSSeongJae Park 	 * other way around.
981baa489faSSeongJae Park 	 */
982baa489faSSeongJae Park 	{
983baa489faSSeongJae Park 		"Basic COW after fork()",
984baa489faSSeongJae Park 		test_cow_in_parent,
985baa489faSSeongJae Park 	},
986baa489faSSeongJae Park 	/*
987baa489faSSeongJae Park 	 * Basic test, but do an additional mprotect(PROT_READ)+
988baa489faSSeongJae Park 	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
989baa489faSSeongJae Park 	 */
990baa489faSSeongJae Park 	{
991baa489faSSeongJae Park 		"Basic COW after fork() with mprotect() optimization",
992baa489faSSeongJae Park 		test_cow_in_parent_mprotect,
993baa489faSSeongJae Park 	},
994baa489faSSeongJae Park 	/*
995baa489faSSeongJae Park 	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
996baa489faSSeongJae Park 	 * we miss to break COW, the child observes modifications by the parent.
997baa489faSSeongJae Park 	 * This is CVE-2020-29374 reported by Jann Horn.
998baa489faSSeongJae Park 	 */
999baa489faSSeongJae Park 	{
1000baa489faSSeongJae Park 		"vmsplice() + unmap in child",
1001baa489faSSeongJae Park 		test_vmsplice_in_child
1002baa489faSSeongJae Park 	},
1003baa489faSSeongJae Park 	/*
1004baa489faSSeongJae Park 	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
1005baa489faSSeongJae Park 	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
1006baa489faSSeongJae Park 	 */
1007baa489faSSeongJae Park 	{
1008baa489faSSeongJae Park 		"vmsplice() + unmap in child with mprotect() optimization",
1009baa489faSSeongJae Park 		test_vmsplice_in_child_mprotect
1010baa489faSSeongJae Park 	},
1011baa489faSSeongJae Park 	/*
1012baa489faSSeongJae Park 	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
1013baa489faSSeongJae Park 	 * fork(); modify in the child. If we miss to break COW, the parent
1014baa489faSSeongJae Park 	 * observes modifications by the child.
1015baa489faSSeongJae Park 	 */
1016baa489faSSeongJae Park 	{
1017baa489faSSeongJae Park 		"vmsplice() before fork(), unmap in parent after fork()",
1018baa489faSSeongJae Park 		test_vmsplice_before_fork,
1019baa489faSSeongJae Park 	},
1020baa489faSSeongJae Park 	/*
1021baa489faSSeongJae Park 	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
1022baa489faSSeongJae Park 	 * child. If we miss to break COW, the parent observes modifications by
1023baa489faSSeongJae Park 	 * the child.
1024baa489faSSeongJae Park 	 */
1025baa489faSSeongJae Park 	{
1026baa489faSSeongJae Park 		"vmsplice() + unmap in parent after fork()",
1027baa489faSSeongJae Park 		test_vmsplice_after_fork,
1028baa489faSSeongJae Park 	},
1029baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING
1030baa489faSSeongJae Park 	/*
1031baa489faSSeongJae Park 	 * Take a R/W longterm pin and then map the page R/O into the page
1032baa489faSSeongJae Park 	 * table to trigger a write fault on next access. When modifying the
1033baa489faSSeongJae Park 	 * page, the page content must be visible via the pin.
1034baa489faSSeongJae Park 	 */
1035baa489faSSeongJae Park 	{
1036baa489faSSeongJae Park 		"R/O-mapping a page registered as iouring fixed buffer",
1037baa489faSSeongJae Park 		test_iouring_ro,
1038baa489faSSeongJae Park 	},
1039baa489faSSeongJae Park 	/*
1040baa489faSSeongJae Park 	 * Take a R/W longterm pin and then fork() a child. When modifying the
1041baa489faSSeongJae Park 	 * page, the page content must be visible via the pin. We expect the
1042baa489faSSeongJae Park 	 * pinned page to not get shared with the child.
1043baa489faSSeongJae Park 	 */
1044baa489faSSeongJae Park 	{
1045baa489faSSeongJae Park 		"fork() with an iouring fixed buffer",
1046baa489faSSeongJae Park 		test_iouring_fork,
1047baa489faSSeongJae Park 	},
1048baa489faSSeongJae Park 
1049baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */
1050baa489faSSeongJae Park 	/*
1051baa489faSSeongJae Park 	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
1052baa489faSSeongJae Park 	 * When modifying the page via the page table, the page content change
1053baa489faSSeongJae Park 	 * must be visible via the pin.
1054baa489faSSeongJae Park 	 */
1055baa489faSSeongJae Park 	{
1056baa489faSSeongJae Park 		"R/O GUP pin on R/O-mapped shared page",
1057baa489faSSeongJae Park 		test_ro_pin_on_shared,
1058baa489faSSeongJae Park 	},
1059baa489faSSeongJae Park 	/* Same as above, but using GUP-fast. */
1060baa489faSSeongJae Park 	{
1061baa489faSSeongJae Park 		"R/O GUP-fast pin on R/O-mapped shared page",
1062baa489faSSeongJae Park 		test_ro_fast_pin_on_shared,
1063baa489faSSeongJae Park 	},
1064baa489faSSeongJae Park 	/*
1065baa489faSSeongJae Park 	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
1066baa489faSSeongJae Park 	 * was previously shared. When modifying the page via the page table,
1067baa489faSSeongJae Park 	 * the page content change must be visible via the pin.
1068baa489faSSeongJae Park 	 */
1069baa489faSSeongJae Park 	{
1070baa489faSSeongJae Park 		"R/O GUP pin on R/O-mapped previously-shared page",
1071baa489faSSeongJae Park 		test_ro_pin_on_ro_previously_shared,
1072baa489faSSeongJae Park 	},
1073baa489faSSeongJae Park 	/* Same as above, but using GUP-fast. */
1074baa489faSSeongJae Park 	{
1075baa489faSSeongJae Park 		"R/O GUP-fast pin on R/O-mapped previously-shared page",
1076baa489faSSeongJae Park 		test_ro_fast_pin_on_ro_previously_shared,
1077baa489faSSeongJae Park 	},
1078baa489faSSeongJae Park 	/*
1079baa489faSSeongJae Park 	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
1080baa489faSSeongJae Park 	 * When modifying the page via the page table, the page content change
1081baa489faSSeongJae Park 	 * must be visible via the pin.
1082baa489faSSeongJae Park 	 */
1083baa489faSSeongJae Park 	{
1084baa489faSSeongJae Park 		"R/O GUP pin on R/O-mapped exclusive page",
1085baa489faSSeongJae Park 		test_ro_pin_on_ro_exclusive,
1086baa489faSSeongJae Park 	},
1087baa489faSSeongJae Park 	/* Same as above, but using GUP-fast. */
1088baa489faSSeongJae Park 	{
1089baa489faSSeongJae Park 		"R/O GUP-fast pin on R/O-mapped exclusive page",
1090baa489faSSeongJae Park 		test_ro_fast_pin_on_ro_exclusive,
1091baa489faSSeongJae Park 	},
1092baa489faSSeongJae Park };
1093baa489faSSeongJae Park 
1094baa489faSSeongJae Park static void run_anon_test_case(struct test_case const *test_case)
1095baa489faSSeongJae Park {
1096baa489faSSeongJae Park 	int i;
1097baa489faSSeongJae Park 
1098baa489faSSeongJae Park 	run_with_base_page(test_case->fn, test_case->desc);
1099baa489faSSeongJae Park 	run_with_base_page_swap(test_case->fn, test_case->desc);
1100baa489faSSeongJae Park 	if (thpsize) {
1101baa489faSSeongJae Park 		run_with_thp(test_case->fn, test_case->desc);
1102baa489faSSeongJae Park 		run_with_thp_swap(test_case->fn, test_case->desc);
1103baa489faSSeongJae Park 		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
1104baa489faSSeongJae Park 		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
1105baa489faSSeongJae Park 		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
1106baa489faSSeongJae Park 		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
1107baa489faSSeongJae Park 		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
1108baa489faSSeongJae Park 		run_with_partial_shared_thp(test_case->fn, test_case->desc);
1109baa489faSSeongJae Park 	}
1110baa489faSSeongJae Park 	for (i = 0; i < nr_hugetlbsizes; i++)
1111baa489faSSeongJae Park 		run_with_hugetlb(test_case->fn, test_case->desc,
1112baa489faSSeongJae Park 				 hugetlbsizes[i]);
1113baa489faSSeongJae Park }
1114baa489faSSeongJae Park 
1115baa489faSSeongJae Park static void run_anon_test_cases(void)
1116baa489faSSeongJae Park {
1117baa489faSSeongJae Park 	int i;
1118baa489faSSeongJae Park 
1119baa489faSSeongJae Park 	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");
1120baa489faSSeongJae Park 
1121baa489faSSeongJae Park 	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
1122baa489faSSeongJae Park 		run_anon_test_case(&anon_test_cases[i]);
1123baa489faSSeongJae Park }
1124baa489faSSeongJae Park 
1125baa489faSSeongJae Park static int tests_per_anon_test_case(void)
1126baa489faSSeongJae Park {
1127baa489faSSeongJae Park 	int tests = 2 + nr_hugetlbsizes;
1128baa489faSSeongJae Park 
1129baa489faSSeongJae Park 	if (thpsize)
1130baa489faSSeongJae Park 		tests += 8;
1131baa489faSSeongJae Park 	return tests;
1132baa489faSSeongJae Park }
1133baa489faSSeongJae Park 
/* Scenarios handled by do_test_anon_thp_collapse(). */
enum anon_thp_collapse_test {
	/* Collapse before fork(), i.e. before the page is COW-shared. */
	ANON_THP_COLLAPSE_UNSHARED = 0,
	/* The child inherits the full THP; collapse after fork(). */
	ANON_THP_COLLAPSE_FULLY_SHARED,
	/* The upper half is MADV_DONTFORK; only the lower half is shared. */
	ANON_THP_COLLAPSE_LOWER_SHARED,
	/* The lower half is MADV_DONTFORK; only the upper half is shared. */
	ANON_THP_COLLAPSE_UPPER_SHARED,
};
1140baa489faSSeongJae Park 
1141baa489faSSeongJae Park static void do_test_anon_thp_collapse(char *mem, size_t size,
1142baa489faSSeongJae Park 				      enum anon_thp_collapse_test test)
1143baa489faSSeongJae Park {
1144baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
1145baa489faSSeongJae Park 	char buf;
1146baa489faSSeongJae Park 	int ret;
1147baa489faSSeongJae Park 
1148baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
1149baa489faSSeongJae Park 	if (ret) {
1150baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
1151baa489faSSeongJae Park 		return;
1152baa489faSSeongJae Park 	}
1153baa489faSSeongJae Park 
1154baa489faSSeongJae Park 	/*
1155baa489faSSeongJae Park 	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
1156baa489faSSeongJae Park 	 * R/O, such that we can try collapsing it later.
1157baa489faSSeongJae Park 	 */
1158baa489faSSeongJae Park 	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
1159baa489faSSeongJae Park 	if (ret) {
1160baa489faSSeongJae Park 		ksft_test_result_fail("mprotect() failed\n");
1161baa489faSSeongJae Park 		goto close_comm_pipes;
1162baa489faSSeongJae Park 	}
1163baa489faSSeongJae Park 	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
1164baa489faSSeongJae Park 	if (ret) {
1165baa489faSSeongJae Park 		ksft_test_result_fail("mprotect() failed\n");
1166baa489faSSeongJae Park 		goto close_comm_pipes;
1167baa489faSSeongJae Park 	}
1168baa489faSSeongJae Park 
1169baa489faSSeongJae Park 	switch (test) {
1170baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_UNSHARED:
1171baa489faSSeongJae Park 		/* Collapse before actually COW-sharing the page. */
1172baa489faSSeongJae Park 		ret = madvise(mem, size, MADV_COLLAPSE);
1173baa489faSSeongJae Park 		if (ret) {
1174baa489faSSeongJae Park 			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
1175baa489faSSeongJae Park 					      strerror(errno));
1176baa489faSSeongJae Park 			goto close_comm_pipes;
1177baa489faSSeongJae Park 		}
1178baa489faSSeongJae Park 		break;
1179baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_FULLY_SHARED:
1180baa489faSSeongJae Park 		/* COW-share the full PTE-mapped THP. */
1181baa489faSSeongJae Park 		break;
1182baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_LOWER_SHARED:
1183baa489faSSeongJae Park 		/* Don't COW-share the upper part of the THP. */
1184baa489faSSeongJae Park 		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
1185baa489faSSeongJae Park 		if (ret) {
1186baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DONTFORK failed\n");
1187baa489faSSeongJae Park 			goto close_comm_pipes;
1188baa489faSSeongJae Park 		}
1189baa489faSSeongJae Park 		break;
1190baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_UPPER_SHARED:
1191baa489faSSeongJae Park 		/* Don't COW-share the lower part of the THP. */
1192baa489faSSeongJae Park 		ret = madvise(mem, size / 2, MADV_DONTFORK);
1193baa489faSSeongJae Park 		if (ret) {
1194baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DONTFORK failed\n");
1195baa489faSSeongJae Park 			goto close_comm_pipes;
1196baa489faSSeongJae Park 		}
1197baa489faSSeongJae Park 		break;
1198baa489faSSeongJae Park 	default:
1199baa489faSSeongJae Park 		assert(false);
1200baa489faSSeongJae Park 	}
1201baa489faSSeongJae Park 
1202baa489faSSeongJae Park 	ret = fork();
1203baa489faSSeongJae Park 	if (ret < 0) {
1204baa489faSSeongJae Park 		ksft_test_result_fail("fork() failed\n");
1205baa489faSSeongJae Park 		goto close_comm_pipes;
1206baa489faSSeongJae Park 	} else if (!ret) {
1207baa489faSSeongJae Park 		switch (test) {
1208baa489faSSeongJae Park 		case ANON_THP_COLLAPSE_UNSHARED:
1209baa489faSSeongJae Park 		case ANON_THP_COLLAPSE_FULLY_SHARED:
1210baa489faSSeongJae Park 			exit(child_memcmp_fn(mem, size, &comm_pipes));
1211baa489faSSeongJae Park 			break;
1212baa489faSSeongJae Park 		case ANON_THP_COLLAPSE_LOWER_SHARED:
1213baa489faSSeongJae Park 			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
1214baa489faSSeongJae Park 			break;
1215baa489faSSeongJae Park 		case ANON_THP_COLLAPSE_UPPER_SHARED:
1216baa489faSSeongJae Park 			exit(child_memcmp_fn(mem + size / 2, size / 2,
1217baa489faSSeongJae Park 					     &comm_pipes));
1218baa489faSSeongJae Park 			break;
1219baa489faSSeongJae Park 		default:
1220baa489faSSeongJae Park 			assert(false);
1221baa489faSSeongJae Park 		}
1222baa489faSSeongJae Park 	}
1223baa489faSSeongJae Park 
1224baa489faSSeongJae Park 	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
1225baa489faSSeongJae Park 		;
1226baa489faSSeongJae Park 
1227baa489faSSeongJae Park 	switch (test) {
1228baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_UNSHARED:
1229baa489faSSeongJae Park 		break;
1230baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_UPPER_SHARED:
1231baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_LOWER_SHARED:
1232baa489faSSeongJae Park 		/*
1233baa489faSSeongJae Park 		 * Revert MADV_DONTFORK such that we merge the VMAs and are
1234baa489faSSeongJae Park 		 * able to actually collapse.
1235baa489faSSeongJae Park 		 */
1236baa489faSSeongJae Park 		ret = madvise(mem, size, MADV_DOFORK);
1237baa489faSSeongJae Park 		if (ret) {
1238baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DOFORK failed\n");
1239baa489faSSeongJae Park 			write(comm_pipes.parent_ready[1], "0", 1);
1240baa489faSSeongJae Park 			wait(&ret);
1241baa489faSSeongJae Park 			goto close_comm_pipes;
1242baa489faSSeongJae Park 		}
1243baa489faSSeongJae Park 		/* FALLTHROUGH */
1244baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_FULLY_SHARED:
1245baa489faSSeongJae Park 		/* Collapse before anyone modified the COW-shared page. */
1246baa489faSSeongJae Park 		ret = madvise(mem, size, MADV_COLLAPSE);
1247baa489faSSeongJae Park 		if (ret) {
1248baa489faSSeongJae Park 			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
1249baa489faSSeongJae Park 					      strerror(errno));
1250baa489faSSeongJae Park 			write(comm_pipes.parent_ready[1], "0", 1);
1251baa489faSSeongJae Park 			wait(&ret);
1252baa489faSSeongJae Park 			goto close_comm_pipes;
1253baa489faSSeongJae Park 		}
1254baa489faSSeongJae Park 		break;
1255baa489faSSeongJae Park 	default:
1256baa489faSSeongJae Park 		assert(false);
1257baa489faSSeongJae Park 	}
1258baa489faSSeongJae Park 
1259baa489faSSeongJae Park 	/* Modify the page. */
1260baa489faSSeongJae Park 	memset(mem, 0xff, size);
1261baa489faSSeongJae Park 	write(comm_pipes.parent_ready[1], "0", 1);
1262baa489faSSeongJae Park 
1263baa489faSSeongJae Park 	wait(&ret);
1264baa489faSSeongJae Park 	if (WIFEXITED(ret))
1265baa489faSSeongJae Park 		ret = WEXITSTATUS(ret);
1266baa489faSSeongJae Park 	else
1267baa489faSSeongJae Park 		ret = -EINVAL;
1268baa489faSSeongJae Park 
1269baa489faSSeongJae Park 	ksft_test_result(!ret, "No leak from parent into child\n");
1270baa489faSSeongJae Park close_comm_pipes:
1271baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
1272baa489faSSeongJae Park }
1273baa489faSSeongJae Park 
/*
 * Test-case entry point: run the common THP collapse test body on
 * [mem, mem + size) in its ANON_THP_COLLAPSE_UNSHARED variant.
 */
static void test_anon_thp_collapse_unshared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}
1278baa489faSSeongJae Park 
/*
 * Test-case entry point: run the common THP collapse test body on
 * [mem, mem + size) in its ANON_THP_COLLAPSE_FULLY_SHARED variant.
 */
static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}
1283baa489faSSeongJae Park 
/*
 * Test-case entry point: run the common THP collapse test body on
 * [mem, mem + size) in its ANON_THP_COLLAPSE_LOWER_SHARED variant.
 */
static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}
1288baa489faSSeongJae Park 
/*
 * Test-case entry point: run the common THP collapse test body on
 * [mem, mem + size) in its ANON_THP_COLLAPSE_UPPER_SHARED variant.
 */
static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
{
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}
1293baa489faSSeongJae Park 
1294baa489faSSeongJae Park /*
1295baa489faSSeongJae Park  * Test cases that are specific to anonymous THP: pages in private mappings
1296baa489faSSeongJae Park  * that may get shared via COW during fork().
1297baa489faSSeongJae Park  */
1298baa489faSSeongJae Park static const struct test_case anon_thp_test_cases[] = {
1299baa489faSSeongJae Park 	/*
1300baa489faSSeongJae Park 	 * Basic COW test for fork() without any GUP when collapsing a THP
1301baa489faSSeongJae Park 	 * before fork().
1302baa489faSSeongJae Park 	 *
1303baa489faSSeongJae Park 	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
1304baa489faSSeongJae Park 	 * collapse") might easily get COW handling wrong when not collapsing
1305baa489faSSeongJae Park 	 * exclusivity information properly.
1306baa489faSSeongJae Park 	 */
1307baa489faSSeongJae Park 	{
1308baa489faSSeongJae Park 		"Basic COW after fork() when collapsing before fork()",
1309baa489faSSeongJae Park 		test_anon_thp_collapse_unshared,
1310baa489faSSeongJae Park 	},
1311baa489faSSeongJae Park 	/* Basic COW test, but collapse after COW-sharing a full THP. */
1312baa489faSSeongJae Park 	{
1313baa489faSSeongJae Park 		"Basic COW after fork() when collapsing after fork() (fully shared)",
1314baa489faSSeongJae Park 		test_anon_thp_collapse_fully_shared,
1315baa489faSSeongJae Park 	},
1316baa489faSSeongJae Park 	/*
1317baa489faSSeongJae Park 	 * Basic COW test, but collapse after COW-sharing the lower half of a
1318baa489faSSeongJae Park 	 * THP.
1319baa489faSSeongJae Park 	 */
1320baa489faSSeongJae Park 	{
1321baa489faSSeongJae Park 		"Basic COW after fork() when collapsing after fork() (lower shared)",
1322baa489faSSeongJae Park 		test_anon_thp_collapse_lower_shared,
1323baa489faSSeongJae Park 	},
1324baa489faSSeongJae Park 	/*
1325baa489faSSeongJae Park 	 * Basic COW test, but collapse after COW-sharing the upper half of a
1326baa489faSSeongJae Park 	 * THP.
1327baa489faSSeongJae Park 	 */
1328baa489faSSeongJae Park 	{
1329baa489faSSeongJae Park 		"Basic COW after fork() when collapsing after fork() (upper shared)",
1330baa489faSSeongJae Park 		test_anon_thp_collapse_upper_shared,
1331baa489faSSeongJae Park 	},
1332baa489faSSeongJae Park };
1333baa489faSSeongJae Park 
1334baa489faSSeongJae Park static void run_anon_thp_test_cases(void)
1335baa489faSSeongJae Park {
1336baa489faSSeongJae Park 	int i;
1337baa489faSSeongJae Park 
1338baa489faSSeongJae Park 	if (!thpsize)
1339baa489faSSeongJae Park 		return;
1340baa489faSSeongJae Park 
1341baa489faSSeongJae Park 	ksft_print_msg("[INFO] Anonymous THP tests\n");
1342baa489faSSeongJae Park 
1343baa489faSSeongJae Park 	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
1344baa489faSSeongJae Park 		struct test_case const *test_case = &anon_thp_test_cases[i];
1345baa489faSSeongJae Park 
1346baa489faSSeongJae Park 		ksft_print_msg("[RUN] %s\n", test_case->desc);
1347baa489faSSeongJae Park 		do_run_with_thp(test_case->fn, THP_RUN_PMD);
1348baa489faSSeongJae Park 	}
1349baa489faSSeongJae Park }
1350baa489faSSeongJae Park 
1351baa489faSSeongJae Park static int tests_per_anon_thp_test_case(void)
1352baa489faSSeongJae Park {
1353baa489faSSeongJae Park 	return thpsize ? 1 : 0;
1354baa489faSSeongJae Park }
1355baa489faSSeongJae Park 
/*
 * Signature of a non-anonymous test body. The runners in this file pass a
 * writable private mapping as @mem, a read-only second mapping as @smem and
 * the length of each mapping as @size.
 */
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
1357baa489faSSeongJae Park 
/*
 * Basic COW check: writing through the private mapping @mem must not change
 * what is read through the other mapping @smem.
 *
 * @mem:  writable private mapping, @size bytes
 * @smem: second (read-only) mapping of the same backing page(s), @size bytes
 * @size: number of bytes to modify and compare
 *
 * Reports exactly one test result, including when the backup buffer cannot
 * be allocated (reported as a failure), so the test plan stays consistent.
 */
static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	if (!old) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	ksft_test_result(!memcmp(smem, old, size),
			 "Other mapping not modified\n");
	free(old);
}
1373baa489faSSeongJae Park 
1374baa489faSSeongJae Park static void test_ro_pin(char *mem, const char *smem, size_t size)
1375baa489faSSeongJae Park {
1376baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
1377baa489faSSeongJae Park }
1378baa489faSSeongJae Park 
1379baa489faSSeongJae Park static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
1380baa489faSSeongJae Park {
1381baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
1382baa489faSSeongJae Park }
1383baa489faSSeongJae Park 
1384baa489faSSeongJae Park static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
1385baa489faSSeongJae Park {
1386baa489faSSeongJae Park 	char *mem, *smem, tmp;
1387baa489faSSeongJae Park 
1388baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
1389baa489faSSeongJae Park 
1390baa489faSSeongJae Park 	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
1391baa489faSSeongJae Park 		   MAP_PRIVATE | MAP_ANON, -1, 0);
1392baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1393baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1394baa489faSSeongJae Park 		return;
1395baa489faSSeongJae Park 	}
1396baa489faSSeongJae Park 
1397baa489faSSeongJae Park 	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
1398baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1399baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1400baa489faSSeongJae Park 		goto munmap;
1401baa489faSSeongJae Park 	}
1402baa489faSSeongJae Park 
1403baa489faSSeongJae Park 	/* Read from the page to populate the shared zeropage. */
1404baa489faSSeongJae Park 	tmp = *mem + *smem;
1405baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1406baa489faSSeongJae Park 
1407baa489faSSeongJae Park 	fn(mem, smem, pagesize);
1408baa489faSSeongJae Park munmap:
1409baa489faSSeongJae Park 	munmap(mem, pagesize);
1410baa489faSSeongJae Park 	if (smem != MAP_FAILED)
1411baa489faSSeongJae Park 		munmap(smem, pagesize);
1412baa489faSSeongJae Park }
1413baa489faSSeongJae Park 
1414baa489faSSeongJae Park static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
1415baa489faSSeongJae Park {
1416baa489faSSeongJae Park 	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
1417baa489faSSeongJae Park 	size_t mmap_size;
1418baa489faSSeongJae Park 	int ret;
1419baa489faSSeongJae Park 
1420baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
1421baa489faSSeongJae Park 
1422baa489faSSeongJae Park 	if (!has_huge_zeropage) {
1423baa489faSSeongJae Park 		ksft_test_result_skip("Huge zeropage not enabled\n");
1424baa489faSSeongJae Park 		return;
1425baa489faSSeongJae Park 	}
1426baa489faSSeongJae Park 
1427baa489faSSeongJae Park 	/* For alignment purposes, we need twice the thp size. */
1428baa489faSSeongJae Park 	mmap_size = 2 * thpsize;
1429baa489faSSeongJae Park 	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
1430baa489faSSeongJae Park 			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1431baa489faSSeongJae Park 	if (mmap_mem == MAP_FAILED) {
1432baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1433baa489faSSeongJae Park 		return;
1434baa489faSSeongJae Park 	}
1435baa489faSSeongJae Park 	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
1436baa489faSSeongJae Park 			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1437baa489faSSeongJae Park 	if (mmap_smem == MAP_FAILED) {
1438baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1439baa489faSSeongJae Park 		goto munmap;
1440baa489faSSeongJae Park 	}
1441baa489faSSeongJae Park 
1442baa489faSSeongJae Park 	/* We need a THP-aligned memory area. */
1443baa489faSSeongJae Park 	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
1444baa489faSSeongJae Park 	smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));
1445baa489faSSeongJae Park 
1446baa489faSSeongJae Park 	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
1447baa489faSSeongJae Park 	ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
1448baa489faSSeongJae Park 	if (ret) {
1449baa489faSSeongJae Park 		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
1450baa489faSSeongJae Park 		goto munmap;
1451baa489faSSeongJae Park 	}
1452baa489faSSeongJae Park 
1453baa489faSSeongJae Park 	/*
1454baa489faSSeongJae Park 	 * Read from the memory to populate the huge shared zeropage. Read from
1455baa489faSSeongJae Park 	 * the first sub-page and test if we get another sub-page populated
1456baa489faSSeongJae Park 	 * automatically.
1457baa489faSSeongJae Park 	 */
1458baa489faSSeongJae Park 	tmp = *mem + *smem;
1459baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1460baa489faSSeongJae Park 	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
1461baa489faSSeongJae Park 	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
1462baa489faSSeongJae Park 		ksft_test_result_skip("Did not get THPs populated\n");
1463baa489faSSeongJae Park 		goto munmap;
1464baa489faSSeongJae Park 	}
1465baa489faSSeongJae Park 
1466baa489faSSeongJae Park 	fn(mem, smem, thpsize);
1467baa489faSSeongJae Park munmap:
1468baa489faSSeongJae Park 	munmap(mmap_mem, mmap_size);
1469baa489faSSeongJae Park 	if (mmap_smem != MAP_FAILED)
1470baa489faSSeongJae Park 		munmap(mmap_smem, mmap_size);
1471baa489faSSeongJae Park }
1472baa489faSSeongJae Park 
1473baa489faSSeongJae Park static void run_with_memfd(non_anon_test_fn fn, const char *desc)
1474baa489faSSeongJae Park {
1475baa489faSSeongJae Park 	char *mem, *smem, tmp;
1476baa489faSSeongJae Park 	int fd;
1477baa489faSSeongJae Park 
1478baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
1479baa489faSSeongJae Park 
1480baa489faSSeongJae Park 	fd = memfd_create("test", 0);
1481baa489faSSeongJae Park 	if (fd < 0) {
1482baa489faSSeongJae Park 		ksft_test_result_fail("memfd_create() failed\n");
1483baa489faSSeongJae Park 		return;
1484baa489faSSeongJae Park 	}
1485baa489faSSeongJae Park 
1486baa489faSSeongJae Park 	/* File consists of a single page filled with zeroes. */
1487baa489faSSeongJae Park 	if (fallocate(fd, 0, 0, pagesize)) {
1488baa489faSSeongJae Park 		ksft_test_result_fail("fallocate() failed\n");
1489baa489faSSeongJae Park 		goto close;
1490baa489faSSeongJae Park 	}
1491baa489faSSeongJae Park 
1492baa489faSSeongJae Park 	/* Create a private mapping of the memfd. */
1493baa489faSSeongJae Park 	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1494baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1495baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1496baa489faSSeongJae Park 		goto close;
1497baa489faSSeongJae Park 	}
1498baa489faSSeongJae Park 	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1499baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1500baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1501baa489faSSeongJae Park 		goto munmap;
1502baa489faSSeongJae Park 	}
1503baa489faSSeongJae Park 
1504baa489faSSeongJae Park 	/* Fault the page in. */
1505baa489faSSeongJae Park 	tmp = *mem + *smem;
1506baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1507baa489faSSeongJae Park 
1508baa489faSSeongJae Park 	fn(mem, smem, pagesize);
1509baa489faSSeongJae Park munmap:
1510baa489faSSeongJae Park 	munmap(mem, pagesize);
1511baa489faSSeongJae Park 	if (smem != MAP_FAILED)
1512baa489faSSeongJae Park 		munmap(smem, pagesize);
1513baa489faSSeongJae Park close:
1514baa489faSSeongJae Park 	close(fd);
1515baa489faSSeongJae Park }
1516baa489faSSeongJae Park 
1517baa489faSSeongJae Park static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
1518baa489faSSeongJae Park {
1519baa489faSSeongJae Park 	char *mem, *smem, tmp;
1520baa489faSSeongJae Park 	FILE *file;
1521baa489faSSeongJae Park 	int fd;
1522baa489faSSeongJae Park 
1523baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
1524baa489faSSeongJae Park 
1525baa489faSSeongJae Park 	file = tmpfile();
1526baa489faSSeongJae Park 	if (!file) {
1527baa489faSSeongJae Park 		ksft_test_result_fail("tmpfile() failed\n");
1528baa489faSSeongJae Park 		return;
1529baa489faSSeongJae Park 	}
1530baa489faSSeongJae Park 
1531baa489faSSeongJae Park 	fd = fileno(file);
1532baa489faSSeongJae Park 	if (fd < 0) {
1533baa489faSSeongJae Park 		ksft_test_result_skip("fileno() failed\n");
1534baa489faSSeongJae Park 		return;
1535baa489faSSeongJae Park 	}
1536baa489faSSeongJae Park 
1537baa489faSSeongJae Park 	/* File consists of a single page filled with zeroes. */
1538baa489faSSeongJae Park 	if (fallocate(fd, 0, 0, pagesize)) {
1539baa489faSSeongJae Park 		ksft_test_result_fail("fallocate() failed\n");
1540baa489faSSeongJae Park 		goto close;
1541baa489faSSeongJae Park 	}
1542baa489faSSeongJae Park 
1543baa489faSSeongJae Park 	/* Create a private mapping of the memfd. */
1544baa489faSSeongJae Park 	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1545baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1546baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1547baa489faSSeongJae Park 		goto close;
1548baa489faSSeongJae Park 	}
1549baa489faSSeongJae Park 	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1550baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1551baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1552baa489faSSeongJae Park 		goto munmap;
1553baa489faSSeongJae Park 	}
1554baa489faSSeongJae Park 
1555baa489faSSeongJae Park 	/* Fault the page in. */
1556baa489faSSeongJae Park 	tmp = *mem + *smem;
1557baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1558baa489faSSeongJae Park 
1559baa489faSSeongJae Park 	fn(mem, smem, pagesize);
1560baa489faSSeongJae Park munmap:
1561baa489faSSeongJae Park 	munmap(mem, pagesize);
1562baa489faSSeongJae Park 	if (smem != MAP_FAILED)
1563baa489faSSeongJae Park 		munmap(smem, pagesize);
1564baa489faSSeongJae Park close:
1565baa489faSSeongJae Park 	fclose(file);
1566baa489faSSeongJae Park }
1567baa489faSSeongJae Park 
1568baa489faSSeongJae Park static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
1569baa489faSSeongJae Park 				   size_t hugetlbsize)
1570baa489faSSeongJae Park {
1571baa489faSSeongJae Park 	int flags = MFD_HUGETLB;
1572baa489faSSeongJae Park 	char *mem, *smem, tmp;
1573baa489faSSeongJae Park 	int fd;
1574baa489faSSeongJae Park 
1575baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
1576baa489faSSeongJae Park 		       hugetlbsize / 1024);
1577baa489faSSeongJae Park 
1578baa489faSSeongJae Park 	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
1579baa489faSSeongJae Park 
1580baa489faSSeongJae Park 	fd = memfd_create("test", flags);
1581baa489faSSeongJae Park 	if (fd < 0) {
1582baa489faSSeongJae Park 		ksft_test_result_skip("memfd_create() failed\n");
1583baa489faSSeongJae Park 		return;
1584baa489faSSeongJae Park 	}
1585baa489faSSeongJae Park 
1586baa489faSSeongJae Park 	/* File consists of a single page filled with zeroes. */
1587baa489faSSeongJae Park 	if (fallocate(fd, 0, 0, hugetlbsize)) {
1588baa489faSSeongJae Park 		ksft_test_result_skip("need more free huge pages\n");
1589baa489faSSeongJae Park 		goto close;
1590baa489faSSeongJae Park 	}
1591baa489faSSeongJae Park 
1592baa489faSSeongJae Park 	/* Create a private mapping of the memfd. */
1593baa489faSSeongJae Park 	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
1594baa489faSSeongJae Park 		   0);
1595baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1596baa489faSSeongJae Park 		ksft_test_result_skip("need more free huge pages\n");
1597baa489faSSeongJae Park 		goto close;
1598baa489faSSeongJae Park 	}
1599baa489faSSeongJae Park 	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
1600baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1601baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1602baa489faSSeongJae Park 		goto munmap;
1603baa489faSSeongJae Park 	}
1604baa489faSSeongJae Park 
1605baa489faSSeongJae Park 	/* Fault the page in. */
1606baa489faSSeongJae Park 	tmp = *mem + *smem;
1607baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1608baa489faSSeongJae Park 
1609baa489faSSeongJae Park 	fn(mem, smem, hugetlbsize);
1610baa489faSSeongJae Park munmap:
1611baa489faSSeongJae Park 	munmap(mem, hugetlbsize);
1612baa489faSSeongJae Park 	if (mem != MAP_FAILED)
1613baa489faSSeongJae Park 		munmap(smem, hugetlbsize);
1614baa489faSSeongJae Park close:
1615baa489faSSeongJae Park 	close(fd);
1616baa489faSSeongJae Park }
1617baa489faSSeongJae Park 
/* One non-anonymous test: a description plus the test body to run. */
struct non_anon_test_case {
	/* Text printed in the "[RUN] ..." message of every runner. */
	const char *desc;
	/* Test body invoked by each run_with_*() runner. */
	non_anon_test_fn fn;
};
1622baa489faSSeongJae Park 
/*
 * Test cases that target any pages in private mappings that are not anonymous:
 * pages that may get shared via COW independent of fork(). This includes
 * the shared zeropage(s), pagecache pages, ...
 */
1628baa489faSSeongJae Park static const struct non_anon_test_case non_anon_test_cases[] = {
1629baa489faSSeongJae Park 	/*
1630baa489faSSeongJae Park 	 * Basic COW test without any GUP. If we miss to break COW, changes are
1631baa489faSSeongJae Park 	 * visible via other private/shared mappings.
1632baa489faSSeongJae Park 	 */
1633baa489faSSeongJae Park 	{
1634baa489faSSeongJae Park 		"Basic COW",
1635baa489faSSeongJae Park 		test_cow,
1636baa489faSSeongJae Park 	},
1637baa489faSSeongJae Park 	/*
1638baa489faSSeongJae Park 	 * Take a R/O longterm pin. When modifying the page via the page table,
1639baa489faSSeongJae Park 	 * the page content change must be visible via the pin.
1640baa489faSSeongJae Park 	 */
1641baa489faSSeongJae Park 	{
1642baa489faSSeongJae Park 		"R/O longterm GUP pin",
1643baa489faSSeongJae Park 		test_ro_pin,
1644baa489faSSeongJae Park 	},
1645baa489faSSeongJae Park 	/* Same as above, but using GUP-fast. */
1646baa489faSSeongJae Park 	{
1647baa489faSSeongJae Park 		"R/O longterm GUP-fast pin",
1648baa489faSSeongJae Park 		test_ro_fast_pin,
1649baa489faSSeongJae Park 	},
1650baa489faSSeongJae Park };
1651baa489faSSeongJae Park 
1652baa489faSSeongJae Park static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
1653baa489faSSeongJae Park {
1654baa489faSSeongJae Park 	int i;
1655baa489faSSeongJae Park 
1656baa489faSSeongJae Park 	run_with_zeropage(test_case->fn, test_case->desc);
1657baa489faSSeongJae Park 	run_with_memfd(test_case->fn, test_case->desc);
1658baa489faSSeongJae Park 	run_with_tmpfile(test_case->fn, test_case->desc);
1659baa489faSSeongJae Park 	if (thpsize)
1660baa489faSSeongJae Park 		run_with_huge_zeropage(test_case->fn, test_case->desc);
1661baa489faSSeongJae Park 	for (i = 0; i < nr_hugetlbsizes; i++)
1662baa489faSSeongJae Park 		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
1663baa489faSSeongJae Park 				       hugetlbsizes[i]);
1664baa489faSSeongJae Park }
1665baa489faSSeongJae Park 
1666baa489faSSeongJae Park static void run_non_anon_test_cases(void)
1667baa489faSSeongJae Park {
1668baa489faSSeongJae Park 	int i;
1669baa489faSSeongJae Park 
1670baa489faSSeongJae Park 	ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");
1671baa489faSSeongJae Park 
1672baa489faSSeongJae Park 	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
1673baa489faSSeongJae Park 		run_non_anon_test_case(&non_anon_test_cases[i]);
1674baa489faSSeongJae Park }
1675baa489faSSeongJae Park 
1676baa489faSSeongJae Park static int tests_per_non_anon_test_case(void)
1677baa489faSSeongJae Park {
1678baa489faSSeongJae Park 	int tests = 3 + nr_hugetlbsizes;
1679baa489faSSeongJae Park 
1680baa489faSSeongJae Park 	if (thpsize)
1681baa489faSSeongJae Park 		tests += 1;
1682baa489faSSeongJae Park 	return tests;
1683baa489faSSeongJae Park }
1684baa489faSSeongJae Park 
1685baa489faSSeongJae Park int main(int argc, char **argv)
1686baa489faSSeongJae Park {
1687baa489faSSeongJae Park 	int err;
1688baa489faSSeongJae Park 
1689baa489faSSeongJae Park 	pagesize = getpagesize();
1690d6e61afbSDavid Hildenbrand 	thpsize = read_pmd_pagesize();
1691d6e61afbSDavid Hildenbrand 	if (thpsize)
1692d6e61afbSDavid Hildenbrand 		ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
1693d6e61afbSDavid Hildenbrand 			       thpsize / 1024);
1694*81b1e3f9SDavid Hildenbrand 	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
1695*81b1e3f9SDavid Hildenbrand 						    ARRAY_SIZE(hugetlbsizes));
1696baa489faSSeongJae Park 	detect_huge_zeropage();
1697baa489faSSeongJae Park 
1698baa489faSSeongJae Park 	ksft_print_header();
1699baa489faSSeongJae Park 	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
1700baa489faSSeongJae Park 		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
1701baa489faSSeongJae Park 		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());
1702baa489faSSeongJae Park 
1703baa489faSSeongJae Park 	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
1704baa489faSSeongJae Park 	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
1705baa489faSSeongJae Park 	if (pagemap_fd < 0)
1706baa489faSSeongJae Park 		ksft_exit_fail_msg("opening pagemap failed\n");
1707baa489faSSeongJae Park 
1708baa489faSSeongJae Park 	run_anon_test_cases();
1709baa489faSSeongJae Park 	run_anon_thp_test_cases();
1710baa489faSSeongJae Park 	run_non_anon_test_cases();
1711baa489faSSeongJae Park 
1712baa489faSSeongJae Park 	err = ksft_get_fail_cnt();
1713baa489faSSeongJae Park 	if (err)
1714baa489faSSeongJae Park 		ksft_exit_fail_msg("%d out of %d tests failed\n",
1715baa489faSSeongJae Park 				   err, ksft_test_num());
1716baa489faSSeongJae Park 	return ksft_exit_pass();
1717baa489faSSeongJae Park }
1718