xref: /linux/tools/testing/selftests/mm/cow.c (revision 36ec807b627b4c0a0a382f0ae48eac7187d14b2b)
1baa489faSSeongJae Park // SPDX-License-Identifier: GPL-2.0-only
2baa489faSSeongJae Park /*
3baa489faSSeongJae Park  * COW (Copy On Write) tests.
4baa489faSSeongJae Park  *
5baa489faSSeongJae Park  * Copyright 2022, Red Hat, Inc.
6baa489faSSeongJae Park  *
7baa489faSSeongJae Park  * Author(s): David Hildenbrand <david@redhat.com>
8baa489faSSeongJae Park  */
9baa489faSSeongJae Park #define _GNU_SOURCE
10baa489faSSeongJae Park #include <stdlib.h>
11baa489faSSeongJae Park #include <string.h>
12baa489faSSeongJae Park #include <stdbool.h>
13baa489faSSeongJae Park #include <stdint.h>
14baa489faSSeongJae Park #include <unistd.h>
15baa489faSSeongJae Park #include <errno.h>
16baa489faSSeongJae Park #include <fcntl.h>
17baa489faSSeongJae Park #include <assert.h>
180183d777SMuhammad Usama Anjum #include <linux/mman.h>
19baa489faSSeongJae Park #include <sys/mman.h>
20baa489faSSeongJae Park #include <sys/ioctl.h>
21baa489faSSeongJae Park #include <sys/wait.h>
22baa489faSSeongJae Park #include <linux/memfd.h>
23baa489faSSeongJae Park 
24baa489faSSeongJae Park #include "local_config.h"
25baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING
26baa489faSSeongJae Park #include <liburing.h>
27baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */
28baa489faSSeongJae Park 
29baa489faSSeongJae Park #include "../../../../mm/gup_test.h"
30baa489faSSeongJae Park #include "../kselftest.h"
31baa489faSSeongJae Park #include "vm_util.h"
32c0f79103SRyan Roberts #include "thp_settings.h"
33baa489faSSeongJae Park 
/*
 * File-scope test state shared by the helpers below.
 * NOTE(review): the code that initializes these is outside this view;
 * the descriptions are based on how the visible helpers use them.
 */
/* Base page size in bytes; stride for range walks and unit for THP orders. */
34baa489faSSeongJae Park static size_t pagesize;
/* Descriptor handed to pagemap_is_swapped(). */
35baa489faSSeongJae Park static int pagemap_fd;
/* PMD-sized THP size in bytes; 0 is treated as "THP not supported". */
3612dc16b3SRyan Roberts static size_t pmdsize;
/* Presumably the number of valid entries in thpsizes[] — TODO confirm. */
37c0f79103SRyan Roberts static int nr_thpsizes;
/* Presumably the THP sizes (bytes) found by detect_thp_sizes() — TODO confirm. */
38c0f79103SRyan Roberts static size_t thpsizes[20];
/* Presumably the number of valid entries in hugetlbsizes[] — TODO confirm. */
39baa489faSSeongJae Park static int nr_hugetlbsizes;
/* Presumably the detected hugetlb page sizes — TODO confirm. */
40baa489faSSeongJae Park static size_t hugetlbsizes[10];
/* Descriptor for the gup_test ioctls; a negative value makes pin tests skip. */
41baa489faSSeongJae Park static int gup_fd;
/* Set by detect_huge_zeropage() when the sysfs knob reads 1. */
42baa489faSSeongJae Park static bool has_huge_zeropage;
43baa489faSSeongJae Park 
44c0f79103SRyan Roberts static int sz2ord(size_t size)
45c0f79103SRyan Roberts {
46c0f79103SRyan Roberts 	return __builtin_ctzll(size / pagesize);
47c0f79103SRyan Roberts }
48c0f79103SRyan Roberts 
49c0f79103SRyan Roberts static int detect_thp_sizes(size_t sizes[], int max)
50c0f79103SRyan Roberts {
51c0f79103SRyan Roberts 	int count = 0;
52c0f79103SRyan Roberts 	unsigned long orders;
53c0f79103SRyan Roberts 	size_t kb;
54c0f79103SRyan Roberts 	int i;
55c0f79103SRyan Roberts 
56c0f79103SRyan Roberts 	/* thp not supported at all. */
57c0f79103SRyan Roberts 	if (!pmdsize)
58c0f79103SRyan Roberts 		return 0;
59c0f79103SRyan Roberts 
60c0f79103SRyan Roberts 	orders = 1UL << sz2ord(pmdsize);
61c0f79103SRyan Roberts 	orders |= thp_supported_orders();
62c0f79103SRyan Roberts 
63c0f79103SRyan Roberts 	for (i = 0; orders && count < max; i++) {
64c0f79103SRyan Roberts 		if (!(orders & (1UL << i)))
65c0f79103SRyan Roberts 			continue;
66c0f79103SRyan Roberts 		orders &= ~(1UL << i);
67c0f79103SRyan Roberts 		kb = (pagesize >> 10) << i;
68c0f79103SRyan Roberts 		sizes[count++] = kb * 1024;
69c0f79103SRyan Roberts 		ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
70c0f79103SRyan Roberts 	}
71c0f79103SRyan Roberts 
72c0f79103SRyan Roberts 	return count;
73c0f79103SRyan Roberts }
74c0f79103SRyan Roberts 
75baa489faSSeongJae Park static void detect_huge_zeropage(void)
76baa489faSSeongJae Park {
77baa489faSSeongJae Park 	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
78baa489faSSeongJae Park 		      O_RDONLY);
79baa489faSSeongJae Park 	size_t enabled = 0;
80baa489faSSeongJae Park 	char buf[15];
81baa489faSSeongJae Park 	int ret;
82baa489faSSeongJae Park 
83baa489faSSeongJae Park 	if (fd < 0)
84baa489faSSeongJae Park 		return;
85baa489faSSeongJae Park 
86baa489faSSeongJae Park 	ret = pread(fd, buf, sizeof(buf), 0);
87baa489faSSeongJae Park 	if (ret > 0 && ret < sizeof(buf)) {
88baa489faSSeongJae Park 		buf[ret] = 0;
89baa489faSSeongJae Park 
90baa489faSSeongJae Park 		enabled = strtoul(buf, NULL, 10);
91baa489faSSeongJae Park 		if (enabled == 1) {
92baa489faSSeongJae Park 			has_huge_zeropage = true;
93baa489faSSeongJae Park 			ksft_print_msg("[INFO] huge zeropage is enabled\n");
94baa489faSSeongJae Park 		}
95baa489faSSeongJae Park 	}
96baa489faSSeongJae Park 
97baa489faSSeongJae Park 	close(fd);
98baa489faSSeongJae Park }
99baa489faSSeongJae Park 
100baa489faSSeongJae Park static bool range_is_swapped(void *addr, size_t size)
101baa489faSSeongJae Park {
102baa489faSSeongJae Park 	for (; size; addr += pagesize, size -= pagesize)
103baa489faSSeongJae Park 		if (!pagemap_is_swapped(pagemap_fd, addr))
104baa489faSSeongJae Park 			return false;
105baa489faSSeongJae Park 	return true;
106baa489faSSeongJae Park }
107baa489faSSeongJae Park 
/*
 * Pair of pipes used as a simple handshake between a test (parent) and its
 * forked child. Each array holds the two descriptors returned by pipe():
 * index 0 is the read end, index 1 the write end.
 */
108baa489faSSeongJae Park struct comm_pipes {
	/* Child writes a byte to [1]; the parent waits for it on [0]. */
109baa489faSSeongJae Park 	int child_ready[2];
	/* Parent writes a byte to [1]; the child waits for it on [0]. */
110baa489faSSeongJae Park 	int parent_ready[2];
111baa489faSSeongJae Park };
112baa489faSSeongJae Park 
113baa489faSSeongJae Park static int setup_comm_pipes(struct comm_pipes *comm_pipes)
114baa489faSSeongJae Park {
115baa489faSSeongJae Park 	if (pipe(comm_pipes->child_ready) < 0)
116baa489faSSeongJae Park 		return -errno;
117baa489faSSeongJae Park 	if (pipe(comm_pipes->parent_ready) < 0) {
118baa489faSSeongJae Park 		close(comm_pipes->child_ready[0]);
119baa489faSSeongJae Park 		close(comm_pipes->child_ready[1]);
120baa489faSSeongJae Park 		return -errno;
121baa489faSSeongJae Park 	}
122baa489faSSeongJae Park 
123baa489faSSeongJae Park 	return 0;
124baa489faSSeongJae Park }
125baa489faSSeongJae Park 
126baa489faSSeongJae Park static void close_comm_pipes(struct comm_pipes *comm_pipes)
127baa489faSSeongJae Park {
128baa489faSSeongJae Park 	close(comm_pipes->child_ready[0]);
129baa489faSSeongJae Park 	close(comm_pipes->child_ready[1]);
130baa489faSSeongJae Park 	close(comm_pipes->parent_ready[0]);
131baa489faSSeongJae Park 	close(comm_pipes->parent_ready[1]);
132baa489faSSeongJae Park }
133baa489faSSeongJae Park 
134baa489faSSeongJae Park static int child_memcmp_fn(char *mem, size_t size,
135baa489faSSeongJae Park 			   struct comm_pipes *comm_pipes)
136baa489faSSeongJae Park {
137baa489faSSeongJae Park 	char *old = malloc(size);
138baa489faSSeongJae Park 	char buf;
139baa489faSSeongJae Park 
140baa489faSSeongJae Park 	/* Backup the original content. */
141baa489faSSeongJae Park 	memcpy(old, mem, size);
142baa489faSSeongJae Park 
143baa489faSSeongJae Park 	/* Wait until the parent modified the page. */
144baa489faSSeongJae Park 	write(comm_pipes->child_ready[1], "0", 1);
145baa489faSSeongJae Park 	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
146baa489faSSeongJae Park 		;
147baa489faSSeongJae Park 
148baa489faSSeongJae Park 	/* See if we still read the old values. */
149baa489faSSeongJae Park 	return memcmp(old, mem, size);
150baa489faSSeongJae Park }
151baa489faSSeongJae Park 
152baa489faSSeongJae Park static int child_vmsplice_memcmp_fn(char *mem, size_t size,
153baa489faSSeongJae Park 				    struct comm_pipes *comm_pipes)
154baa489faSSeongJae Park {
155baa489faSSeongJae Park 	struct iovec iov = {
156baa489faSSeongJae Park 		.iov_base = mem,
157baa489faSSeongJae Park 		.iov_len = size,
158baa489faSSeongJae Park 	};
159baa489faSSeongJae Park 	ssize_t cur, total, transferred;
160baa489faSSeongJae Park 	char *old, *new;
161baa489faSSeongJae Park 	int fds[2];
162baa489faSSeongJae Park 	char buf;
163baa489faSSeongJae Park 
164baa489faSSeongJae Park 	old = malloc(size);
165baa489faSSeongJae Park 	new = malloc(size);
166baa489faSSeongJae Park 
167baa489faSSeongJae Park 	/* Backup the original content. */
168baa489faSSeongJae Park 	memcpy(old, mem, size);
169baa489faSSeongJae Park 
170baa489faSSeongJae Park 	if (pipe(fds) < 0)
171baa489faSSeongJae Park 		return -errno;
172baa489faSSeongJae Park 
173baa489faSSeongJae Park 	/* Trigger a read-only pin. */
174baa489faSSeongJae Park 	transferred = vmsplice(fds[1], &iov, 1, 0);
175baa489faSSeongJae Park 	if (transferred < 0)
176baa489faSSeongJae Park 		return -errno;
177baa489faSSeongJae Park 	if (transferred == 0)
178baa489faSSeongJae Park 		return -EINVAL;
179baa489faSSeongJae Park 
180baa489faSSeongJae Park 	/* Unmap it from our page tables. */
181baa489faSSeongJae Park 	if (munmap(mem, size) < 0)
182baa489faSSeongJae Park 		return -errno;
183baa489faSSeongJae Park 
184baa489faSSeongJae Park 	/* Wait until the parent modified it. */
185baa489faSSeongJae Park 	write(comm_pipes->child_ready[1], "0", 1);
186baa489faSSeongJae Park 	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
187baa489faSSeongJae Park 		;
188baa489faSSeongJae Park 
189baa489faSSeongJae Park 	/* See if we still read the old values via the pipe. */
190baa489faSSeongJae Park 	for (total = 0; total < transferred; total += cur) {
191baa489faSSeongJae Park 		cur = read(fds[0], new + total, transferred - total);
192baa489faSSeongJae Park 		if (cur < 0)
193baa489faSSeongJae Park 			return -errno;
194baa489faSSeongJae Park 	}
195baa489faSSeongJae Park 
196baa489faSSeongJae Park 	return memcmp(old, new, transferred);
197baa489faSSeongJae Park }
198baa489faSSeongJae Park 
/*
 * Callback run in the forked child by do_test_cow_in_parent(); its return
 * value is passed to exit() and 0 is interpreted by the parent as "no leak".
 */
199baa489faSSeongJae Park typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
200baa489faSSeongJae Park 
201baa489faSSeongJae Park static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
202*4bf6a4ebSDavid Hildenbrand 		child_fn fn, bool xfail)
203baa489faSSeongJae Park {
204baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
205baa489faSSeongJae Park 	char buf;
206baa489faSSeongJae Park 	int ret;
207baa489faSSeongJae Park 
208baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
209baa489faSSeongJae Park 	if (ret) {
210baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
211baa489faSSeongJae Park 		return;
212baa489faSSeongJae Park 	}
213baa489faSSeongJae Park 
214baa489faSSeongJae Park 	ret = fork();
215baa489faSSeongJae Park 	if (ret < 0) {
216baa489faSSeongJae Park 		ksft_test_result_fail("fork() failed\n");
217baa489faSSeongJae Park 		goto close_comm_pipes;
218baa489faSSeongJae Park 	} else if (!ret) {
219baa489faSSeongJae Park 		exit(fn(mem, size, &comm_pipes));
220baa489faSSeongJae Park 	}
221baa489faSSeongJae Park 
222baa489faSSeongJae Park 	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
223baa489faSSeongJae Park 		;
224baa489faSSeongJae Park 
225baa489faSSeongJae Park 	if (do_mprotect) {
226baa489faSSeongJae Park 		/*
227baa489faSSeongJae Park 		 * mprotect() optimizations might try avoiding
228baa489faSSeongJae Park 		 * write-faults by directly mapping pages writable.
229baa489faSSeongJae Park 		 */
230baa489faSSeongJae Park 		ret = mprotect(mem, size, PROT_READ);
231baa489faSSeongJae Park 		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
232baa489faSSeongJae Park 		if (ret) {
233baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
234baa489faSSeongJae Park 			write(comm_pipes.parent_ready[1], "0", 1);
235baa489faSSeongJae Park 			wait(&ret);
236baa489faSSeongJae Park 			goto close_comm_pipes;
237baa489faSSeongJae Park 		}
238baa489faSSeongJae Park 	}
239baa489faSSeongJae Park 
240baa489faSSeongJae Park 	/* Modify the page. */
241baa489faSSeongJae Park 	memset(mem, 0xff, size);
242baa489faSSeongJae Park 	write(comm_pipes.parent_ready[1], "0", 1);
243baa489faSSeongJae Park 
244baa489faSSeongJae Park 	wait(&ret);
245baa489faSSeongJae Park 	if (WIFEXITED(ret))
246baa489faSSeongJae Park 		ret = WEXITSTATUS(ret);
247baa489faSSeongJae Park 	else
248baa489faSSeongJae Park 		ret = -EINVAL;
249baa489faSSeongJae Park 
250*4bf6a4ebSDavid Hildenbrand 	if (!ret) {
251*4bf6a4ebSDavid Hildenbrand 		ksft_test_result_pass("No leak from parent into child\n");
252*4bf6a4ebSDavid Hildenbrand 	} else if (xfail) {
253*4bf6a4ebSDavid Hildenbrand 		/*
254*4bf6a4ebSDavid Hildenbrand 		 * With hugetlb, some vmsplice() tests are currently expected to
255*4bf6a4ebSDavid Hildenbrand 		 * fail because (a) harder to fix and (b) nobody really cares.
256*4bf6a4ebSDavid Hildenbrand 		 * Flag them as expected failure for now.
257*4bf6a4ebSDavid Hildenbrand 		 */
258*4bf6a4ebSDavid Hildenbrand 		ksft_test_result_xfail("Leak from parent into child\n");
259*4bf6a4ebSDavid Hildenbrand 	} else {
260*4bf6a4ebSDavid Hildenbrand 		ksft_test_result_fail("Leak from parent into child\n");
261*4bf6a4ebSDavid Hildenbrand 	}
262baa489faSSeongJae Park close_comm_pipes:
263baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
264baa489faSSeongJae Park }
265baa489faSSeongJae Park 
266*4bf6a4ebSDavid Hildenbrand static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
267baa489faSSeongJae Park {
268*4bf6a4ebSDavid Hildenbrand 	do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
269baa489faSSeongJae Park }
270baa489faSSeongJae Park 
271*4bf6a4ebSDavid Hildenbrand static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
272baa489faSSeongJae Park {
273*4bf6a4ebSDavid Hildenbrand 	do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
274baa489faSSeongJae Park }
275baa489faSSeongJae Park 
276*4bf6a4ebSDavid Hildenbrand static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
277baa489faSSeongJae Park {
278*4bf6a4ebSDavid Hildenbrand 	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
279*4bf6a4ebSDavid Hildenbrand 			      is_hugetlb);
280baa489faSSeongJae Park }
281baa489faSSeongJae Park 
282*4bf6a4ebSDavid Hildenbrand static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
283*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
284baa489faSSeongJae Park {
285*4bf6a4ebSDavid Hildenbrand 	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
286*4bf6a4ebSDavid Hildenbrand 			      is_hugetlb);
287baa489faSSeongJae Park }
288baa489faSSeongJae Park 
289baa489faSSeongJae Park static void do_test_vmsplice_in_parent(char *mem, size_t size,
290*4bf6a4ebSDavid Hildenbrand 				       bool before_fork, bool xfail)
291baa489faSSeongJae Park {
292baa489faSSeongJae Park 	struct iovec iov = {
293baa489faSSeongJae Park 		.iov_base = mem,
294baa489faSSeongJae Park 		.iov_len = size,
295baa489faSSeongJae Park 	};
296baa489faSSeongJae Park 	ssize_t cur, total, transferred;
297baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
298baa489faSSeongJae Park 	char *old, *new;
299baa489faSSeongJae Park 	int ret, fds[2];
300baa489faSSeongJae Park 	char buf;
301baa489faSSeongJae Park 
302baa489faSSeongJae Park 	old = malloc(size);
303baa489faSSeongJae Park 	new = malloc(size);
304baa489faSSeongJae Park 
305baa489faSSeongJae Park 	memcpy(old, mem, size);
306baa489faSSeongJae Park 
307baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
308baa489faSSeongJae Park 	if (ret) {
309baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
310baa489faSSeongJae Park 		goto free;
311baa489faSSeongJae Park 	}
312baa489faSSeongJae Park 
313baa489faSSeongJae Park 	if (pipe(fds) < 0) {
314baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
315baa489faSSeongJae Park 		goto close_comm_pipes;
316baa489faSSeongJae Park 	}
317baa489faSSeongJae Park 
318baa489faSSeongJae Park 	if (before_fork) {
319baa489faSSeongJae Park 		transferred = vmsplice(fds[1], &iov, 1, 0);
320baa489faSSeongJae Park 		if (transferred <= 0) {
321baa489faSSeongJae Park 			ksft_test_result_fail("vmsplice() failed\n");
322baa489faSSeongJae Park 			goto close_pipe;
323baa489faSSeongJae Park 		}
324baa489faSSeongJae Park 	}
325baa489faSSeongJae Park 
326baa489faSSeongJae Park 	ret = fork();
327baa489faSSeongJae Park 	if (ret < 0) {
328baa489faSSeongJae Park 		ksft_test_result_fail("fork() failed\n");
329baa489faSSeongJae Park 		goto close_pipe;
330baa489faSSeongJae Park 	} else if (!ret) {
331baa489faSSeongJae Park 		write(comm_pipes.child_ready[1], "0", 1);
332baa489faSSeongJae Park 		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
333baa489faSSeongJae Park 			;
334baa489faSSeongJae Park 		/* Modify page content in the child. */
335baa489faSSeongJae Park 		memset(mem, 0xff, size);
336baa489faSSeongJae Park 		exit(0);
337baa489faSSeongJae Park 	}
338baa489faSSeongJae Park 
339baa489faSSeongJae Park 	if (!before_fork) {
340baa489faSSeongJae Park 		transferred = vmsplice(fds[1], &iov, 1, 0);
341baa489faSSeongJae Park 		if (transferred <= 0) {
342baa489faSSeongJae Park 			ksft_test_result_fail("vmsplice() failed\n");
343baa489faSSeongJae Park 			wait(&ret);
344baa489faSSeongJae Park 			goto close_pipe;
345baa489faSSeongJae Park 		}
346baa489faSSeongJae Park 	}
347baa489faSSeongJae Park 
348baa489faSSeongJae Park 	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
349baa489faSSeongJae Park 		;
350baa489faSSeongJae Park 	if (munmap(mem, size) < 0) {
351baa489faSSeongJae Park 		ksft_test_result_fail("munmap() failed\n");
352baa489faSSeongJae Park 		goto close_pipe;
353baa489faSSeongJae Park 	}
354baa489faSSeongJae Park 	write(comm_pipes.parent_ready[1], "0", 1);
355baa489faSSeongJae Park 
356baa489faSSeongJae Park 	/* Wait until the child is done writing. */
357baa489faSSeongJae Park 	wait(&ret);
358baa489faSSeongJae Park 	if (!WIFEXITED(ret)) {
359baa489faSSeongJae Park 		ksft_test_result_fail("wait() failed\n");
360baa489faSSeongJae Park 		goto close_pipe;
361baa489faSSeongJae Park 	}
362baa489faSSeongJae Park 
363baa489faSSeongJae Park 	/* See if we still read the old values. */
364baa489faSSeongJae Park 	for (total = 0; total < transferred; total += cur) {
365baa489faSSeongJae Park 		cur = read(fds[0], new + total, transferred - total);
366baa489faSSeongJae Park 		if (cur < 0) {
367baa489faSSeongJae Park 			ksft_test_result_fail("read() failed\n");
368baa489faSSeongJae Park 			goto close_pipe;
369baa489faSSeongJae Park 		}
370baa489faSSeongJae Park 	}
371baa489faSSeongJae Park 
372*4bf6a4ebSDavid Hildenbrand 	if (!memcmp(old, new, transferred)) {
373*4bf6a4ebSDavid Hildenbrand 		ksft_test_result_pass("No leak from child into parent\n");
374*4bf6a4ebSDavid Hildenbrand 	} else if (xfail) {
375*4bf6a4ebSDavid Hildenbrand 		/*
376*4bf6a4ebSDavid Hildenbrand 		 * With hugetlb, some vmsplice() tests are currently expected to
377*4bf6a4ebSDavid Hildenbrand 		 * fail because (a) harder to fix and (b) nobody really cares.
378*4bf6a4ebSDavid Hildenbrand 		 * Flag them as expected failure for now.
379*4bf6a4ebSDavid Hildenbrand 		 */
380*4bf6a4ebSDavid Hildenbrand 		ksft_test_result_xfail("Leak from child into parent\n");
381*4bf6a4ebSDavid Hildenbrand 	} else {
382*4bf6a4ebSDavid Hildenbrand 		ksft_test_result_fail("Leak from child into parent\n");
383*4bf6a4ebSDavid Hildenbrand 	}
384baa489faSSeongJae Park close_pipe:
385baa489faSSeongJae Park 	close(fds[0]);
386baa489faSSeongJae Park 	close(fds[1]);
387baa489faSSeongJae Park close_comm_pipes:
388baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
389baa489faSSeongJae Park free:
390baa489faSSeongJae Park 	free(old);
391baa489faSSeongJae Park 	free(new);
392baa489faSSeongJae Park }
393baa489faSSeongJae Park 
/*
 * Parent-side vmsplice() test with the vmsplice() issued before fork();
 * a leak is reported as an expected failure when @is_hugetlb is set.
 */
static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
{
	const bool before_fork = true;
	const bool expect_fail = is_hugetlb;

	do_test_vmsplice_in_parent(mem, size, before_fork, expect_fail);
}
398baa489faSSeongJae Park 
/*
 * Parent-side vmsplice() test with the vmsplice() issued after fork();
 * a leak is reported as an expected failure when @is_hugetlb is set.
 */
static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
{
	const bool before_fork = false;
	const bool expect_fail = is_hugetlb;

	do_test_vmsplice_in_parent(mem, size, before_fork, expect_fail);
}
403baa489faSSeongJae Park 
404baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING
405baa489faSSeongJae Park static void do_test_iouring(char *mem, size_t size, bool use_fork)
406baa489faSSeongJae Park {
407baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
408baa489faSSeongJae Park 	struct io_uring_cqe *cqe;
409baa489faSSeongJae Park 	struct io_uring_sqe *sqe;
410baa489faSSeongJae Park 	struct io_uring ring;
411baa489faSSeongJae Park 	ssize_t cur, total;
412baa489faSSeongJae Park 	struct iovec iov;
413baa489faSSeongJae Park 	char *buf, *tmp;
414baa489faSSeongJae Park 	int ret, fd;
415baa489faSSeongJae Park 	FILE *file;
416baa489faSSeongJae Park 
417baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
418baa489faSSeongJae Park 	if (ret) {
419baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
420baa489faSSeongJae Park 		return;
421baa489faSSeongJae Park 	}
422baa489faSSeongJae Park 
423baa489faSSeongJae Park 	file = tmpfile();
424baa489faSSeongJae Park 	if (!file) {
425baa489faSSeongJae Park 		ksft_test_result_fail("tmpfile() failed\n");
426baa489faSSeongJae Park 		goto close_comm_pipes;
427baa489faSSeongJae Park 	}
428baa489faSSeongJae Park 	fd = fileno(file);
429baa489faSSeongJae Park 	assert(fd);
430baa489faSSeongJae Park 
431baa489faSSeongJae Park 	tmp = malloc(size);
432baa489faSSeongJae Park 	if (!tmp) {
433baa489faSSeongJae Park 		ksft_test_result_fail("malloc() failed\n");
434baa489faSSeongJae Park 		goto close_file;
435baa489faSSeongJae Park 	}
436baa489faSSeongJae Park 
437baa489faSSeongJae Park 	/* Skip on errors, as we might just lack kernel support. */
438baa489faSSeongJae Park 	ret = io_uring_queue_init(1, &ring, 0);
439baa489faSSeongJae Park 	if (ret < 0) {
440baa489faSSeongJae Park 		ksft_test_result_skip("io_uring_queue_init() failed\n");
441baa489faSSeongJae Park 		goto free_tmp;
442baa489faSSeongJae Park 	}
443baa489faSSeongJae Park 
444baa489faSSeongJae Park 	/*
445baa489faSSeongJae Park 	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
446baa489faSSeongJae Park 	 * | FOLL_LONGTERM the range.
447baa489faSSeongJae Park 	 *
448baa489faSSeongJae Park 	 * Skip on errors, as we might just lack kernel support or might not
449baa489faSSeongJae Park 	 * have sufficient MEMLOCK permissions.
450baa489faSSeongJae Park 	 */
451baa489faSSeongJae Park 	iov.iov_base = mem;
452baa489faSSeongJae Park 	iov.iov_len = size;
453baa489faSSeongJae Park 	ret = io_uring_register_buffers(&ring, &iov, 1);
454baa489faSSeongJae Park 	if (ret) {
455baa489faSSeongJae Park 		ksft_test_result_skip("io_uring_register_buffers() failed\n");
456baa489faSSeongJae Park 		goto queue_exit;
457baa489faSSeongJae Park 	}
458baa489faSSeongJae Park 
459baa489faSSeongJae Park 	if (use_fork) {
460baa489faSSeongJae Park 		/*
461baa489faSSeongJae Park 		 * fork() and keep the child alive until we're done. Note that
462baa489faSSeongJae Park 		 * we expect the pinned page to not get shared with the child.
463baa489faSSeongJae Park 		 */
464baa489faSSeongJae Park 		ret = fork();
465baa489faSSeongJae Park 		if (ret < 0) {
466baa489faSSeongJae Park 			ksft_test_result_fail("fork() failed\n");
467baa489faSSeongJae Park 			goto unregister_buffers;
468baa489faSSeongJae Park 		} else if (!ret) {
469baa489faSSeongJae Park 			write(comm_pipes.child_ready[1], "0", 1);
470baa489faSSeongJae Park 			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
471baa489faSSeongJae Park 				;
472baa489faSSeongJae Park 			exit(0);
473baa489faSSeongJae Park 		}
474baa489faSSeongJae Park 
475baa489faSSeongJae Park 		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
476baa489faSSeongJae Park 			;
477baa489faSSeongJae Park 	} else {
478baa489faSSeongJae Park 		/*
479baa489faSSeongJae Park 		 * Map the page R/O into the page table. Enable softdirty
480baa489faSSeongJae Park 		 * tracking to stop the page from getting mapped R/W immediately
481baa489faSSeongJae Park 		 * again by mprotect() optimizations. Note that we don't have an
482baa489faSSeongJae Park 		 * easy way to test if that worked (the pagemap does not export
483baa489faSSeongJae Park 		 * if the page is mapped R/O vs. R/W).
484baa489faSSeongJae Park 		 */
485baa489faSSeongJae Park 		ret = mprotect(mem, size, PROT_READ);
486baa489faSSeongJae Park 		clear_softdirty();
487baa489faSSeongJae Park 		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
488baa489faSSeongJae Park 		if (ret) {
489baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
490baa489faSSeongJae Park 			goto unregister_buffers;
491baa489faSSeongJae Park 		}
492baa489faSSeongJae Park 	}
493baa489faSSeongJae Park 
494baa489faSSeongJae Park 	/*
495baa489faSSeongJae Park 	 * Modify the page and write page content as observed by the fixed
496baa489faSSeongJae Park 	 * buffer pin to the file so we can verify it.
497baa489faSSeongJae Park 	 */
498baa489faSSeongJae Park 	memset(mem, 0xff, size);
499baa489faSSeongJae Park 	sqe = io_uring_get_sqe(&ring);
500baa489faSSeongJae Park 	if (!sqe) {
501baa489faSSeongJae Park 		ksft_test_result_fail("io_uring_get_sqe() failed\n");
502baa489faSSeongJae Park 		goto quit_child;
503baa489faSSeongJae Park 	}
504baa489faSSeongJae Park 	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
505baa489faSSeongJae Park 
506baa489faSSeongJae Park 	ret = io_uring_submit(&ring);
507baa489faSSeongJae Park 	if (ret < 0) {
508baa489faSSeongJae Park 		ksft_test_result_fail("io_uring_submit() failed\n");
509baa489faSSeongJae Park 		goto quit_child;
510baa489faSSeongJae Park 	}
511baa489faSSeongJae Park 
512baa489faSSeongJae Park 	ret = io_uring_wait_cqe(&ring, &cqe);
513baa489faSSeongJae Park 	if (ret < 0) {
514baa489faSSeongJae Park 		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
515baa489faSSeongJae Park 		goto quit_child;
516baa489faSSeongJae Park 	}
517baa489faSSeongJae Park 
518baa489faSSeongJae Park 	if (cqe->res != size) {
519baa489faSSeongJae Park 		ksft_test_result_fail("write_fixed failed\n");
520baa489faSSeongJae Park 		goto quit_child;
521baa489faSSeongJae Park 	}
522baa489faSSeongJae Park 	io_uring_cqe_seen(&ring, cqe);
523baa489faSSeongJae Park 
524baa489faSSeongJae Park 	/* Read back the file content to the temporary buffer. */
525baa489faSSeongJae Park 	total = 0;
526baa489faSSeongJae Park 	while (total < size) {
527baa489faSSeongJae Park 		cur = pread(fd, tmp + total, size - total, total);
528baa489faSSeongJae Park 		if (cur < 0) {
529baa489faSSeongJae Park 			ksft_test_result_fail("pread() failed\n");
530baa489faSSeongJae Park 			goto quit_child;
531baa489faSSeongJae Park 		}
532baa489faSSeongJae Park 		total += cur;
533baa489faSSeongJae Park 	}
534baa489faSSeongJae Park 
535baa489faSSeongJae Park 	/* Finally, check if we read what we expected. */
536baa489faSSeongJae Park 	ksft_test_result(!memcmp(mem, tmp, size),
537baa489faSSeongJae Park 			 "Longterm R/W pin is reliable\n");
538baa489faSSeongJae Park 
539baa489faSSeongJae Park quit_child:
540baa489faSSeongJae Park 	if (use_fork) {
541baa489faSSeongJae Park 		write(comm_pipes.parent_ready[1], "0", 1);
542baa489faSSeongJae Park 		wait(&ret);
543baa489faSSeongJae Park 	}
544baa489faSSeongJae Park unregister_buffers:
545baa489faSSeongJae Park 	io_uring_unregister_buffers(&ring);
546baa489faSSeongJae Park queue_exit:
547baa489faSSeongJae Park 	io_uring_queue_exit(&ring);
548baa489faSSeongJae Park free_tmp:
549baa489faSSeongJae Park 	free(tmp);
550baa489faSSeongJae Park close_file:
551baa489faSSeongJae Park 	fclose(file);
552baa489faSSeongJae Park close_comm_pipes:
553baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
554baa489faSSeongJae Park }
555baa489faSSeongJae Park 
/*
 * io_uring fixed-buffer test without fork(): the range is toggled through
 * mprotect() instead. @is_hugetlb is not used by this variant.
 */
static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
{
	const bool use_fork = false;

	do_test_iouring(mem, size, use_fork);
}
560baa489faSSeongJae Park 
/*
 * io_uring fixed-buffer test with a forked child kept alive while the
 * parent writes. @is_hugetlb is not used by this variant.
 */
static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
{
	const bool use_fork = true;

	do_test_iouring(mem, size, use_fork);
}
565baa489faSSeongJae Park 
566baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */
567baa489faSSeongJae Park 
/*
 * Selects how do_test_ro_pin() prepares the range before taking the R/O
 * longterm pin.
 */
568baa489faSSeongJae Park enum ro_pin_test {
	/* No preparation before pinning. */
569baa489faSSeongJae Park 	RO_PIN_TEST,
	/*
	 * fork() a child first and pin once it has signalled readiness.
	 * NOTE(review): the child is presumably reaped after the pin; that
	 * code is outside this view.
	 */
570baa489faSSeongJae Park 	RO_PIN_TEST_SHARED,
	/* fork() a child, then tell it to quit and reap it before pinning. */
571baa489faSSeongJae Park 	RO_PIN_TEST_PREVIOUSLY_SHARED,
	/* mprotect() to R/O, clear softdirty, mprotect() back to R/W, then pin. */
572baa489faSSeongJae Park 	RO_PIN_TEST_RO_EXCLUSIVE,
573baa489faSSeongJae Park };
574baa489faSSeongJae Park 
575baa489faSSeongJae Park static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
576baa489faSSeongJae Park 			   bool fast)
577baa489faSSeongJae Park {
578baa489faSSeongJae Park 	struct pin_longterm_test args;
579baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
580baa489faSSeongJae Park 	char *tmp, buf;
581baa489faSSeongJae Park 	__u64 tmp_val;
582baa489faSSeongJae Park 	int ret;
583baa489faSSeongJae Park 
584baa489faSSeongJae Park 	if (gup_fd < 0) {
585baa489faSSeongJae Park 		ksft_test_result_skip("gup_test not available\n");
586baa489faSSeongJae Park 		return;
587baa489faSSeongJae Park 	}
588baa489faSSeongJae Park 
589baa489faSSeongJae Park 	tmp = malloc(size);
590baa489faSSeongJae Park 	if (!tmp) {
591baa489faSSeongJae Park 		ksft_test_result_fail("malloc() failed\n");
592baa489faSSeongJae Park 		return;
593baa489faSSeongJae Park 	}
594baa489faSSeongJae Park 
595baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
596baa489faSSeongJae Park 	if (ret) {
597baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
598baa489faSSeongJae Park 		goto free_tmp;
599baa489faSSeongJae Park 	}
600baa489faSSeongJae Park 
601baa489faSSeongJae Park 	switch (test) {
602baa489faSSeongJae Park 	case RO_PIN_TEST:
603baa489faSSeongJae Park 		break;
604baa489faSSeongJae Park 	case RO_PIN_TEST_SHARED:
605baa489faSSeongJae Park 	case RO_PIN_TEST_PREVIOUSLY_SHARED:
606baa489faSSeongJae Park 		/*
607baa489faSSeongJae Park 		 * Share the pages with our child. As the pages are not pinned,
608baa489faSSeongJae Park 		 * this should just work.
609baa489faSSeongJae Park 		 */
610baa489faSSeongJae Park 		ret = fork();
611baa489faSSeongJae Park 		if (ret < 0) {
612baa489faSSeongJae Park 			ksft_test_result_fail("fork() failed\n");
613baa489faSSeongJae Park 			goto close_comm_pipes;
614baa489faSSeongJae Park 		} else if (!ret) {
615baa489faSSeongJae Park 			write(comm_pipes.child_ready[1], "0", 1);
616baa489faSSeongJae Park 			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
617baa489faSSeongJae Park 				;
618baa489faSSeongJae Park 			exit(0);
619baa489faSSeongJae Park 		}
620baa489faSSeongJae Park 
621baa489faSSeongJae Park 		/* Wait until our child is ready. */
622baa489faSSeongJae Park 		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
623baa489faSSeongJae Park 			;
624baa489faSSeongJae Park 
625baa489faSSeongJae Park 		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
626baa489faSSeongJae Park 			/*
627baa489faSSeongJae Park 			 * Tell the child to quit now and wait until it quit.
628baa489faSSeongJae Park 			 * The pages should now be mapped R/O into our page
629baa489faSSeongJae Park 			 * tables, but they are no longer shared.
630baa489faSSeongJae Park 			 */
631baa489faSSeongJae Park 			write(comm_pipes.parent_ready[1], "0", 1);
632baa489faSSeongJae Park 			wait(&ret);
633baa489faSSeongJae Park 			if (!WIFEXITED(ret))
634baa489faSSeongJae Park 				ksft_print_msg("[INFO] wait() failed\n");
635baa489faSSeongJae Park 		}
636baa489faSSeongJae Park 		break;
637baa489faSSeongJae Park 	case RO_PIN_TEST_RO_EXCLUSIVE:
638baa489faSSeongJae Park 		/*
639baa489faSSeongJae Park 		 * Map the page R/O into the page table. Enable softdirty
640baa489faSSeongJae Park 		 * tracking to stop the page from getting mapped R/W immediately
641baa489faSSeongJae Park 		 * again by mprotect() optimizations. Note that we don't have an
642baa489faSSeongJae Park 		 * easy way to test if that worked (the pagemap does not export
643baa489faSSeongJae Park 		 * if the page is mapped R/O vs. R/W).
644baa489faSSeongJae Park 		 */
645baa489faSSeongJae Park 		ret = mprotect(mem, size, PROT_READ);
646baa489faSSeongJae Park 		clear_softdirty();
647baa489faSSeongJae Park 		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
648baa489faSSeongJae Park 		if (ret) {
649baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
650baa489faSSeongJae Park 			goto close_comm_pipes;
651baa489faSSeongJae Park 		}
652baa489faSSeongJae Park 		break;
653baa489faSSeongJae Park 	default:
654baa489faSSeongJae Park 		assert(false);
655baa489faSSeongJae Park 	}
656baa489faSSeongJae Park 
657baa489faSSeongJae Park 	/* Take a R/O pin. This should trigger unsharing. */
658baa489faSSeongJae Park 	args.addr = (__u64)(uintptr_t)mem;
659baa489faSSeongJae Park 	args.size = size;
660baa489faSSeongJae Park 	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
661baa489faSSeongJae Park 	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
662baa489faSSeongJae Park 	if (ret) {
663baa489faSSeongJae Park 		if (errno == EINVAL)
664baa489faSSeongJae Park 			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
665baa489faSSeongJae Park 		else
666baa489faSSeongJae Park 			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
667baa489faSSeongJae Park 		goto wait;
668baa489faSSeongJae Park 	}
669baa489faSSeongJae Park 
670baa489faSSeongJae Park 	/* Modify the page. */
671baa489faSSeongJae Park 	memset(mem, 0xff, size);
672baa489faSSeongJae Park 
673baa489faSSeongJae Park 	/*
674baa489faSSeongJae Park 	 * Read back the content via the pin to the temporary buffer and
675baa489faSSeongJae Park 	 * test if we observed the modification.
676baa489faSSeongJae Park 	 */
677baa489faSSeongJae Park 	tmp_val = (__u64)(uintptr_t)tmp;
678baa489faSSeongJae Park 	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
679baa489faSSeongJae Park 	if (ret)
680baa489faSSeongJae Park 		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
681baa489faSSeongJae Park 	else
682baa489faSSeongJae Park 		ksft_test_result(!memcmp(mem, tmp, size),
683baa489faSSeongJae Park 				 "Longterm R/O pin is reliable\n");
684baa489faSSeongJae Park 
685baa489faSSeongJae Park 	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
686baa489faSSeongJae Park 	if (ret)
687baa489faSSeongJae Park 		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
688baa489faSSeongJae Park wait:
689baa489faSSeongJae Park 	switch (test) {
690baa489faSSeongJae Park 	case RO_PIN_TEST_SHARED:
691baa489faSSeongJae Park 		write(comm_pipes.parent_ready[1], "0", 1);
692baa489faSSeongJae Park 		wait(&ret);
693baa489faSSeongJae Park 		if (!WIFEXITED(ret))
694baa489faSSeongJae Park 			ksft_print_msg("[INFO] wait() failed\n");
695baa489faSSeongJae Park 		break;
696baa489faSSeongJae Park 	default:
697baa489faSSeongJae Park 		break;
698baa489faSSeongJae Park 	}
699baa489faSSeongJae Park close_comm_pipes:
700baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
701baa489faSSeongJae Park free_tmp:
702baa489faSSeongJae Park 	free(tmp);
703baa489faSSeongJae Park }
704baa489faSSeongJae Park 
705*4bf6a4ebSDavid Hildenbrand static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
706baa489faSSeongJae Park {
707baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
708baa489faSSeongJae Park }
709baa489faSSeongJae Park 
710*4bf6a4ebSDavid Hildenbrand static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
711baa489faSSeongJae Park {
712baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
713baa489faSSeongJae Park }
714baa489faSSeongJae Park 
715*4bf6a4ebSDavid Hildenbrand static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
716*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
717baa489faSSeongJae Park {
718baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
719baa489faSSeongJae Park }
720baa489faSSeongJae Park 
721*4bf6a4ebSDavid Hildenbrand static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
722*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
723baa489faSSeongJae Park {
724baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
725baa489faSSeongJae Park }
726baa489faSSeongJae Park 
727*4bf6a4ebSDavid Hildenbrand static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
728*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
729baa489faSSeongJae Park {
730baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
731baa489faSSeongJae Park }
732baa489faSSeongJae Park 
733*4bf6a4ebSDavid Hildenbrand static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
734*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
735baa489faSSeongJae Park {
736baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
737baa489faSSeongJae Park }
738baa489faSSeongJae Park 
/*
 * Signature of a test body: the memory under test, its size in bytes, and
 * whether that memory is backed by hugetlb.
 */
typedef void (*test_fn)(char *mem, size_t size, bool is_hugetlb);
740baa489faSSeongJae Park 
741baa489faSSeongJae Park static void do_run_with_base_page(test_fn fn, bool swapout)
742baa489faSSeongJae Park {
743baa489faSSeongJae Park 	char *mem;
744baa489faSSeongJae Park 	int ret;
745baa489faSSeongJae Park 
746baa489faSSeongJae Park 	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
747baa489faSSeongJae Park 		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
748baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
749baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
750baa489faSSeongJae Park 		return;
751baa489faSSeongJae Park 	}
752baa489faSSeongJae Park 
753baa489faSSeongJae Park 	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
754baa489faSSeongJae Park 	/* Ignore if not around on a kernel. */
755baa489faSSeongJae Park 	if (ret && errno != EINVAL) {
756baa489faSSeongJae Park 		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
757baa489faSSeongJae Park 		goto munmap;
758baa489faSSeongJae Park 	}
759baa489faSSeongJae Park 
760baa489faSSeongJae Park 	/* Populate a base page. */
761baa489faSSeongJae Park 	memset(mem, 0, pagesize);
762baa489faSSeongJae Park 
763baa489faSSeongJae Park 	if (swapout) {
764baa489faSSeongJae Park 		madvise(mem, pagesize, MADV_PAGEOUT);
765baa489faSSeongJae Park 		if (!pagemap_is_swapped(pagemap_fd, mem)) {
766baa489faSSeongJae Park 			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
767baa489faSSeongJae Park 			goto munmap;
768baa489faSSeongJae Park 		}
769baa489faSSeongJae Park 	}
770baa489faSSeongJae Park 
771*4bf6a4ebSDavid Hildenbrand 	fn(mem, pagesize, false);
772baa489faSSeongJae Park munmap:
773baa489faSSeongJae Park 	munmap(mem, pagesize);
774baa489faSSeongJae Park }
775baa489faSSeongJae Park 
776baa489faSSeongJae Park static void run_with_base_page(test_fn fn, const char *desc)
777baa489faSSeongJae Park {
778baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with base page\n", desc);
779baa489faSSeongJae Park 	do_run_with_base_page(fn, false);
780baa489faSSeongJae Park }
781baa489faSSeongJae Park 
782baa489faSSeongJae Park static void run_with_base_page_swap(test_fn fn, const char *desc)
783baa489faSSeongJae Park {
784baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
785baa489faSSeongJae Park 	do_run_with_base_page(fn, true);
786baa489faSSeongJae Park }
787baa489faSSeongJae Park 
/* State a THP is brought into before the test body runs on it. */
enum thp_run {
	/* THP left as populated; the size must equal the PMD size. */
	THP_RUN_PMD = 0,
	/* Same as THP_RUN_PMD, paged out before the test. */
	THP_RUN_PMD_SWAPOUT,
	/* THP that was forced to be PTE-mapped. */
	THP_RUN_PTE,
	/* Same as THP_RUN_PTE, paged out before the test. */
	THP_RUN_PTE_SWAPOUT,
	/* Only the first subpage of the THP remains mapped. */
	THP_RUN_SINGLE_PTE,
	/* Same as THP_RUN_SINGLE_PTE, paged out before the test. */
	THP_RUN_SINGLE_PTE_SWAPOUT,
	/* Upper half of the THP was mremap()'ed elsewhere. */
	THP_RUN_PARTIAL_MREMAP,
	/* Only the first subpage was ever shared with a child. */
	THP_RUN_PARTIAL_SHARED,
};
798baa489faSSeongJae Park 
79912dc16b3SRyan Roberts static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
800baa489faSSeongJae Park {
801baa489faSSeongJae Park 	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
802baa489faSSeongJae Park 	size_t size, mmap_size, mremap_size;
803baa489faSSeongJae Park 	int ret;
804baa489faSSeongJae Park 
805baa489faSSeongJae Park 	/* For alignment purposes, we need twice the thp size. */
806baa489faSSeongJae Park 	mmap_size = 2 * thpsize;
807baa489faSSeongJae Park 	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
808baa489faSSeongJae Park 			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
809baa489faSSeongJae Park 	if (mmap_mem == MAP_FAILED) {
810baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
811baa489faSSeongJae Park 		return;
812baa489faSSeongJae Park 	}
813baa489faSSeongJae Park 
814baa489faSSeongJae Park 	/* We need a THP-aligned memory area. */
815baa489faSSeongJae Park 	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
816baa489faSSeongJae Park 
817baa489faSSeongJae Park 	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
818baa489faSSeongJae Park 	if (ret) {
819baa489faSSeongJae Park 		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
820baa489faSSeongJae Park 		goto munmap;
821baa489faSSeongJae Park 	}
822baa489faSSeongJae Park 
823baa489faSSeongJae Park 	/*
82412dc16b3SRyan Roberts 	 * Try to populate a THP. Touch the first sub-page and test if
82512dc16b3SRyan Roberts 	 * we get the last sub-page populated automatically.
826baa489faSSeongJae Park 	 */
827baa489faSSeongJae Park 	mem[0] = 0;
82812dc16b3SRyan Roberts 	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
829baa489faSSeongJae Park 		ksft_test_result_skip("Did not get a THP populated\n");
830baa489faSSeongJae Park 		goto munmap;
831baa489faSSeongJae Park 	}
832baa489faSSeongJae Park 	memset(mem, 0, thpsize);
833baa489faSSeongJae Park 
834baa489faSSeongJae Park 	size = thpsize;
835baa489faSSeongJae Park 	switch (thp_run) {
836baa489faSSeongJae Park 	case THP_RUN_PMD:
837baa489faSSeongJae Park 	case THP_RUN_PMD_SWAPOUT:
83812dc16b3SRyan Roberts 		assert(thpsize == pmdsize);
839baa489faSSeongJae Park 		break;
840baa489faSSeongJae Park 	case THP_RUN_PTE:
841baa489faSSeongJae Park 	case THP_RUN_PTE_SWAPOUT:
842baa489faSSeongJae Park 		/*
843baa489faSSeongJae Park 		 * Trigger PTE-mapping the THP by temporarily mapping a single
84412dc16b3SRyan Roberts 		 * subpage R/O. This is a noop if the THP is not pmdsize (and
84512dc16b3SRyan Roberts 		 * therefore already PTE-mapped).
846baa489faSSeongJae Park 		 */
847baa489faSSeongJae Park 		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
848baa489faSSeongJae Park 		if (ret) {
849baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
850baa489faSSeongJae Park 			goto munmap;
851baa489faSSeongJae Park 		}
852baa489faSSeongJae Park 		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
853baa489faSSeongJae Park 		if (ret) {
854baa489faSSeongJae Park 			ksft_test_result_fail("mprotect() failed\n");
855baa489faSSeongJae Park 			goto munmap;
856baa489faSSeongJae Park 		}
857baa489faSSeongJae Park 		break;
858baa489faSSeongJae Park 	case THP_RUN_SINGLE_PTE:
859baa489faSSeongJae Park 	case THP_RUN_SINGLE_PTE_SWAPOUT:
860baa489faSSeongJae Park 		/*
861baa489faSSeongJae Park 		 * Discard all but a single subpage of that PTE-mapped THP. What
862baa489faSSeongJae Park 		 * remains is a single PTE mapping a single subpage.
863baa489faSSeongJae Park 		 */
864baa489faSSeongJae Park 		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
865baa489faSSeongJae Park 		if (ret) {
866baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DONTNEED failed\n");
867baa489faSSeongJae Park 			goto munmap;
868baa489faSSeongJae Park 		}
869baa489faSSeongJae Park 		size = pagesize;
870baa489faSSeongJae Park 		break;
871baa489faSSeongJae Park 	case THP_RUN_PARTIAL_MREMAP:
872baa489faSSeongJae Park 		/*
873baa489faSSeongJae Park 		 * Remap half of the THP. We need some new memory location
874baa489faSSeongJae Park 		 * for that.
875baa489faSSeongJae Park 		 */
876baa489faSSeongJae Park 		mremap_size = thpsize / 2;
877baa489faSSeongJae Park 		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
878baa489faSSeongJae Park 				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
879baa489faSSeongJae Park 		if (mem == MAP_FAILED) {
880baa489faSSeongJae Park 			ksft_test_result_fail("mmap() failed\n");
881baa489faSSeongJae Park 			goto munmap;
882baa489faSSeongJae Park 		}
883baa489faSSeongJae Park 		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
884baa489faSSeongJae Park 			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
885baa489faSSeongJae Park 		if (tmp != mremap_mem) {
886baa489faSSeongJae Park 			ksft_test_result_fail("mremap() failed\n");
887baa489faSSeongJae Park 			goto munmap;
888baa489faSSeongJae Park 		}
889baa489faSSeongJae Park 		size = mremap_size;
890baa489faSSeongJae Park 		break;
891baa489faSSeongJae Park 	case THP_RUN_PARTIAL_SHARED:
892baa489faSSeongJae Park 		/*
893baa489faSSeongJae Park 		 * Share the first page of the THP with a child and quit the
894baa489faSSeongJae Park 		 * child. This will result in some parts of the THP never
895baa489faSSeongJae Park 		 * have been shared.
896baa489faSSeongJae Park 		 */
897baa489faSSeongJae Park 		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
898baa489faSSeongJae Park 		if (ret) {
899baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DONTFORK failed\n");
900baa489faSSeongJae Park 			goto munmap;
901baa489faSSeongJae Park 		}
902baa489faSSeongJae Park 		ret = fork();
903baa489faSSeongJae Park 		if (ret < 0) {
904baa489faSSeongJae Park 			ksft_test_result_fail("fork() failed\n");
905baa489faSSeongJae Park 			goto munmap;
906baa489faSSeongJae Park 		} else if (!ret) {
907baa489faSSeongJae Park 			exit(0);
908baa489faSSeongJae Park 		}
909baa489faSSeongJae Park 		wait(&ret);
910baa489faSSeongJae Park 		/* Allow for sharing all pages again. */
911baa489faSSeongJae Park 		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
912baa489faSSeongJae Park 		if (ret) {
913baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DOFORK failed\n");
914baa489faSSeongJae Park 			goto munmap;
915baa489faSSeongJae Park 		}
916baa489faSSeongJae Park 		break;
917baa489faSSeongJae Park 	default:
918baa489faSSeongJae Park 		assert(false);
919baa489faSSeongJae Park 	}
920baa489faSSeongJae Park 
921baa489faSSeongJae Park 	switch (thp_run) {
922baa489faSSeongJae Park 	case THP_RUN_PMD_SWAPOUT:
923baa489faSSeongJae Park 	case THP_RUN_PTE_SWAPOUT:
924baa489faSSeongJae Park 	case THP_RUN_SINGLE_PTE_SWAPOUT:
925baa489faSSeongJae Park 		madvise(mem, size, MADV_PAGEOUT);
926baa489faSSeongJae Park 		if (!range_is_swapped(mem, size)) {
927baa489faSSeongJae Park 			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
928baa489faSSeongJae Park 			goto munmap;
929baa489faSSeongJae Park 		}
930baa489faSSeongJae Park 		break;
931baa489faSSeongJae Park 	default:
932baa489faSSeongJae Park 		break;
933baa489faSSeongJae Park 	}
934baa489faSSeongJae Park 
935*4bf6a4ebSDavid Hildenbrand 	fn(mem, size, false);
936baa489faSSeongJae Park munmap:
937baa489faSSeongJae Park 	munmap(mmap_mem, mmap_size);
938baa489faSSeongJae Park 	if (mremap_mem != MAP_FAILED)
939baa489faSSeongJae Park 		munmap(mremap_mem, mremap_size);
940baa489faSSeongJae Park }
941baa489faSSeongJae Park 
94212dc16b3SRyan Roberts static void run_with_thp(test_fn fn, const char *desc, size_t size)
943baa489faSSeongJae Park {
94412dc16b3SRyan Roberts 	ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n",
94512dc16b3SRyan Roberts 		desc, size / 1024);
94612dc16b3SRyan Roberts 	do_run_with_thp(fn, THP_RUN_PMD, size);
947baa489faSSeongJae Park }
948baa489faSSeongJae Park 
94912dc16b3SRyan Roberts static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
950baa489faSSeongJae Park {
95112dc16b3SRyan Roberts 	ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n",
95212dc16b3SRyan Roberts 		desc, size / 1024);
95312dc16b3SRyan Roberts 	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
954baa489faSSeongJae Park }
955baa489faSSeongJae Park 
95612dc16b3SRyan Roberts static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
957baa489faSSeongJae Park {
95812dc16b3SRyan Roberts 	ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n",
95912dc16b3SRyan Roberts 		desc, size / 1024);
96012dc16b3SRyan Roberts 	do_run_with_thp(fn, THP_RUN_PTE, size);
961baa489faSSeongJae Park }
962baa489faSSeongJae Park 
96312dc16b3SRyan Roberts static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
964baa489faSSeongJae Park {
96512dc16b3SRyan Roberts 	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n",
96612dc16b3SRyan Roberts 		desc, size / 1024);
96712dc16b3SRyan Roberts 	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
968baa489faSSeongJae Park }
969baa489faSSeongJae Park 
97012dc16b3SRyan Roberts static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
971baa489faSSeongJae Park {
97212dc16b3SRyan Roberts 	ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n",
97312dc16b3SRyan Roberts 		desc, size / 1024);
97412dc16b3SRyan Roberts 	do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
975baa489faSSeongJae Park }
976baa489faSSeongJae Park 
97712dc16b3SRyan Roberts static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
978baa489faSSeongJae Park {
97912dc16b3SRyan Roberts 	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n",
98012dc16b3SRyan Roberts 		desc, size / 1024);
98112dc16b3SRyan Roberts 	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
982baa489faSSeongJae Park }
983baa489faSSeongJae Park 
98412dc16b3SRyan Roberts static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
985baa489faSSeongJae Park {
98612dc16b3SRyan Roberts 	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n",
98712dc16b3SRyan Roberts 		desc, size / 1024);
98812dc16b3SRyan Roberts 	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
989baa489faSSeongJae Park }
990baa489faSSeongJae Park 
99112dc16b3SRyan Roberts static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
992baa489faSSeongJae Park {
99312dc16b3SRyan Roberts 	ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n",
99412dc16b3SRyan Roberts 		desc, size / 1024);
99512dc16b3SRyan Roberts 	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
996baa489faSSeongJae Park }
997baa489faSSeongJae Park 
998baa489faSSeongJae Park static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
999baa489faSSeongJae Park {
1000baa489faSSeongJae Park 	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
1001baa489faSSeongJae Park 	char *mem, *dummy;
1002baa489faSSeongJae Park 
1003baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
1004baa489faSSeongJae Park 		       hugetlbsize / 1024);
1005baa489faSSeongJae Park 
1006baa489faSSeongJae Park 	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
1007baa489faSSeongJae Park 
1008baa489faSSeongJae Park 	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
1009baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1010baa489faSSeongJae Park 		ksft_test_result_skip("need more free huge pages\n");
1011baa489faSSeongJae Park 		return;
1012baa489faSSeongJae Park 	}
1013baa489faSSeongJae Park 
1014baa489faSSeongJae Park 	/* Populate an huge page. */
1015baa489faSSeongJae Park 	memset(mem, 0, hugetlbsize);
1016baa489faSSeongJae Park 
1017baa489faSSeongJae Park 	/*
1018baa489faSSeongJae Park 	 * We need a total of two hugetlb pages to handle COW/unsharing
1019baa489faSSeongJae Park 	 * properly, otherwise we might get zapped by a SIGBUS.
1020baa489faSSeongJae Park 	 */
1021baa489faSSeongJae Park 	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
1022baa489faSSeongJae Park 	if (dummy == MAP_FAILED) {
1023baa489faSSeongJae Park 		ksft_test_result_skip("need more free huge pages\n");
1024baa489faSSeongJae Park 		goto munmap;
1025baa489faSSeongJae Park 	}
1026baa489faSSeongJae Park 	munmap(dummy, hugetlbsize);
1027baa489faSSeongJae Park 
1028*4bf6a4ebSDavid Hildenbrand 	fn(mem, hugetlbsize, true);
1029baa489faSSeongJae Park munmap:
1030baa489faSSeongJae Park 	munmap(mem, hugetlbsize);
1031baa489faSSeongJae Park }
1032baa489faSSeongJae Park 
1033baa489faSSeongJae Park struct test_case {
1034baa489faSSeongJae Park 	const char *desc;
1035baa489faSSeongJae Park 	test_fn fn;
1036baa489faSSeongJae Park };
1037baa489faSSeongJae Park 
1038baa489faSSeongJae Park /*
1039baa489faSSeongJae Park  * Test cases that are specific to anonymous pages: pages in private mappings
1040baa489faSSeongJae Park  * that may get shared via COW during fork().
1041baa489faSSeongJae Park  */
1042baa489faSSeongJae Park static const struct test_case anon_test_cases[] = {
1043baa489faSSeongJae Park 	/*
1044baa489faSSeongJae Park 	 * Basic COW tests for fork() without any GUP. If we miss to break COW,
1045baa489faSSeongJae Park 	 * either the child can observe modifications by the parent or the
1046baa489faSSeongJae Park 	 * other way around.
1047baa489faSSeongJae Park 	 */
1048baa489faSSeongJae Park 	{
1049baa489faSSeongJae Park 		"Basic COW after fork()",
1050baa489faSSeongJae Park 		test_cow_in_parent,
1051baa489faSSeongJae Park 	},
1052baa489faSSeongJae Park 	/*
1053baa489faSSeongJae Park 	 * Basic test, but do an additional mprotect(PROT_READ)+
1054baa489faSSeongJae Park 	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
1055baa489faSSeongJae Park 	 */
1056baa489faSSeongJae Park 	{
1057baa489faSSeongJae Park 		"Basic COW after fork() with mprotect() optimization",
1058baa489faSSeongJae Park 		test_cow_in_parent_mprotect,
1059baa489faSSeongJae Park 	},
1060baa489faSSeongJae Park 	/*
1061baa489faSSeongJae Park 	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
1062baa489faSSeongJae Park 	 * we miss to break COW, the child observes modifications by the parent.
1063baa489faSSeongJae Park 	 * This is CVE-2020-29374 reported by Jann Horn.
1064baa489faSSeongJae Park 	 */
1065baa489faSSeongJae Park 	{
1066baa489faSSeongJae Park 		"vmsplice() + unmap in child",
1067*4bf6a4ebSDavid Hildenbrand 		test_vmsplice_in_child,
1068baa489faSSeongJae Park 	},
1069baa489faSSeongJae Park 	/*
1070baa489faSSeongJae Park 	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
1071baa489faSSeongJae Park 	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
1072baa489faSSeongJae Park 	 */
1073baa489faSSeongJae Park 	{
1074baa489faSSeongJae Park 		"vmsplice() + unmap in child with mprotect() optimization",
1075*4bf6a4ebSDavid Hildenbrand 		test_vmsplice_in_child_mprotect,
1076baa489faSSeongJae Park 	},
1077baa489faSSeongJae Park 	/*
1078baa489faSSeongJae Park 	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
1079baa489faSSeongJae Park 	 * fork(); modify in the child. If we miss to break COW, the parent
1080baa489faSSeongJae Park 	 * observes modifications by the child.
1081baa489faSSeongJae Park 	 */
1082baa489faSSeongJae Park 	{
1083baa489faSSeongJae Park 		"vmsplice() before fork(), unmap in parent after fork()",
1084baa489faSSeongJae Park 		test_vmsplice_before_fork,
1085baa489faSSeongJae Park 	},
1086baa489faSSeongJae Park 	/*
1087baa489faSSeongJae Park 	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
1088baa489faSSeongJae Park 	 * child. If we miss to break COW, the parent observes modifications by
1089baa489faSSeongJae Park 	 * the child.
1090baa489faSSeongJae Park 	 */
1091baa489faSSeongJae Park 	{
1092baa489faSSeongJae Park 		"vmsplice() + unmap in parent after fork()",
1093baa489faSSeongJae Park 		test_vmsplice_after_fork,
1094baa489faSSeongJae Park 	},
1095baa489faSSeongJae Park #ifdef LOCAL_CONFIG_HAVE_LIBURING
1096baa489faSSeongJae Park 	/*
1097baa489faSSeongJae Park 	 * Take a R/W longterm pin and then map the page R/O into the page
1098baa489faSSeongJae Park 	 * table to trigger a write fault on next access. When modifying the
1099baa489faSSeongJae Park 	 * page, the page content must be visible via the pin.
1100baa489faSSeongJae Park 	 */
1101baa489faSSeongJae Park 	{
1102baa489faSSeongJae Park 		"R/O-mapping a page registered as iouring fixed buffer",
1103baa489faSSeongJae Park 		test_iouring_ro,
1104baa489faSSeongJae Park 	},
1105baa489faSSeongJae Park 	/*
1106baa489faSSeongJae Park 	 * Take a R/W longterm pin and then fork() a child. When modifying the
1107baa489faSSeongJae Park 	 * page, the page content must be visible via the pin. We expect the
1108baa489faSSeongJae Park 	 * pinned page to not get shared with the child.
1109baa489faSSeongJae Park 	 */
1110baa489faSSeongJae Park 	{
1111baa489faSSeongJae Park 		"fork() with an iouring fixed buffer",
1112baa489faSSeongJae Park 		test_iouring_fork,
1113baa489faSSeongJae Park 	},
1114baa489faSSeongJae Park 
1115baa489faSSeongJae Park #endif /* LOCAL_CONFIG_HAVE_LIBURING */
1116baa489faSSeongJae Park 	/*
1117baa489faSSeongJae Park 	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
1118baa489faSSeongJae Park 	 * When modifying the page via the page table, the page content change
1119baa489faSSeongJae Park 	 * must be visible via the pin.
1120baa489faSSeongJae Park 	 */
1121baa489faSSeongJae Park 	{
1122baa489faSSeongJae Park 		"R/O GUP pin on R/O-mapped shared page",
1123baa489faSSeongJae Park 		test_ro_pin_on_shared,
1124baa489faSSeongJae Park 	},
1125baa489faSSeongJae Park 	/* Same as above, but using GUP-fast. */
1126baa489faSSeongJae Park 	{
1127baa489faSSeongJae Park 		"R/O GUP-fast pin on R/O-mapped shared page",
1128baa489faSSeongJae Park 		test_ro_fast_pin_on_shared,
1129baa489faSSeongJae Park 	},
1130baa489faSSeongJae Park 	/*
1131baa489faSSeongJae Park 	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
1132baa489faSSeongJae Park 	 * was previously shared. When modifying the page via the page table,
1133baa489faSSeongJae Park 	 * the page content change must be visible via the pin.
1134baa489faSSeongJae Park 	 */
1135baa489faSSeongJae Park 	{
1136baa489faSSeongJae Park 		"R/O GUP pin on R/O-mapped previously-shared page",
1137baa489faSSeongJae Park 		test_ro_pin_on_ro_previously_shared,
1138baa489faSSeongJae Park 	},
1139baa489faSSeongJae Park 	/* Same as above, but using GUP-fast. */
1140baa489faSSeongJae Park 	{
1141baa489faSSeongJae Park 		"R/O GUP-fast pin on R/O-mapped previously-shared page",
1142baa489faSSeongJae Park 		test_ro_fast_pin_on_ro_previously_shared,
1143baa489faSSeongJae Park 	},
1144baa489faSSeongJae Park 	/*
1145baa489faSSeongJae Park 	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
1146baa489faSSeongJae Park 	 * When modifying the page via the page table, the page content change
1147baa489faSSeongJae Park 	 * must be visible via the pin.
1148baa489faSSeongJae Park 	 */
1149baa489faSSeongJae Park 	{
1150baa489faSSeongJae Park 		"R/O GUP pin on R/O-mapped exclusive page",
1151baa489faSSeongJae Park 		test_ro_pin_on_ro_exclusive,
1152baa489faSSeongJae Park 	},
1153baa489faSSeongJae Park 	/* Same as above, but using GUP-fast. */
1154baa489faSSeongJae Park 	{
1155baa489faSSeongJae Park 		"R/O GUP-fast pin on R/O-mapped exclusive page",
1156baa489faSSeongJae Park 		test_ro_fast_pin_on_ro_exclusive,
1157baa489faSSeongJae Park 	},
1158baa489faSSeongJae Park };
1159baa489faSSeongJae Park 
1160baa489faSSeongJae Park static void run_anon_test_case(struct test_case const *test_case)
1161baa489faSSeongJae Park {
1162baa489faSSeongJae Park 	int i;
1163baa489faSSeongJae Park 
1164baa489faSSeongJae Park 	run_with_base_page(test_case->fn, test_case->desc);
1165baa489faSSeongJae Park 	run_with_base_page_swap(test_case->fn, test_case->desc);
1166c0f79103SRyan Roberts 	for (i = 0; i < nr_thpsizes; i++) {
1167c0f79103SRyan Roberts 		size_t size = thpsizes[i];
1168c0f79103SRyan Roberts 		struct thp_settings settings = *thp_current_settings();
1169c0f79103SRyan Roberts 
1170c0f79103SRyan Roberts 		settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER;
1171c0f79103SRyan Roberts 		settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
1172c0f79103SRyan Roberts 		thp_push_settings(&settings);
1173c0f79103SRyan Roberts 
1174c0f79103SRyan Roberts 		if (size == pmdsize) {
1175c0f79103SRyan Roberts 			run_with_thp(test_case->fn, test_case->desc, size);
1176c0f79103SRyan Roberts 			run_with_thp_swap(test_case->fn, test_case->desc, size);
1177c0f79103SRyan Roberts 		}
1178c0f79103SRyan Roberts 
1179c0f79103SRyan Roberts 		run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
1180c0f79103SRyan Roberts 		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
1181c0f79103SRyan Roberts 		run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
1182c0f79103SRyan Roberts 		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
1183c0f79103SRyan Roberts 		run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
1184c0f79103SRyan Roberts 		run_with_partial_shared_thp(test_case->fn, test_case->desc, size);
1185c0f79103SRyan Roberts 
1186c0f79103SRyan Roberts 		thp_pop_settings();
1187baa489faSSeongJae Park 	}
1188baa489faSSeongJae Park 	for (i = 0; i < nr_hugetlbsizes; i++)
1189baa489faSSeongJae Park 		run_with_hugetlb(test_case->fn, test_case->desc,
1190baa489faSSeongJae Park 				 hugetlbsizes[i]);
1191baa489faSSeongJae Park }
1192baa489faSSeongJae Park 
1193baa489faSSeongJae Park static void run_anon_test_cases(void)
1194baa489faSSeongJae Park {
1195baa489faSSeongJae Park 	int i;
1196baa489faSSeongJae Park 
1197baa489faSSeongJae Park 	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");
1198baa489faSSeongJae Park 
1199baa489faSSeongJae Park 	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
1200baa489faSSeongJae Park 		run_anon_test_case(&anon_test_cases[i]);
1201baa489faSSeongJae Park }
1202baa489faSSeongJae Park 
1203baa489faSSeongJae Park static int tests_per_anon_test_case(void)
1204baa489faSSeongJae Park {
1205baa489faSSeongJae Park 	int tests = 2 + nr_hugetlbsizes;
1206baa489faSSeongJae Park 
1207c0f79103SRyan Roberts 	tests += 6 * nr_thpsizes;
120812dc16b3SRyan Roberts 	if (pmdsize)
1209c0f79103SRyan Roberts 		tests += 2;
1210baa489faSSeongJae Park 	return tests;
1211baa489faSSeongJae Park }
1212baa489faSSeongJae Park 
/* Which part of the THP is COW-shared in the collapse tests. */
enum anon_thp_collapse_test {
	/* Collapse before the THP is COW-shared at all. */
	ANON_THP_COLLAPSE_UNSHARED = 0,
	/* The whole PTE-mapped THP is COW-shared. */
	ANON_THP_COLLAPSE_FULLY_SHARED,
	/* The upper half is excluded from fork(); only the lower half is shared. */
	ANON_THP_COLLAPSE_LOWER_SHARED,
	/* The lower half is excluded from fork(); only the upper half is shared. */
	ANON_THP_COLLAPSE_UPPER_SHARED,
};
1219baa489faSSeongJae Park 
1220baa489faSSeongJae Park static void do_test_anon_thp_collapse(char *mem, size_t size,
1221baa489faSSeongJae Park 				      enum anon_thp_collapse_test test)
1222baa489faSSeongJae Park {
1223baa489faSSeongJae Park 	struct comm_pipes comm_pipes;
1224baa489faSSeongJae Park 	char buf;
1225baa489faSSeongJae Park 	int ret;
1226baa489faSSeongJae Park 
1227baa489faSSeongJae Park 	ret = setup_comm_pipes(&comm_pipes);
1228baa489faSSeongJae Park 	if (ret) {
1229baa489faSSeongJae Park 		ksft_test_result_fail("pipe() failed\n");
1230baa489faSSeongJae Park 		return;
1231baa489faSSeongJae Park 	}
1232baa489faSSeongJae Park 
1233baa489faSSeongJae Park 	/*
1234baa489faSSeongJae Park 	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
1235baa489faSSeongJae Park 	 * R/O, such that we can try collapsing it later.
1236baa489faSSeongJae Park 	 */
1237baa489faSSeongJae Park 	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
1238baa489faSSeongJae Park 	if (ret) {
1239baa489faSSeongJae Park 		ksft_test_result_fail("mprotect() failed\n");
1240baa489faSSeongJae Park 		goto close_comm_pipes;
1241baa489faSSeongJae Park 	}
1242baa489faSSeongJae Park 	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
1243baa489faSSeongJae Park 	if (ret) {
1244baa489faSSeongJae Park 		ksft_test_result_fail("mprotect() failed\n");
1245baa489faSSeongJae Park 		goto close_comm_pipes;
1246baa489faSSeongJae Park 	}
1247baa489faSSeongJae Park 
1248baa489faSSeongJae Park 	switch (test) {
1249baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_UNSHARED:
1250baa489faSSeongJae Park 		/* Collapse before actually COW-sharing the page. */
1251baa489faSSeongJae Park 		ret = madvise(mem, size, MADV_COLLAPSE);
1252baa489faSSeongJae Park 		if (ret) {
1253baa489faSSeongJae Park 			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
1254baa489faSSeongJae Park 					      strerror(errno));
1255baa489faSSeongJae Park 			goto close_comm_pipes;
1256baa489faSSeongJae Park 		}
1257baa489faSSeongJae Park 		break;
1258baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_FULLY_SHARED:
1259baa489faSSeongJae Park 		/* COW-share the full PTE-mapped THP. */
1260baa489faSSeongJae Park 		break;
1261baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_LOWER_SHARED:
1262baa489faSSeongJae Park 		/* Don't COW-share the upper part of the THP. */
1263baa489faSSeongJae Park 		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
1264baa489faSSeongJae Park 		if (ret) {
1265baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DONTFORK failed\n");
1266baa489faSSeongJae Park 			goto close_comm_pipes;
1267baa489faSSeongJae Park 		}
1268baa489faSSeongJae Park 		break;
1269baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_UPPER_SHARED:
1270baa489faSSeongJae Park 		/* Don't COW-share the lower part of the THP. */
1271baa489faSSeongJae Park 		ret = madvise(mem, size / 2, MADV_DONTFORK);
1272baa489faSSeongJae Park 		if (ret) {
1273baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DONTFORK failed\n");
1274baa489faSSeongJae Park 			goto close_comm_pipes;
1275baa489faSSeongJae Park 		}
1276baa489faSSeongJae Park 		break;
1277baa489faSSeongJae Park 	default:
1278baa489faSSeongJae Park 		assert(false);
1279baa489faSSeongJae Park 	}
1280baa489faSSeongJae Park 
1281baa489faSSeongJae Park 	ret = fork();
1282baa489faSSeongJae Park 	if (ret < 0) {
1283baa489faSSeongJae Park 		ksft_test_result_fail("fork() failed\n");
1284baa489faSSeongJae Park 		goto close_comm_pipes;
1285baa489faSSeongJae Park 	} else if (!ret) {
1286baa489faSSeongJae Park 		switch (test) {
1287baa489faSSeongJae Park 		case ANON_THP_COLLAPSE_UNSHARED:
1288baa489faSSeongJae Park 		case ANON_THP_COLLAPSE_FULLY_SHARED:
1289baa489faSSeongJae Park 			exit(child_memcmp_fn(mem, size, &comm_pipes));
1290baa489faSSeongJae Park 			break;
1291baa489faSSeongJae Park 		case ANON_THP_COLLAPSE_LOWER_SHARED:
1292baa489faSSeongJae Park 			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
1293baa489faSSeongJae Park 			break;
1294baa489faSSeongJae Park 		case ANON_THP_COLLAPSE_UPPER_SHARED:
1295baa489faSSeongJae Park 			exit(child_memcmp_fn(mem + size / 2, size / 2,
1296baa489faSSeongJae Park 					     &comm_pipes));
1297baa489faSSeongJae Park 			break;
1298baa489faSSeongJae Park 		default:
1299baa489faSSeongJae Park 			assert(false);
1300baa489faSSeongJae Park 		}
1301baa489faSSeongJae Park 	}
1302baa489faSSeongJae Park 
1303baa489faSSeongJae Park 	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
1304baa489faSSeongJae Park 		;
1305baa489faSSeongJae Park 
1306baa489faSSeongJae Park 	switch (test) {
1307baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_UNSHARED:
1308baa489faSSeongJae Park 		break;
1309baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_UPPER_SHARED:
1310baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_LOWER_SHARED:
1311baa489faSSeongJae Park 		/*
1312baa489faSSeongJae Park 		 * Revert MADV_DONTFORK such that we merge the VMAs and are
1313baa489faSSeongJae Park 		 * able to actually collapse.
1314baa489faSSeongJae Park 		 */
1315baa489faSSeongJae Park 		ret = madvise(mem, size, MADV_DOFORK);
1316baa489faSSeongJae Park 		if (ret) {
1317baa489faSSeongJae Park 			ksft_test_result_fail("MADV_DOFORK failed\n");
1318baa489faSSeongJae Park 			write(comm_pipes.parent_ready[1], "0", 1);
1319baa489faSSeongJae Park 			wait(&ret);
1320baa489faSSeongJae Park 			goto close_comm_pipes;
1321baa489faSSeongJae Park 		}
1322baa489faSSeongJae Park 		/* FALLTHROUGH */
1323baa489faSSeongJae Park 	case ANON_THP_COLLAPSE_FULLY_SHARED:
1324baa489faSSeongJae Park 		/* Collapse before anyone modified the COW-shared page. */
1325baa489faSSeongJae Park 		ret = madvise(mem, size, MADV_COLLAPSE);
1326baa489faSSeongJae Park 		if (ret) {
1327baa489faSSeongJae Park 			ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
1328baa489faSSeongJae Park 					      strerror(errno));
1329baa489faSSeongJae Park 			write(comm_pipes.parent_ready[1], "0", 1);
1330baa489faSSeongJae Park 			wait(&ret);
1331baa489faSSeongJae Park 			goto close_comm_pipes;
1332baa489faSSeongJae Park 		}
1333baa489faSSeongJae Park 		break;
1334baa489faSSeongJae Park 	default:
1335baa489faSSeongJae Park 		assert(false);
1336baa489faSSeongJae Park 	}
1337baa489faSSeongJae Park 
1338baa489faSSeongJae Park 	/* Modify the page. */
1339baa489faSSeongJae Park 	memset(mem, 0xff, size);
1340baa489faSSeongJae Park 	write(comm_pipes.parent_ready[1], "0", 1);
1341baa489faSSeongJae Park 
1342baa489faSSeongJae Park 	wait(&ret);
1343baa489faSSeongJae Park 	if (WIFEXITED(ret))
1344baa489faSSeongJae Park 		ret = WEXITSTATUS(ret);
1345baa489faSSeongJae Park 	else
1346baa489faSSeongJae Park 		ret = -EINVAL;
1347baa489faSSeongJae Park 
1348baa489faSSeongJae Park 	ksft_test_result(!ret, "No leak from parent into child\n");
1349baa489faSSeongJae Park close_comm_pipes:
1350baa489faSSeongJae Park 	close_comm_pipes(&comm_pipes);
1351baa489faSSeongJae Park }
1352baa489faSSeongJae Park 
1353*4bf6a4ebSDavid Hildenbrand static void test_anon_thp_collapse_unshared(char *mem, size_t size,
1354*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
1355baa489faSSeongJae Park {
1356*4bf6a4ebSDavid Hildenbrand 	assert(!is_hugetlb);
1357baa489faSSeongJae Park 	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
1358baa489faSSeongJae Park }
1359baa489faSSeongJae Park 
1360*4bf6a4ebSDavid Hildenbrand static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
1361*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
1362baa489faSSeongJae Park {
1363*4bf6a4ebSDavid Hildenbrand 	assert(!is_hugetlb);
1364baa489faSSeongJae Park 	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
1365baa489faSSeongJae Park }
1366baa489faSSeongJae Park 
1367*4bf6a4ebSDavid Hildenbrand static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
1368*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
1369baa489faSSeongJae Park {
1370*4bf6a4ebSDavid Hildenbrand 	assert(!is_hugetlb);
1371baa489faSSeongJae Park 	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
1372baa489faSSeongJae Park }
1373baa489faSSeongJae Park 
1374*4bf6a4ebSDavid Hildenbrand static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
1375*4bf6a4ebSDavid Hildenbrand 		bool is_hugetlb)
1376baa489faSSeongJae Park {
1377*4bf6a4ebSDavid Hildenbrand 	assert(!is_hugetlb);
1378baa489faSSeongJae Park 	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
1379baa489faSSeongJae Park }
1380baa489faSSeongJae Park 
1381baa489faSSeongJae Park /*
1382baa489faSSeongJae Park  * Test cases that are specific to anonymous THP: pages in private mappings
1383baa489faSSeongJae Park  * that may get shared via COW during fork().
1384baa489faSSeongJae Park  */
1385baa489faSSeongJae Park static const struct test_case anon_thp_test_cases[] = {
1386baa489faSSeongJae Park 	/*
1387baa489faSSeongJae Park 	 * Basic COW test for fork() without any GUP when collapsing a THP
1388baa489faSSeongJae Park 	 * before fork().
1389baa489faSSeongJae Park 	 *
1390baa489faSSeongJae Park 	 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
1391baa489faSSeongJae Park 	 * collapse") might easily get COW handling wrong when not collapsing
1392baa489faSSeongJae Park 	 * exclusivity information properly.
1393baa489faSSeongJae Park 	 */
1394baa489faSSeongJae Park 	{
1395baa489faSSeongJae Park 		"Basic COW after fork() when collapsing before fork()",
1396baa489faSSeongJae Park 		test_anon_thp_collapse_unshared,
1397baa489faSSeongJae Park 	},
1398baa489faSSeongJae Park 	/* Basic COW test, but collapse after COW-sharing a full THP. */
1399baa489faSSeongJae Park 	{
1400baa489faSSeongJae Park 		"Basic COW after fork() when collapsing after fork() (fully shared)",
1401baa489faSSeongJae Park 		test_anon_thp_collapse_fully_shared,
1402baa489faSSeongJae Park 	},
1403baa489faSSeongJae Park 	/*
1404baa489faSSeongJae Park 	 * Basic COW test, but collapse after COW-sharing the lower half of a
1405baa489faSSeongJae Park 	 * THP.
1406baa489faSSeongJae Park 	 */
1407baa489faSSeongJae Park 	{
1408baa489faSSeongJae Park 		"Basic COW after fork() when collapsing after fork() (lower shared)",
1409baa489faSSeongJae Park 		test_anon_thp_collapse_lower_shared,
1410baa489faSSeongJae Park 	},
1411baa489faSSeongJae Park 	/*
1412baa489faSSeongJae Park 	 * Basic COW test, but collapse after COW-sharing the upper half of a
1413baa489faSSeongJae Park 	 * THP.
1414baa489faSSeongJae Park 	 */
1415baa489faSSeongJae Park 	{
1416baa489faSSeongJae Park 		"Basic COW after fork() when collapsing after fork() (upper shared)",
1417baa489faSSeongJae Park 		test_anon_thp_collapse_upper_shared,
1418baa489faSSeongJae Park 	},
1419baa489faSSeongJae Park };
1420baa489faSSeongJae Park 
1421baa489faSSeongJae Park static void run_anon_thp_test_cases(void)
1422baa489faSSeongJae Park {
1423baa489faSSeongJae Park 	int i;
1424baa489faSSeongJae Park 
142512dc16b3SRyan Roberts 	if (!pmdsize)
1426baa489faSSeongJae Park 		return;
1427baa489faSSeongJae Park 
1428baa489faSSeongJae Park 	ksft_print_msg("[INFO] Anonymous THP tests\n");
1429baa489faSSeongJae Park 
1430baa489faSSeongJae Park 	for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
1431baa489faSSeongJae Park 		struct test_case const *test_case = &anon_thp_test_cases[i];
1432baa489faSSeongJae Park 
1433baa489faSSeongJae Park 		ksft_print_msg("[RUN] %s\n", test_case->desc);
143412dc16b3SRyan Roberts 		do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
1435baa489faSSeongJae Park 	}
1436baa489faSSeongJae Park }
1437baa489faSSeongJae Park 
1438baa489faSSeongJae Park static int tests_per_anon_thp_test_case(void)
1439baa489faSSeongJae Park {
144012dc16b3SRyan Roberts 	return pmdsize ? 1 : 0;
1441baa489faSSeongJae Park }
1442baa489faSSeongJae Park 
/*
 * Signature of a non-anonymous COW test.
 *
 * @mem:  writable private mapping under test
 * @smem: second, read-only mapping of the same backing
 * @size: size in bytes of each of the two mappings
 */
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
1444baa489faSSeongJae Park 
/*
 * Basic COW check: writing through the private mapping must not change what
 * is read through the other mapping.
 *
 * @mem:  writable private mapping that gets overwritten with 0xff
 * @smem: other mapping whose content must stay unchanged
 * @size: number of bytes to write and compare
 *
 * Reports exactly one test result, also when the backup buffer cannot be
 * allocated.
 */
static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Without a backup buffer there is nothing to compare against. */
	if (!old) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	ksft_test_result(!memcmp(smem, old, size),
			 "Other mapping not modified\n");
	free(old);
}
1460baa489faSSeongJae Park 
1461baa489faSSeongJae Park static void test_ro_pin(char *mem, const char *smem, size_t size)
1462baa489faSSeongJae Park {
1463baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
1464baa489faSSeongJae Park }
1465baa489faSSeongJae Park 
1466baa489faSSeongJae Park static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
1467baa489faSSeongJae Park {
1468baa489faSSeongJae Park 	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
1469baa489faSSeongJae Park }
1470baa489faSSeongJae Park 
1471baa489faSSeongJae Park static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
1472baa489faSSeongJae Park {
1473baa489faSSeongJae Park 	char *mem, *smem, tmp;
1474baa489faSSeongJae Park 
1475baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
1476baa489faSSeongJae Park 
1477baa489faSSeongJae Park 	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
1478baa489faSSeongJae Park 		   MAP_PRIVATE | MAP_ANON, -1, 0);
1479baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1480baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1481baa489faSSeongJae Park 		return;
1482baa489faSSeongJae Park 	}
1483baa489faSSeongJae Park 
1484baa489faSSeongJae Park 	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
1485baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1486baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1487baa489faSSeongJae Park 		goto munmap;
1488baa489faSSeongJae Park 	}
1489baa489faSSeongJae Park 
1490baa489faSSeongJae Park 	/* Read from the page to populate the shared zeropage. */
1491baa489faSSeongJae Park 	tmp = *mem + *smem;
1492baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1493baa489faSSeongJae Park 
1494baa489faSSeongJae Park 	fn(mem, smem, pagesize);
1495baa489faSSeongJae Park munmap:
1496baa489faSSeongJae Park 	munmap(mem, pagesize);
1497baa489faSSeongJae Park 	if (smem != MAP_FAILED)
1498baa489faSSeongJae Park 		munmap(smem, pagesize);
1499baa489faSSeongJae Park }
1500baa489faSSeongJae Park 
1501baa489faSSeongJae Park static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
1502baa489faSSeongJae Park {
1503baa489faSSeongJae Park 	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
1504baa489faSSeongJae Park 	size_t mmap_size;
1505baa489faSSeongJae Park 	int ret;
1506baa489faSSeongJae Park 
1507baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
1508baa489faSSeongJae Park 
1509baa489faSSeongJae Park 	if (!has_huge_zeropage) {
1510baa489faSSeongJae Park 		ksft_test_result_skip("Huge zeropage not enabled\n");
1511baa489faSSeongJae Park 		return;
1512baa489faSSeongJae Park 	}
1513baa489faSSeongJae Park 
1514baa489faSSeongJae Park 	/* For alignment purposes, we need twice the thp size. */
151512dc16b3SRyan Roberts 	mmap_size = 2 * pmdsize;
1516baa489faSSeongJae Park 	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
1517baa489faSSeongJae Park 			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1518baa489faSSeongJae Park 	if (mmap_mem == MAP_FAILED) {
1519baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1520baa489faSSeongJae Park 		return;
1521baa489faSSeongJae Park 	}
1522baa489faSSeongJae Park 	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
1523baa489faSSeongJae Park 			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1524baa489faSSeongJae Park 	if (mmap_smem == MAP_FAILED) {
1525baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1526baa489faSSeongJae Park 		goto munmap;
1527baa489faSSeongJae Park 	}
1528baa489faSSeongJae Park 
1529baa489faSSeongJae Park 	/* We need a THP-aligned memory area. */
153012dc16b3SRyan Roberts 	mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
153112dc16b3SRyan Roberts 	smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
1532baa489faSSeongJae Park 
153312dc16b3SRyan Roberts 	ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
153412dc16b3SRyan Roberts 	ret |= madvise(smem, pmdsize, MADV_HUGEPAGE);
1535baa489faSSeongJae Park 	if (ret) {
1536baa489faSSeongJae Park 		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
1537baa489faSSeongJae Park 		goto munmap;
1538baa489faSSeongJae Park 	}
1539baa489faSSeongJae Park 
1540baa489faSSeongJae Park 	/*
1541baa489faSSeongJae Park 	 * Read from the memory to populate the huge shared zeropage. Read from
1542baa489faSSeongJae Park 	 * the first sub-page and test if we get another sub-page populated
1543baa489faSSeongJae Park 	 * automatically.
1544baa489faSSeongJae Park 	 */
1545baa489faSSeongJae Park 	tmp = *mem + *smem;
1546baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1547baa489faSSeongJae Park 	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
1548baa489faSSeongJae Park 	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
1549baa489faSSeongJae Park 		ksft_test_result_skip("Did not get THPs populated\n");
1550baa489faSSeongJae Park 		goto munmap;
1551baa489faSSeongJae Park 	}
1552baa489faSSeongJae Park 
155312dc16b3SRyan Roberts 	fn(mem, smem, pmdsize);
1554baa489faSSeongJae Park munmap:
1555baa489faSSeongJae Park 	munmap(mmap_mem, mmap_size);
1556baa489faSSeongJae Park 	if (mmap_smem != MAP_FAILED)
1557baa489faSSeongJae Park 		munmap(mmap_smem, mmap_size);
1558baa489faSSeongJae Park }
1559baa489faSSeongJae Park 
1560baa489faSSeongJae Park static void run_with_memfd(non_anon_test_fn fn, const char *desc)
1561baa489faSSeongJae Park {
1562baa489faSSeongJae Park 	char *mem, *smem, tmp;
1563baa489faSSeongJae Park 	int fd;
1564baa489faSSeongJae Park 
1565baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
1566baa489faSSeongJae Park 
1567baa489faSSeongJae Park 	fd = memfd_create("test", 0);
1568baa489faSSeongJae Park 	if (fd < 0) {
1569baa489faSSeongJae Park 		ksft_test_result_fail("memfd_create() failed\n");
1570baa489faSSeongJae Park 		return;
1571baa489faSSeongJae Park 	}
1572baa489faSSeongJae Park 
1573baa489faSSeongJae Park 	/* File consists of a single page filled with zeroes. */
1574baa489faSSeongJae Park 	if (fallocate(fd, 0, 0, pagesize)) {
1575baa489faSSeongJae Park 		ksft_test_result_fail("fallocate() failed\n");
1576baa489faSSeongJae Park 		goto close;
1577baa489faSSeongJae Park 	}
1578baa489faSSeongJae Park 
1579baa489faSSeongJae Park 	/* Create a private mapping of the memfd. */
1580baa489faSSeongJae Park 	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1581baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1582baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1583baa489faSSeongJae Park 		goto close;
1584baa489faSSeongJae Park 	}
1585baa489faSSeongJae Park 	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1586baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1587baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1588baa489faSSeongJae Park 		goto munmap;
1589baa489faSSeongJae Park 	}
1590baa489faSSeongJae Park 
1591baa489faSSeongJae Park 	/* Fault the page in. */
1592baa489faSSeongJae Park 	tmp = *mem + *smem;
1593baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1594baa489faSSeongJae Park 
1595baa489faSSeongJae Park 	fn(mem, smem, pagesize);
1596baa489faSSeongJae Park munmap:
1597baa489faSSeongJae Park 	munmap(mem, pagesize);
1598baa489faSSeongJae Park 	if (smem != MAP_FAILED)
1599baa489faSSeongJae Park 		munmap(smem, pagesize);
1600baa489faSSeongJae Park close:
1601baa489faSSeongJae Park 	close(fd);
1602baa489faSSeongJae Park }
1603baa489faSSeongJae Park 
1604baa489faSSeongJae Park static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
1605baa489faSSeongJae Park {
1606baa489faSSeongJae Park 	char *mem, *smem, tmp;
1607baa489faSSeongJae Park 	FILE *file;
1608baa489faSSeongJae Park 	int fd;
1609baa489faSSeongJae Park 
1610baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
1611baa489faSSeongJae Park 
1612baa489faSSeongJae Park 	file = tmpfile();
1613baa489faSSeongJae Park 	if (!file) {
1614baa489faSSeongJae Park 		ksft_test_result_fail("tmpfile() failed\n");
1615baa489faSSeongJae Park 		return;
1616baa489faSSeongJae Park 	}
1617baa489faSSeongJae Park 
1618baa489faSSeongJae Park 	fd = fileno(file);
1619baa489faSSeongJae Park 	if (fd < 0) {
1620baa489faSSeongJae Park 		ksft_test_result_skip("fileno() failed\n");
1621baa489faSSeongJae Park 		return;
1622baa489faSSeongJae Park 	}
1623baa489faSSeongJae Park 
1624baa489faSSeongJae Park 	/* File consists of a single page filled with zeroes. */
1625baa489faSSeongJae Park 	if (fallocate(fd, 0, 0, pagesize)) {
1626baa489faSSeongJae Park 		ksft_test_result_fail("fallocate() failed\n");
1627baa489faSSeongJae Park 		goto close;
1628baa489faSSeongJae Park 	}
1629baa489faSSeongJae Park 
1630baa489faSSeongJae Park 	/* Create a private mapping of the memfd. */
1631baa489faSSeongJae Park 	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1632baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1633baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1634baa489faSSeongJae Park 		goto close;
1635baa489faSSeongJae Park 	}
1636baa489faSSeongJae Park 	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1637baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1638baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1639baa489faSSeongJae Park 		goto munmap;
1640baa489faSSeongJae Park 	}
1641baa489faSSeongJae Park 
1642baa489faSSeongJae Park 	/* Fault the page in. */
1643baa489faSSeongJae Park 	tmp = *mem + *smem;
1644baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1645baa489faSSeongJae Park 
1646baa489faSSeongJae Park 	fn(mem, smem, pagesize);
1647baa489faSSeongJae Park munmap:
1648baa489faSSeongJae Park 	munmap(mem, pagesize);
1649baa489faSSeongJae Park 	if (smem != MAP_FAILED)
1650baa489faSSeongJae Park 		munmap(smem, pagesize);
1651baa489faSSeongJae Park close:
1652baa489faSSeongJae Park 	fclose(file);
1653baa489faSSeongJae Park }
1654baa489faSSeongJae Park 
1655baa489faSSeongJae Park static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
1656baa489faSSeongJae Park 				   size_t hugetlbsize)
1657baa489faSSeongJae Park {
1658baa489faSSeongJae Park 	int flags = MFD_HUGETLB;
1659baa489faSSeongJae Park 	char *mem, *smem, tmp;
1660baa489faSSeongJae Park 	int fd;
1661baa489faSSeongJae Park 
1662baa489faSSeongJae Park 	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
1663baa489faSSeongJae Park 		       hugetlbsize / 1024);
1664baa489faSSeongJae Park 
1665baa489faSSeongJae Park 	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
1666baa489faSSeongJae Park 
1667baa489faSSeongJae Park 	fd = memfd_create("test", flags);
1668baa489faSSeongJae Park 	if (fd < 0) {
1669baa489faSSeongJae Park 		ksft_test_result_skip("memfd_create() failed\n");
1670baa489faSSeongJae Park 		return;
1671baa489faSSeongJae Park 	}
1672baa489faSSeongJae Park 
1673baa489faSSeongJae Park 	/* File consists of a single page filled with zeroes. */
1674baa489faSSeongJae Park 	if (fallocate(fd, 0, 0, hugetlbsize)) {
1675baa489faSSeongJae Park 		ksft_test_result_skip("need more free huge pages\n");
1676baa489faSSeongJae Park 		goto close;
1677baa489faSSeongJae Park 	}
1678baa489faSSeongJae Park 
1679baa489faSSeongJae Park 	/* Create a private mapping of the memfd. */
1680baa489faSSeongJae Park 	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
1681baa489faSSeongJae Park 		   0);
1682baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1683baa489faSSeongJae Park 		ksft_test_result_skip("need more free huge pages\n");
1684baa489faSSeongJae Park 		goto close;
1685baa489faSSeongJae Park 	}
1686baa489faSSeongJae Park 	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
1687baa489faSSeongJae Park 	if (mem == MAP_FAILED) {
1688baa489faSSeongJae Park 		ksft_test_result_fail("mmap() failed\n");
1689baa489faSSeongJae Park 		goto munmap;
1690baa489faSSeongJae Park 	}
1691baa489faSSeongJae Park 
1692baa489faSSeongJae Park 	/* Fault the page in. */
1693baa489faSSeongJae Park 	tmp = *mem + *smem;
1694baa489faSSeongJae Park 	asm volatile("" : "+r" (tmp));
1695baa489faSSeongJae Park 
1696baa489faSSeongJae Park 	fn(mem, smem, hugetlbsize);
1697baa489faSSeongJae Park munmap:
1698baa489faSSeongJae Park 	munmap(mem, hugetlbsize);
1699baa489faSSeongJae Park 	if (mem != MAP_FAILED)
1700baa489faSSeongJae Park 		munmap(smem, hugetlbsize);
1701baa489faSSeongJae Park close:
1702baa489faSSeongJae Park 	close(fd);
1703baa489faSSeongJae Park }
1704baa489faSSeongJae Park 
1705baa489faSSeongJae Park struct non_anon_test_case {
1706baa489faSSeongJae Park 	const char *desc;
1707baa489faSSeongJae Park 	non_anon_test_fn fn;
1708baa489faSSeongJae Park };
1709baa489faSSeongJae Park 
1710baa489faSSeongJae Park /*
1711baa489faSSeongJae Park  * Test cases that target any pages in private mappings that are not anonymous:
1712baa489faSSeongJae Park  * pages that may get shared via COW ndependent of fork(). This includes
1713baa489faSSeongJae Park  * the shared zeropage(s), pagecache pages, ...
1714baa489faSSeongJae Park  */
1715baa489faSSeongJae Park static const struct non_anon_test_case non_anon_test_cases[] = {
1716baa489faSSeongJae Park 	/*
1717baa489faSSeongJae Park 	 * Basic COW test without any GUP. If we miss to break COW, changes are
1718baa489faSSeongJae Park 	 * visible via other private/shared mappings.
1719baa489faSSeongJae Park 	 */
1720baa489faSSeongJae Park 	{
1721baa489faSSeongJae Park 		"Basic COW",
1722baa489faSSeongJae Park 		test_cow,
1723baa489faSSeongJae Park 	},
1724baa489faSSeongJae Park 	/*
1725baa489faSSeongJae Park 	 * Take a R/O longterm pin. When modifying the page via the page table,
1726baa489faSSeongJae Park 	 * the page content change must be visible via the pin.
1727baa489faSSeongJae Park 	 */
1728baa489faSSeongJae Park 	{
1729baa489faSSeongJae Park 		"R/O longterm GUP pin",
1730baa489faSSeongJae Park 		test_ro_pin,
1731baa489faSSeongJae Park 	},
1732baa489faSSeongJae Park 	/* Same as above, but using GUP-fast. */
1733baa489faSSeongJae Park 	{
1734baa489faSSeongJae Park 		"R/O longterm GUP-fast pin",
1735baa489faSSeongJae Park 		test_ro_fast_pin,
1736baa489faSSeongJae Park 	},
1737baa489faSSeongJae Park };
1738baa489faSSeongJae Park 
1739baa489faSSeongJae Park static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
1740baa489faSSeongJae Park {
1741baa489faSSeongJae Park 	int i;
1742baa489faSSeongJae Park 
1743baa489faSSeongJae Park 	run_with_zeropage(test_case->fn, test_case->desc);
1744baa489faSSeongJae Park 	run_with_memfd(test_case->fn, test_case->desc);
1745baa489faSSeongJae Park 	run_with_tmpfile(test_case->fn, test_case->desc);
174612dc16b3SRyan Roberts 	if (pmdsize)
1747baa489faSSeongJae Park 		run_with_huge_zeropage(test_case->fn, test_case->desc);
1748baa489faSSeongJae Park 	for (i = 0; i < nr_hugetlbsizes; i++)
1749baa489faSSeongJae Park 		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
1750baa489faSSeongJae Park 				       hugetlbsizes[i]);
1751baa489faSSeongJae Park }
1752baa489faSSeongJae Park 
1753baa489faSSeongJae Park static void run_non_anon_test_cases(void)
1754baa489faSSeongJae Park {
1755baa489faSSeongJae Park 	int i;
1756baa489faSSeongJae Park 
1757baa489faSSeongJae Park 	ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");
1758baa489faSSeongJae Park 
1759baa489faSSeongJae Park 	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
1760baa489faSSeongJae Park 		run_non_anon_test_case(&non_anon_test_cases[i]);
1761baa489faSSeongJae Park }
1762baa489faSSeongJae Park 
1763baa489faSSeongJae Park static int tests_per_non_anon_test_case(void)
1764baa489faSSeongJae Park {
1765baa489faSSeongJae Park 	int tests = 3 + nr_hugetlbsizes;
1766baa489faSSeongJae Park 
176712dc16b3SRyan Roberts 	if (pmdsize)
1768baa489faSSeongJae Park 		tests += 1;
1769baa489faSSeongJae Park 	return tests;
1770baa489faSSeongJae Park }
1771baa489faSSeongJae Park 
1772baa489faSSeongJae Park int main(int argc, char **argv)
1773baa489faSSeongJae Park {
1774baa489faSSeongJae Park 	int err;
1775c0f79103SRyan Roberts 	struct thp_settings default_settings;
1776baa489faSSeongJae Park 
1777a6fcd57cSDavid Hildenbrand 	ksft_print_header();
1778a6fcd57cSDavid Hildenbrand 
1779baa489faSSeongJae Park 	pagesize = getpagesize();
178012dc16b3SRyan Roberts 	pmdsize = read_pmd_pagesize();
178112dc16b3SRyan Roberts 	if (pmdsize) {
1782c0f79103SRyan Roberts 		/* Only if THP is supported. */
1783c0f79103SRyan Roberts 		thp_read_settings(&default_settings);
1784c0f79103SRyan Roberts 		default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT;
1785c0f79103SRyan Roberts 		thp_save_settings();
1786c0f79103SRyan Roberts 		thp_push_settings(&default_settings);
1787c0f79103SRyan Roberts 
178812dc16b3SRyan Roberts 		ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
178912dc16b3SRyan Roberts 			       pmdsize / 1024);
1790c0f79103SRyan Roberts 		nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
179112dc16b3SRyan Roberts 	}
179281b1e3f9SDavid Hildenbrand 	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
179381b1e3f9SDavid Hildenbrand 						    ARRAY_SIZE(hugetlbsizes));
1794baa489faSSeongJae Park 	detect_huge_zeropage();
1795baa489faSSeongJae Park 
1796baa489faSSeongJae Park 	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
1797baa489faSSeongJae Park 		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
1798baa489faSSeongJae Park 		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());
1799baa489faSSeongJae Park 
1800baa489faSSeongJae Park 	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
1801baa489faSSeongJae Park 	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
1802baa489faSSeongJae Park 	if (pagemap_fd < 0)
1803baa489faSSeongJae Park 		ksft_exit_fail_msg("opening pagemap failed\n");
1804baa489faSSeongJae Park 
1805baa489faSSeongJae Park 	run_anon_test_cases();
1806baa489faSSeongJae Park 	run_anon_thp_test_cases();
1807baa489faSSeongJae Park 	run_non_anon_test_cases();
1808baa489faSSeongJae Park 
1809c0f79103SRyan Roberts 	if (pmdsize) {
1810c0f79103SRyan Roberts 		/* Only if THP is supported. */
1811c0f79103SRyan Roberts 		thp_restore_settings();
1812c0f79103SRyan Roberts 	}
1813c0f79103SRyan Roberts 
1814baa489faSSeongJae Park 	err = ksft_get_fail_cnt();
1815baa489faSSeongJae Park 	if (err)
1816baa489faSSeongJae Park 		ksft_exit_fail_msg("%d out of %d tests failed\n",
1817baa489faSSeongJae Park 				   err, ksft_test_num());
181869e545edSNathan Chancellor 	ksft_exit_pass();
1819baa489faSSeongJae Park }
1820