xref: /linux/tools/testing/selftests/memfd/memfd_test.c (revision f9aa1fb9f8c0542f5f6e6e620de320995d5622ad)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4 
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <signal.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 #include <sys/stat.h>
19 #include <sys/syscall.h>
20 #include <sys/wait.h>
21 #include <unistd.h>
22 #include <ctype.h>
23 
24 #include "common.h"
25 
/*
 * Label strings. memfd_str (below) starts out as MEMFD_STR and is printed as
 * the prefix of every test banner.
 * NOTE(review): MEMFD_HUGE_STR and SHARED_FT_STR are presumably used by code
 * outside this excerpt -- confirm.
 */
#define MEMFD_STR	"memfd:"
#define MEMFD_HUGE_STR	"memfd-hugetlb:"
#define SHARED_FT_STR	"(shared file-table)"

/*
 * MFD_DEF_SIZE: initial value of mfd_def_size, the size used for test files
 * and mappings.
 * STACK_SIZE: bytes allocated by spawn_thread() for the stack of a clone()d
 * child.
 */
#define MFD_DEF_SIZE 8192
#define STACK_SIZE 65536

/*
 * Local definition of the exec seal bit.
 * NOTE(review): presumably mirrors the uapi value for builds against older
 * headers -- confirm.
 */
#define F_SEAL_EXEC	0x0020

/*
 * Seal set that test_exec_seal() expects to find after adding F_SEAL_EXEC to
 * a memfd whose mode still has an execute bit (0700).
 */
#define F_WX_SEALS (F_SEAL_SHRINK | \
		    F_SEAL_GROW | \
		    F_SEAL_WRITE | \
		    F_SEAL_FUTURE_WRITE | \
		    F_SEAL_EXEC)

/*
 * Local definition of the memfd_create() flag used by the NOEXEC_SEAL tests.
 * NOTE(review): presumably mirrors the uapi value -- confirm.
 */
#define MFD_NOEXEC_SEAL	0x0008U

/*
 * Default is not to test hugetlbfs
 */
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;
48 
/*
 * Resolve @fd to the target of its /proc/self/fd/<fd> symlink.
 *
 * At most @bufsize - 1 bytes of the link target are stored in @buf, which is
 * always NUL-terminated on return. Returns the number of bytes stored, not
 * counting the terminator. Aborts if the path cannot be formatted or the link
 * cannot be read.
 */
static ssize_t fd2name(int fd, char *buf, size_t bufsize)
{
	char link_path[PATH_MAX];
	ssize_t len;

	if (snprintf(link_path, PATH_MAX, "/proc/self/fd/%d", fd) < 0) {
		printf("snprintf(%d) failed on %m\n", fd);
		abort();
	}

	/* Keep one byte back for the terminator written below. */
	len = readlink(link_path, buf, bufsize - 1);
	if (len == -1) {
		printf("readlink(%s) failed %m\n", link_path);
		abort();
	}

	buf[len] = '\0';
	return len;
}
72 
/*
 * Create a memfd called @name with @flags and resize it to @sz bytes.
 *
 * Returns the new file descriptor. Aborts if either the creation or the
 * ftruncate() fails.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(fd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return fd;
}
92 
/*
 * Write @val to /proc/sys/vm/memfd_noexec and abort if the file cannot be
 * opened or the write fails.
 *
 * The descriptor is closed before returning; this helper is called many
 * times per run and previously leaked one descriptor per call.
 */
static void sysctl_assert_write(const char *val)
{
	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed: %m\n");
		abort();
	}

	if (write(fd, val, strlen(val)) < 0) {
		printf("write sysctl %s failed: %m\n", val);
		abort();
	}

	close(fd);
}
107 
/*
 * Try to write @val to /proc/sys/vm/memfd_noexec and abort if the write
 * unexpectedly succeeds (or if the file cannot be opened at all).
 *
 * The descriptor is closed before returning so repeated calls do not leak
 * descriptors.
 */
static void sysctl_fail_write(const char *val)
{
	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed: %m\n");
		abort();
	}

	if (write(fd, val, strlen(val)) >= 0) {
		printf("write sysctl %s succeeded, but failure expected\n",
				val);
		abort();
	}

	close(fd);
}
123 
/*
 * Read /proc/sys/vm/memfd_noexec and abort unless its value, cut at the
 * first whitespace character, equals @val.
 *
 * Aborts as well if the file cannot be opened or read.
 */
static void sysctl_assert_equal(const char *val)
{
	char *p, buf[128] = {0};
	ssize_t nread;
	int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed: %m\n");
		abort();
	}

	/* Read one byte less than the buffer so a terminator always fits. */
	nread = read(fd, buf, sizeof(buf) - 1);
	if (nread < 0) {
		printf("read sysctl failed: %m\n");
		abort();
	}
	close(fd);
	buf[nread] = '\0';

	/*
	 * Cut the string at the first whitespace character. The scan also
	 * stops at the terminator, so an empty or whitespace-free read can no
	 * longer walk past the end of buf. isspace() requires an argument
	 * representable as unsigned char, hence the cast.
	 */
	p = buf;
	while (*p != '\0' && !isspace((unsigned char)*p))
		p++;
	*p = '\0';

	if (strcmp(buf, val) != 0) {
		printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
		abort();
	}
}
150 
/*
 * Open a second descriptor for @fd_in by opening /proc/self/fd/<fd_in> with
 * O_RDWR. Returns the new descriptor; aborts if the open fails.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char proc_path[100];
	int reopened;

	snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d", fd_in);

	reopened = open(proc_path, O_RDWR);
	if (reopened >= 0)
		return reopened;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
166 
/*
 * Expect memfd creation with @name and @flags to be rejected.
 *
 * If a descriptor is returned anyway, it is closed and the test aborts.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(fd);
	abort();
}
179 
/*
 * Return the seals reported by fcntl(F_GET_SEALS) for @fd.
 * Aborts if the fcntl() call fails.
 */
static unsigned int mfd_assert_get_seals(int fd)
{
	int seals = fcntl(fd, F_GET_SEALS);

	if (seals >= 0)
		return (unsigned int)seals;

	printf("GET_SEALS(%d) failed: %m\n", fd);
	abort();
}
192 
/*
 * Abort unless the seal set of @fd is exactly @seals.
 * The name behind the descriptor is looked up first so it can be shown in
 * the failure message.
 */
static void mfd_assert_has_seals(int fd, unsigned int seals)
{
	char name[PATH_MAX];
	unsigned int actual;

	fd2name(fd, name, PATH_MAX);

	actual = mfd_assert_get_seals(fd);
	if (actual == seals)
		return;

	printf("%u != %u = GET_SEALS(%s)\n", seals, actual, name);
	abort();
}
205 
/*
 * Add @seals to @fd with fcntl(F_ADD_SEALS) and abort on failure.
 * The seal set present before the call is fetched (asserting success) so the
 * failure message can show it.
 */
static void mfd_assert_add_seals(int fd, unsigned int seals)
{
	unsigned int before = mfd_assert_get_seals(fd);

	if (fcntl(fd, F_ADD_SEALS, seals) < 0) {
		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, before, seals);
		abort();
	}
}
218 
/*
 * Expect fcntl(F_ADD_SEALS) with @seals to fail on @fd; abort if it succeeds.
 * The current seals are read only for the diagnostic; if they cannot be read
 * they are reported as 0.
 */
static void mfd_fail_add_seals(int fd, unsigned int seals)
{
	int cur = fcntl(fd, F_GET_SEALS);
	unsigned int before = (cur < 0) ? 0 : (unsigned int)cur;

	if (fcntl(fd, F_ADD_SEALS, seals) < 0)
		return;

	printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
			fd, before, seals);
	abort();
}
237 
/*
 * Abort unless fstat() succeeds on @fd and reports a file size of @size.
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat info;

	if (fstat(fd, &info) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (info.st_size == size)
		return;

	printf("wrong file size %lld, but expected %lld\n",
	       (long long)info.st_size, (long long)size);
	abort();
}
253 
/*
 * Duplicate @fd with dup() and return the new descriptor.
 * Aborts if dup() fails.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy < 0) {
		printf("dup(%d) failed: %m\n", fd);
		abort();
	}

	return copy;
}
266 
267 static void *mfd_assert_mmap_shared(int fd)
268 {
269 	void *p;
270 
271 	p = mmap(NULL,
272 		 mfd_def_size,
273 		 PROT_READ | PROT_WRITE,
274 		 MAP_SHARED,
275 		 fd,
276 		 0);
277 	if (p == MAP_FAILED) {
278 		printf("mmap() failed: %m\n");
279 		abort();
280 	}
281 
282 	return p;
283 }
284 
285 static void *mfd_assert_mmap_private(int fd)
286 {
287 	void *p;
288 
289 	p = mmap(NULL,
290 		 mfd_def_size,
291 		 PROT_READ,
292 		 MAP_PRIVATE,
293 		 fd,
294 		 0);
295 	if (p == MAP_FAILED) {
296 		printf("mmap() failed: %m\n");
297 		abort();
298 	}
299 
300 	return p;
301 }
302 
/*
 * Open /proc/self/fd/<fd> with the given @flags and @mode.
 * Returns the new descriptor; aborts if open() fails.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];
	int opened;

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	opened = open(proc_path, flags, mode);
	if (opened >= 0)
		return opened;

	printf("open(%s) failed: %m\n", proc_path);
	abort();
}
317 
/*
 * Expect opening /proc/self/fd/<fd> with @flags and @mode to fail.
 * Aborts if open() returns a descriptor.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	if (open(proc_path, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", proc_path);
	abort();
}
330 
331 static void mfd_assert_read(int fd)
332 {
333 	char buf[16];
334 	void *p;
335 	ssize_t l;
336 
337 	l = read(fd, buf, sizeof(buf));
338 	if (l != sizeof(buf)) {
339 		printf("read() failed: %m\n");
340 		abort();
341 	}
342 
343 	/* verify PROT_READ *is* allowed */
344 	p = mmap(NULL,
345 		 mfd_def_size,
346 		 PROT_READ,
347 		 MAP_PRIVATE,
348 		 fd,
349 		 0);
350 	if (p == MAP_FAILED) {
351 		printf("mmap() failed: %m\n");
352 		abort();
353 	}
354 	munmap(p, mfd_def_size);
355 
356 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
357 	p = mmap(NULL,
358 		 mfd_def_size,
359 		 PROT_READ | PROT_WRITE,
360 		 MAP_PRIVATE,
361 		 fd,
362 		 0);
363 	if (p == MAP_FAILED) {
364 		printf("mmap() failed: %m\n");
365 		abort();
366 	}
367 	munmap(p, mfd_def_size);
368 }
369 
370 /* Test that PROT_READ + MAP_SHARED mappings work. */
371 static void mfd_assert_read_shared(int fd)
372 {
373 	void *p;
374 
375 	/* verify PROT_READ and MAP_SHARED *is* allowed */
376 	p = mmap(NULL,
377 		 mfd_def_size,
378 		 PROT_READ,
379 		 MAP_SHARED,
380 		 fd,
381 		 0);
382 	if (p == MAP_FAILED) {
383 		printf("mmap() failed: %m\n");
384 		abort();
385 	}
386 	munmap(p, mfd_def_size);
387 }
388 
389 static void mfd_assert_fork_private_write(int fd)
390 {
391 	int *p;
392 	pid_t pid;
393 
394 	p = mmap(NULL,
395 		 mfd_def_size,
396 		 PROT_READ | PROT_WRITE,
397 		 MAP_PRIVATE,
398 		 fd,
399 		 0);
400 	if (p == MAP_FAILED) {
401 		printf("mmap() failed: %m\n");
402 		abort();
403 	}
404 
405 	p[0] = 22;
406 
407 	pid = fork();
408 	if (pid == 0) {
409 		p[0] = 33;
410 		exit(0);
411 	} else {
412 		waitpid(pid, NULL, 0);
413 
414 		if (p[0] != 22) {
415 			printf("MAP_PRIVATE copy-on-write failed: %m\n");
416 			abort();
417 		}
418 	}
419 
420 	munmap(p, mfd_def_size);
421 }
422 
423 static void mfd_assert_write(int fd)
424 {
425 	ssize_t l;
426 	void *p;
427 	int r;
428 
429 	/*
430 	 * huegtlbfs does not support write, but we want to
431 	 * verify everything else here.
432 	 */
433 	if (!hugetlbfs_test) {
434 		/* verify write() succeeds */
435 		l = write(fd, "\0\0\0\0", 4);
436 		if (l != 4) {
437 			printf("write() failed: %m\n");
438 			abort();
439 		}
440 	}
441 
442 	/* verify PROT_READ | PROT_WRITE is allowed */
443 	p = mmap(NULL,
444 		 mfd_def_size,
445 		 PROT_READ | PROT_WRITE,
446 		 MAP_SHARED,
447 		 fd,
448 		 0);
449 	if (p == MAP_FAILED) {
450 		printf("mmap() failed: %m\n");
451 		abort();
452 	}
453 	*(char *)p = 0;
454 	munmap(p, mfd_def_size);
455 
456 	/* verify PROT_WRITE is allowed */
457 	p = mmap(NULL,
458 		 mfd_def_size,
459 		 PROT_WRITE,
460 		 MAP_SHARED,
461 		 fd,
462 		 0);
463 	if (p == MAP_FAILED) {
464 		printf("mmap() failed: %m\n");
465 		abort();
466 	}
467 	*(char *)p = 0;
468 	munmap(p, mfd_def_size);
469 
470 	/* verify PROT_READ with MAP_SHARED is allowed and a following
471 	 * mprotect(PROT_WRITE) allows writing */
472 	p = mmap(NULL,
473 		 mfd_def_size,
474 		 PROT_READ,
475 		 MAP_SHARED,
476 		 fd,
477 		 0);
478 	if (p == MAP_FAILED) {
479 		printf("mmap() failed: %m\n");
480 		abort();
481 	}
482 
483 	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
484 	if (r < 0) {
485 		printf("mprotect() failed: %m\n");
486 		abort();
487 	}
488 
489 	*(char *)p = 0;
490 	munmap(p, mfd_def_size);
491 
492 	/* verify PUNCH_HOLE works */
493 	r = fallocate(fd,
494 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
495 		      0,
496 		      mfd_def_size);
497 	if (r < 0) {
498 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
499 		abort();
500 	}
501 }
502 
503 static void mfd_fail_write(int fd)
504 {
505 	ssize_t l;
506 	void *p;
507 	int r;
508 
509 	/* verify write() fails */
510 	l = write(fd, "data", 4);
511 	if (l != -EPERM) {
512 		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
513 		abort();
514 	}
515 
516 	/* verify PROT_READ | PROT_WRITE is not allowed */
517 	p = mmap(NULL,
518 		 mfd_def_size,
519 		 PROT_READ | PROT_WRITE,
520 		 MAP_SHARED,
521 		 fd,
522 		 0);
523 	if (p != MAP_FAILED) {
524 		printf("mmap() didn't fail as expected\n");
525 		abort();
526 	}
527 
528 	/* verify PROT_WRITE is not allowed */
529 	p = mmap(NULL,
530 		 mfd_def_size,
531 		 PROT_WRITE,
532 		 MAP_SHARED,
533 		 fd,
534 		 0);
535 	if (p != MAP_FAILED) {
536 		printf("mmap() didn't fail as expected\n");
537 		abort();
538 	}
539 
540 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
541 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
542 	p = mmap(NULL,
543 		 mfd_def_size,
544 		 PROT_READ,
545 		 MAP_SHARED,
546 		 fd,
547 		 0);
548 	if (p != MAP_FAILED) {
549 		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
550 		if (r >= 0) {
551 			printf("mmap()+mprotect() didn't fail as expected\n");
552 			abort();
553 		}
554 		munmap(p, mfd_def_size);
555 	}
556 
557 	/* verify PUNCH_HOLE fails */
558 	r = fallocate(fd,
559 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
560 		      0,
561 		      mfd_def_size);
562 	if (r >= 0) {
563 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
564 		abort();
565 	}
566 }
567 
568 static void mfd_assert_shrink(int fd)
569 {
570 	int r, fd2;
571 
572 	r = ftruncate(fd, mfd_def_size / 2);
573 	if (r < 0) {
574 		printf("ftruncate(SHRINK) failed: %m\n");
575 		abort();
576 	}
577 
578 	mfd_assert_size(fd, mfd_def_size / 2);
579 
580 	fd2 = mfd_assert_open(fd,
581 			      O_RDWR | O_CREAT | O_TRUNC,
582 			      S_IRUSR | S_IWUSR);
583 	close(fd2);
584 
585 	mfd_assert_size(fd, 0);
586 }
587 
588 static void mfd_fail_shrink(int fd)
589 {
590 	int r;
591 
592 	r = ftruncate(fd, mfd_def_size / 2);
593 	if (r >= 0) {
594 		printf("ftruncate(SHRINK) didn't fail as expected\n");
595 		abort();
596 	}
597 
598 	mfd_fail_open(fd,
599 		      O_RDWR | O_CREAT | O_TRUNC,
600 		      S_IRUSR | S_IWUSR);
601 }
602 
603 static void mfd_assert_grow(int fd)
604 {
605 	int r;
606 
607 	r = ftruncate(fd, mfd_def_size * 2);
608 	if (r < 0) {
609 		printf("ftruncate(GROW) failed: %m\n");
610 		abort();
611 	}
612 
613 	mfd_assert_size(fd, mfd_def_size * 2);
614 
615 	r = fallocate(fd,
616 		      0,
617 		      0,
618 		      mfd_def_size * 4);
619 	if (r < 0) {
620 		printf("fallocate(ALLOC) failed: %m\n");
621 		abort();
622 	}
623 
624 	mfd_assert_size(fd, mfd_def_size * 4);
625 }
626 
627 static void mfd_fail_grow(int fd)
628 {
629 	int r;
630 
631 	r = ftruncate(fd, mfd_def_size * 2);
632 	if (r >= 0) {
633 		printf("ftruncate(GROW) didn't fail as expected\n");
634 		abort();
635 	}
636 
637 	r = fallocate(fd,
638 		      0,
639 		      0,
640 		      mfd_def_size * 4);
641 	if (r >= 0) {
642 		printf("fallocate(ALLOC) didn't fail as expected\n");
643 		abort();
644 	}
645 }
646 
647 static void mfd_assert_grow_write(int fd)
648 {
649 	static char *buf;
650 	ssize_t l;
651 
652 	/* hugetlbfs does not support write */
653 	if (hugetlbfs_test)
654 		return;
655 
656 	buf = malloc(mfd_def_size * 8);
657 	if (!buf) {
658 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
659 		abort();
660 	}
661 
662 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
663 	if (l != (mfd_def_size * 8)) {
664 		printf("pwrite() failed: %m\n");
665 		abort();
666 	}
667 
668 	mfd_assert_size(fd, mfd_def_size * 8);
669 }
670 
671 static void mfd_fail_grow_write(int fd)
672 {
673 	static char *buf;
674 	ssize_t l;
675 
676 	/* hugetlbfs does not support write */
677 	if (hugetlbfs_test)
678 		return;
679 
680 	buf = malloc(mfd_def_size * 8);
681 	if (!buf) {
682 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
683 		abort();
684 	}
685 
686 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
687 	if (l == (mfd_def_size * 8)) {
688 		printf("pwrite() didn't fail as expected\n");
689 		abort();
690 	}
691 }
692 
/*
 * Abort unless the permission bits (st_mode & 07777) of @fd equal @mode.
 * The name behind the descriptor is resolved up front for the diagnostics.
 */
static void mfd_assert_mode(int fd, int mode)
{
	char name[PATH_MAX];
	struct stat info;

	fd2name(fd, name, PATH_MAX);

	if (fstat(fd, &info) < 0) {
		printf("fstat(%s) failed: %m\n", name);
		abort();
	}

	if ((info.st_mode & 07777) == mode)
		return;

	printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
	       name, (int)info.st_mode & 07777, mode);
	abort();
}
711 
/*
 * Change the mode of @fd to @mode with fchmod() and confirm through
 * mfd_assert_mode() that the new mode took effect. Aborts on failure.
 */
static void mfd_assert_chmod(int fd, int mode)
{
	char name[PATH_MAX];

	fd2name(fd, name, PATH_MAX);

	if (fchmod(fd, mode) >= 0) {
		mfd_assert_mode(fd, mode);
		return;
	}

	printf("fchmod(%s, 0%04o) failed: %m\n", name, mode);
	abort();
}
725 
/*
 * Expect fchmod(@fd, @mode) to fail and the permission bits to stay as they
 * were before the attempt. Aborts if fstat() fails, if fchmod() succeeds, or
 * if the mode changed anyway.
 */
static void mfd_fail_chmod(int fd, int mode)
{
	char name[PATH_MAX];
	struct stat before;

	fd2name(fd, name, PATH_MAX);

	/* Remember the current mode so it can be compared afterwards. */
	if (fstat(fd, &before) < 0) {
		printf("fstat(%s) failed: %m\n", name);
		abort();
	}

	if (fchmod(fd, mode) == 0) {
		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
		       name, mode);
		abort();
	}

	mfd_assert_mode(fd, before.st_mode & 07777);
}
747 
/*
 * Entry point of an idle child: waits in sigwait() on a set containing only
 * SIGTERM and returns 0 once sigwait() returns. @arg is unused.
 */
static int idle_thread_fn(void *arg)
{
	sigset_t wait_set;
	int received;

	/* dummy waiter; SIGTERM terminates us anyway */
	sigemptyset(&wait_set);
	sigaddset(&wait_set, SIGTERM);
	sigwait(&wait_set, &received);

	return 0;
}
760 
761 static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
762 {
763 	uint8_t *stack;
764 	pid_t pid;
765 
766 	stack = malloc(STACK_SIZE);
767 	if (!stack) {
768 		printf("malloc(STACK_SIZE) failed: %m\n");
769 		abort();
770 	}
771 
772 	pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
773 	if (pid < 0) {
774 		printf("clone() failed: %m\n");
775 		abort();
776 	}
777 
778 	return pid;
779 }
780 
/*
 * Wait for child @pid to terminate and abort unless it exited with status 0.
 * A failing waitpid(), a non-zero exit code and death by signal all abort
 * the test.
 */
static void join_thread(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) < 0) {
		printf("newpid thread: waitpid() failed: %m\n");
		abort();
	}

	if (WIFEXITED(status)) {
		int code = WEXITSTATUS(status);

		if (code != 0) {
			printf("newpid thread: exited with non-zero error code %d\n",
			       code);
			abort();
		}
	}

	if (WIFSIGNALED(status)) {
		printf("newpid thread: killed by signal %d\n",
		       WTERMSIG(status));
		abort();
	}
}
802 
803 static pid_t spawn_idle_thread(unsigned int flags)
804 {
805 	return spawn_thread(flags, idle_thread_fn, NULL);
806 }
807 
/*
 * Stop an idle child: send SIGTERM to @pid, then reap it with waitpid().
 * Neither call's result is checked.
 */
static void join_idle_thread(pid_t pid)
{
	int *no_status = NULL;

	kill(pid, SIGTERM);
	waitpid(pid, no_status, 0);
}
813 
814 /*
815  * Test memfd_create() syscall
816  * Verify syscall-argument validation, including name checks, flag validation
817  * and more.
818  */
819 static void test_create(void)
820 {
821 	char buf[2048];
822 	int fd;
823 
824 	printf("%s CREATE\n", memfd_str);
825 
826 	/* test NULL name */
827 	mfd_fail_new(NULL, 0);
828 
829 	/* test over-long name (not zero-terminated) */
830 	memset(buf, 0xff, sizeof(buf));
831 	mfd_fail_new(buf, 0);
832 
833 	/* test over-long zero-terminated name */
834 	memset(buf, 0xff, sizeof(buf));
835 	buf[sizeof(buf) - 1] = 0;
836 	mfd_fail_new(buf, 0);
837 
838 	/* verify "" is a valid name */
839 	fd = mfd_assert_new("", 0, 0);
840 	close(fd);
841 
842 	/* verify invalid O_* open flags */
843 	mfd_fail_new("", 0x0100);
844 	mfd_fail_new("", ~MFD_CLOEXEC);
845 	mfd_fail_new("", ~MFD_ALLOW_SEALING);
846 	mfd_fail_new("", ~0);
847 	mfd_fail_new("", 0x80000000U);
848 
849 	/* verify EXEC and NOEXEC_SEAL can't both be set */
850 	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
851 
852 	/* verify MFD_CLOEXEC is allowed */
853 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
854 	close(fd);
855 
856 	/* verify MFD_ALLOW_SEALING is allowed */
857 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
858 	close(fd);
859 
860 	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
861 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
862 	close(fd);
863 }
864 
865 /*
866  * Test basic sealing
867  * A very basic sealing test to see whether setting/retrieving seals works.
868  */
869 static void test_basic(void)
870 {
871 	int fd;
872 
873 	printf("%s BASIC\n", memfd_str);
874 
875 	fd = mfd_assert_new("kern_memfd_basic",
876 			    mfd_def_size,
877 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
878 
879 	/* add basic seals */
880 	mfd_assert_has_seals(fd, 0);
881 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
882 				 F_SEAL_WRITE);
883 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
884 				 F_SEAL_WRITE);
885 
886 	/* add them again */
887 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
888 				 F_SEAL_WRITE);
889 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
890 				 F_SEAL_WRITE);
891 
892 	/* add more seals and seal against sealing */
893 	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
894 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
895 				 F_SEAL_GROW |
896 				 F_SEAL_WRITE |
897 				 F_SEAL_SEAL);
898 
899 	/* verify that sealing no longer works */
900 	mfd_fail_add_seals(fd, F_SEAL_GROW);
901 	mfd_fail_add_seals(fd, 0);
902 
903 	close(fd);
904 
905 	/* verify sealing does not work without MFD_ALLOW_SEALING */
906 	fd = mfd_assert_new("kern_memfd_basic",
907 			    mfd_def_size,
908 			    MFD_CLOEXEC);
909 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
910 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
911 			       F_SEAL_GROW |
912 			       F_SEAL_WRITE);
913 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
914 	close(fd);
915 }
916 
917 /*
918  * Test SEAL_WRITE
919  * Test whether SEAL_WRITE actually prevents modifications.
920  */
921 static void test_seal_write(void)
922 {
923 	int fd;
924 
925 	printf("%s SEAL-WRITE\n", memfd_str);
926 
927 	fd = mfd_assert_new("kern_memfd_seal_write",
928 			    mfd_def_size,
929 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
930 	mfd_assert_has_seals(fd, 0);
931 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
932 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
933 
934 	mfd_assert_read(fd);
935 	mfd_fail_write(fd);
936 	mfd_assert_shrink(fd);
937 	mfd_assert_grow(fd);
938 	mfd_fail_grow_write(fd);
939 
940 	close(fd);
941 }
942 
943 /*
944  * Test SEAL_FUTURE_WRITE
945  * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
946  */
947 static void test_seal_future_write(void)
948 {
949 	int fd, fd2;
950 	void *p;
951 
952 	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
953 
954 	fd = mfd_assert_new("kern_memfd_seal_future_write",
955 			    mfd_def_size,
956 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
957 
958 	p = mfd_assert_mmap_shared(fd);
959 
960 	mfd_assert_has_seals(fd, 0);
961 
962 	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
963 	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
964 
965 	/* read should pass, writes should fail */
966 	mfd_assert_read(fd);
967 	mfd_assert_read_shared(fd);
968 	mfd_fail_write(fd);
969 
970 	fd2 = mfd_assert_reopen_fd(fd);
971 	/* read should pass, writes should still fail */
972 	mfd_assert_read(fd2);
973 	mfd_assert_read_shared(fd2);
974 	mfd_fail_write(fd2);
975 
976 	mfd_assert_fork_private_write(fd);
977 
978 	munmap(p, mfd_def_size);
979 	close(fd2);
980 	close(fd);
981 }
982 
983 /*
984  * Test SEAL_SHRINK
985  * Test whether SEAL_SHRINK actually prevents shrinking
986  */
987 static void test_seal_shrink(void)
988 {
989 	int fd;
990 
991 	printf("%s SEAL-SHRINK\n", memfd_str);
992 
993 	fd = mfd_assert_new("kern_memfd_seal_shrink",
994 			    mfd_def_size,
995 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
996 	mfd_assert_has_seals(fd, 0);
997 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
998 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
999 
1000 	mfd_assert_read(fd);
1001 	mfd_assert_write(fd);
1002 	mfd_fail_shrink(fd);
1003 	mfd_assert_grow(fd);
1004 	mfd_assert_grow_write(fd);
1005 
1006 	close(fd);
1007 }
1008 
1009 /*
1010  * Test SEAL_GROW
1011  * Test whether SEAL_GROW actually prevents growing
1012  */
1013 static void test_seal_grow(void)
1014 {
1015 	int fd;
1016 
1017 	printf("%s SEAL-GROW\n", memfd_str);
1018 
1019 	fd = mfd_assert_new("kern_memfd_seal_grow",
1020 			    mfd_def_size,
1021 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1022 	mfd_assert_has_seals(fd, 0);
1023 	mfd_assert_add_seals(fd, F_SEAL_GROW);
1024 	mfd_assert_has_seals(fd, F_SEAL_GROW);
1025 
1026 	mfd_assert_read(fd);
1027 	mfd_assert_write(fd);
1028 	mfd_assert_shrink(fd);
1029 	mfd_fail_grow(fd);
1030 	mfd_fail_grow_write(fd);
1031 
1032 	close(fd);
1033 }
1034 
1035 /*
1036  * Test SEAL_SHRINK | SEAL_GROW
1037  * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
1038  */
1039 static void test_seal_resize(void)
1040 {
1041 	int fd;
1042 
1043 	printf("%s SEAL-RESIZE\n", memfd_str);
1044 
1045 	fd = mfd_assert_new("kern_memfd_seal_resize",
1046 			    mfd_def_size,
1047 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1048 	mfd_assert_has_seals(fd, 0);
1049 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1050 	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1051 
1052 	mfd_assert_read(fd);
1053 	mfd_assert_write(fd);
1054 	mfd_fail_shrink(fd);
1055 	mfd_fail_grow(fd);
1056 	mfd_fail_grow_write(fd);
1057 
1058 	close(fd);
1059 }
1060 
1061 /*
1062  * Test SEAL_EXEC
1063  * Test fd is created with exec and allow sealing.
1064  * chmod() cannot change x bits after sealing.
1065  */
1066 static void test_exec_seal(void)
1067 {
1068 	int fd;
1069 
1070 	printf("%s SEAL-EXEC\n", memfd_str);
1071 
1072 	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1073 	fd = mfd_assert_new("kern_memfd_seal_exec",
1074 			    mfd_def_size,
1075 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1076 
1077 	mfd_assert_mode(fd, 0777);
1078 	mfd_assert_chmod(fd, 0644);
1079 
1080 	mfd_assert_has_seals(fd, 0);
1081 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1082 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1083 
1084 	mfd_assert_chmod(fd, 0600);
1085 	mfd_fail_chmod(fd, 0777);
1086 	mfd_fail_chmod(fd, 0670);
1087 	mfd_fail_chmod(fd, 0605);
1088 	mfd_fail_chmod(fd, 0700);
1089 	mfd_fail_chmod(fd, 0100);
1090 	mfd_assert_chmod(fd, 0666);
1091 	mfd_assert_write(fd);
1092 	close(fd);
1093 
1094 	printf("%s	Apply ALL_SEALS\n", memfd_str);
1095 	fd = mfd_assert_new("kern_memfd_seal_exec",
1096 			    mfd_def_size,
1097 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1098 
1099 	mfd_assert_mode(fd, 0777);
1100 	mfd_assert_chmod(fd, 0700);
1101 
1102 	mfd_assert_has_seals(fd, 0);
1103 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1104 	mfd_assert_has_seals(fd, F_WX_SEALS);
1105 
1106 	mfd_fail_chmod(fd, 0711);
1107 	mfd_fail_chmod(fd, 0600);
1108 	mfd_fail_write(fd);
1109 	close(fd);
1110 }
1111 
1112 /*
1113  * Test EXEC_NO_SEAL
1114  * Test fd is created with exec and not allow sealing.
1115  */
1116 static void test_exec_no_seal(void)
1117 {
1118 	int fd;
1119 
1120 	printf("%s EXEC_NO_SEAL\n", memfd_str);
1121 
1122 	/* Create with EXEC but without ALLOW_SEALING */
1123 	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1124 			    mfd_def_size,
1125 			    MFD_CLOEXEC | MFD_EXEC);
1126 	mfd_assert_mode(fd, 0777);
1127 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1128 	mfd_assert_chmod(fd, 0666);
1129 	close(fd);
1130 }
1131 
1132 /*
1133  * Test memfd_create with MFD_NOEXEC flag
1134  */
1135 static void test_noexec_seal(void)
1136 {
1137 	int fd;
1138 
1139 	printf("%s NOEXEC_SEAL\n", memfd_str);
1140 
1141 	/* Create with NOEXEC and ALLOW_SEALING */
1142 	fd = mfd_assert_new("kern_memfd_noexec",
1143 			    mfd_def_size,
1144 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1145 	mfd_assert_mode(fd, 0666);
1146 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1147 	mfd_fail_chmod(fd, 0777);
1148 	close(fd);
1149 
1150 	/* Create with NOEXEC but without ALLOW_SEALING */
1151 	fd = mfd_assert_new("kern_memfd_noexec",
1152 			    mfd_def_size,
1153 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1154 	mfd_assert_mode(fd, 0666);
1155 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1156 	mfd_fail_chmod(fd, 0777);
1157 	close(fd);
1158 }
1159 
1160 static void test_sysctl_sysctl0(void)
1161 {
1162 	int fd;
1163 
1164 	sysctl_assert_equal("0");
1165 
1166 	fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
1167 			    mfd_def_size,
1168 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1169 	mfd_assert_mode(fd, 0777);
1170 	mfd_assert_has_seals(fd, 0);
1171 	mfd_assert_chmod(fd, 0644);
1172 	close(fd);
1173 }
1174 
/* Write "0" to the memfd_noexec sysctl, then run the checks for value 0. */
static void test_sysctl_set_sysctl0(void)
{
	static const char level[] = "0";

	sysctl_assert_write(level);
	test_sysctl_sysctl0();
}
1180 
1181 static void test_sysctl_sysctl1(void)
1182 {
1183 	int fd;
1184 
1185 	sysctl_assert_equal("1");
1186 
1187 	fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
1188 			    mfd_def_size,
1189 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1190 	mfd_assert_mode(fd, 0666);
1191 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1192 	mfd_fail_chmod(fd, 0777);
1193 	close(fd);
1194 
1195 	fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
1196 			    mfd_def_size,
1197 			    MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1198 	mfd_assert_mode(fd, 0777);
1199 	mfd_assert_has_seals(fd, 0);
1200 	mfd_assert_chmod(fd, 0644);
1201 	close(fd);
1202 
1203 	fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
1204 			    mfd_def_size,
1205 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1206 	mfd_assert_mode(fd, 0666);
1207 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1208 	mfd_fail_chmod(fd, 0777);
1209 	close(fd);
1210 }
1211 
/* Write "1" to the memfd_noexec sysctl, then run the checks for value 1. */
static void test_sysctl_set_sysctl1(void)
{
	static const char level[] = "1";

	sysctl_assert_write(level);
	test_sysctl_sysctl1();
}
1217 
1218 static void test_sysctl_sysctl2(void)
1219 {
1220 	int fd;
1221 
1222 	sysctl_assert_equal("2");
1223 
1224 	fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
1225 			    mfd_def_size,
1226 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1227 	mfd_assert_mode(fd, 0666);
1228 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1229 	mfd_fail_chmod(fd, 0777);
1230 	close(fd);
1231 
1232 	mfd_fail_new("kern_memfd_sysctl_2_exec",
1233 		     MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1234 
1235 	fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
1236 			    mfd_def_size,
1237 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1238 	mfd_assert_mode(fd, 0666);
1239 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1240 	mfd_fail_chmod(fd, 0777);
1241 	close(fd);
1242 }
1243 
/* Write "2" to the memfd_noexec sysctl, then run the checks for value 2. */
static void test_sysctl_set_sysctl2(void)
{
	static const char level[] = "2";

	sysctl_assert_write(level);
	test_sysctl_sysctl2();
}
1249 
1250 static int sysctl_simple_child(void *arg)
1251 {
1252 	printf("%s sysctl 0\n", memfd_str);
1253 	test_sysctl_set_sysctl0();
1254 
1255 	printf("%s sysctl 1\n", memfd_str);
1256 	test_sysctl_set_sysctl1();
1257 
1258 	printf("%s sysctl 0\n", memfd_str);
1259 	test_sysctl_set_sysctl0();
1260 
1261 	printf("%s sysctl 2\n", memfd_str);
1262 	test_sysctl_set_sysctl2();
1263 
1264 	printf("%s sysctl 1\n", memfd_str);
1265 	test_sysctl_set_sysctl1();
1266 
1267 	printf("%s sysctl 0\n", memfd_str);
1268 	test_sysctl_set_sysctl0();
1269 
1270 	return 0;
1271 }
1272 
1273 /*
1274  * Test sysctl
1275  * A very basic test to make sure the core sysctl semantics work.
1276  */
1277 static void test_sysctl_simple(void)
1278 {
1279 	int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1280 
1281 	join_thread(pid);
1282 }
1283 
/*
 * Child entry point that treats @arg as a void (*)(void) test function,
 * calls it, and returns 0.
 */
static int sysctl_nested(void *arg)
{
	void (*test_fn)(void) = arg;

	test_fn();

	return 0;
}
1291 
/*
 * Like sysctl_nested(), but the child first sends SIGSTOP to itself so that
 * the test function in @arg only runs after a SIGCONT has been delivered.
 */
static int sysctl_nested_wait(void *arg)
{
	pid_t self = getpid();

	/* Stop here until someone sends SIGCONT. */
	kill(self, SIGSTOP);

	return sysctl_nested(arg);
}
1298 
/*
 * Expect that writing "0" to the memfd_noexec sysctl is refused, then verify
 * that the behaviour for value 1 is still in effect.
 */
static void test_sysctl_sysctl1_failset(void)
{
	static const char refused[] = "0";

	sysctl_fail_write(refused);
	test_sysctl_sysctl1();
}
1304 
/*
 * Expect that writing "1" and then "0" to the memfd_noexec sysctl is refused,
 * verifying after each attempt that the behaviour for value 2 is still in
 * effect.
 */
static void test_sysctl_sysctl2_failset(void)
{
	static const char *const refused[] = { "1", "0" };
	size_t i;

	for (i = 0; i < sizeof(refused) / sizeof(refused[0]); i++) {
		sysctl_fail_write(refused[i]);
		test_sysctl_sysctl2();
	}
}
1313 
/*
 * Child entry point for the nested sysctl test (spawned with CLONE_NEWPID).
 *
 * Sets the memfd_noexec sysctl to various values and spawns further
 * CLONE_NEWPID children to check what they observe and which writes they
 * are allowed to make. @arg is unused. Always returns 0; every helper
 * aborts on failure, and join_thread() aborts if a child did not exit
 * cleanly.
 *
 * Terminology used in the comments below: "raise" refers to a child trying
 * to write a numerically smaller value than the one it sees (the *_failset
 * helpers expect those writes to fail); "lower" refers to writing a
 * numerically larger value.
 *
 * NOTE(review): children started with sysctl_nested_wait() stop themselves
 * with SIGSTOP, and this function sends SIGCONT without first waiting for
 * the child to have stopped. Presumably a SIGCONT that arrives before the
 * child stops would leave it stopped and join_thread() blocked -- confirm.
 */
static int sysctl_nested_child(void *arg)
{
	int pid;

	printf("%s nested sysctl 0\n", memfd_str);
	sysctl_assert_write("0");
	/* A further nested pidns works the same. */
	pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
	join_thread(pid);

	printf("%s nested sysctl 1\n", memfd_str);
	sysctl_assert_write("1");
	/* Child inherits our setting. */
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
	join_thread(pid);
	/* Child cannot raise the setting. */
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
			   test_sysctl_sysctl1_failset);
	join_thread(pid);
	/* Child can lower the setting. */
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
			   test_sysctl_set_sysctl2);
	join_thread(pid);
	/* Child lowering the setting has no effect on our setting. */
	test_sysctl_sysctl1();

	printf("%s nested sysctl 2\n", memfd_str);
	sysctl_assert_write("2");
	/* Child inherits our setting. */
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
	join_thread(pid);
	/* Child cannot raise the setting. */
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
			   test_sysctl_sysctl2_failset);
	join_thread(pid);

	/* Verify that the rules are actually inherited after fork. */
	printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
	sysctl_assert_write("0");

	/*
	 * The child is created while our value is 0; we then switch to 1
	 * before letting it run. It is expected to see 1 and to be unable to
	 * write 0.
	 */
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
			   test_sysctl_sysctl1_failset);
	sysctl_assert_write("1");
	kill(pid, SIGCONT);
	join_thread(pid);

	printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
	sysctl_assert_write("0");

	/* Same as above, but switching from 0 to 2 after the child exists. */
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
			   test_sysctl_sysctl2_failset);
	sysctl_assert_write("2");
	kill(pid, SIGCONT);
	join_thread(pid);

	/*
	 * Verify that the current effective setting is saved on fork, meaning
	 * that the parent lowering the sysctl doesn't affect already-forked
	 * children.
	 */
	printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
	sysctl_assert_write("2");
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
			   test_sysctl_sysctl2);
	sysctl_assert_write("1");
	kill(pid, SIGCONT);
	join_thread(pid);

	printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
	sysctl_assert_write("2");
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
			   test_sysctl_sysctl2);
	sysctl_assert_write("0");
	kill(pid, SIGCONT);
	join_thread(pid);

	printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
	sysctl_assert_write("1");
	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
			   test_sysctl_sysctl1);
	sysctl_assert_write("0");
	kill(pid, SIGCONT);
	join_thread(pid);

	return 0;
}
1400 
1401 /*
1402  * Test sysctl with nested pid namespaces
1403  * Make sure that the sysctl nesting semantics work correctly.
1404  */
1405 static void test_sysctl_nested(void)
1406 {
1407 	int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
1408 
1409 	join_thread(pid);
1410 }
1411 
1412 /*
1413  * Test sharing via dup()
1414  * Test that seals are shared between dupped FDs and they're all equal.
1415  */
1416 static void test_share_dup(char *banner, char *b_suffix)
1417 {
1418 	int fd, fd2;
1419 
1420 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1421 
1422 	fd = mfd_assert_new("kern_memfd_share_dup",
1423 			    mfd_def_size,
1424 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1425 	mfd_assert_has_seals(fd, 0);
1426 
1427 	fd2 = mfd_assert_dup(fd);
1428 	mfd_assert_has_seals(fd2, 0);
1429 
1430 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1431 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1432 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1433 
1434 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1435 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1436 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1437 
1438 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1439 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1440 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1441 
1442 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1443 	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1444 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1445 	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1446 
1447 	close(fd2);
1448 
1449 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1450 	close(fd);
1451 }
1452 
1453 /*
1454  * Test sealing with active mmap()s
1455  * Modifying seals is only allowed if no other mmap() refs exist.
1456  */
1457 static void test_share_mmap(char *banner, char *b_suffix)
1458 {
1459 	int fd;
1460 	void *p;
1461 
1462 	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1463 
1464 	fd = mfd_assert_new("kern_memfd_share_mmap",
1465 			    mfd_def_size,
1466 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1467 	mfd_assert_has_seals(fd, 0);
1468 
1469 	/* shared/writable ref prevents sealing WRITE, but allows others */
1470 	p = mfd_assert_mmap_shared(fd);
1471 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1472 	mfd_assert_has_seals(fd, 0);
1473 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1474 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1475 	munmap(p, mfd_def_size);
1476 
1477 	/* readable ref allows sealing */
1478 	p = mfd_assert_mmap_private(fd);
1479 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1480 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1481 	munmap(p, mfd_def_size);
1482 
1483 	close(fd);
1484 }
1485 
1486 /*
1487  * Test sealing with open(/proc/self/fd/%d)
1488  * Via /proc we can get access to a separate file-context for the same memfd.
1489  * This is *not* like dup(), but like a real separate open(). Make sure the
1490  * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1491  */
1492 static void test_share_open(char *banner, char *b_suffix)
1493 {
1494 	int fd, fd2;
1495 
1496 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1497 
1498 	fd = mfd_assert_new("kern_memfd_share_open",
1499 			    mfd_def_size,
1500 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1501 	mfd_assert_has_seals(fd, 0);
1502 
1503 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1504 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1505 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1506 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1507 
1508 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1509 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1510 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1511 
1512 	close(fd);
1513 	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1514 
1515 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1516 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1517 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1518 
1519 	close(fd2);
1520 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1521 
1522 	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1523 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1524 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1525 
1526 	close(fd2);
1527 	close(fd);
1528 }
1529 
1530 /*
1531  * Test sharing via fork()
1532  * Test whether seal-modifications work as expected with forked children.
1533  */
1534 static void test_share_fork(char *banner, char *b_suffix)
1535 {
1536 	int fd;
1537 	pid_t pid;
1538 
1539 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1540 
1541 	fd = mfd_assert_new("kern_memfd_share_fork",
1542 			    mfd_def_size,
1543 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1544 	mfd_assert_has_seals(fd, 0);
1545 
1546 	pid = spawn_idle_thread(0);
1547 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1548 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1549 
1550 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1551 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1552 
1553 	join_idle_thread(pid);
1554 
1555 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1556 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1557 
1558 	close(fd);
1559 }
1560 
/*
 * Report whether /proc/self/ns/pid exists, which main() uses to decide
 * whether the PID-namespace based sysctl tests can run.
 *
 * Returns true if access() finds the path, false otherwise.
 */
static bool pid_ns_supported(void)
{
	if (access("/proc/self/ns/pid", F_OK) != 0)
		return false;

	return true;
}
1565 
1566 int main(int argc, char **argv)
1567 {
1568 	pid_t pid;
1569 
1570 	if (argc == 2) {
1571 		if (!strcmp(argv[1], "hugetlbfs")) {
1572 			unsigned long hpage_size = default_huge_page_size();
1573 
1574 			if (!hpage_size) {
1575 				printf("Unable to determine huge page size\n");
1576 				abort();
1577 			}
1578 
1579 			hugetlbfs_test = 1;
1580 			memfd_str = MEMFD_HUGE_STR;
1581 			mfd_def_size = hpage_size * 2;
1582 		} else {
1583 			printf("Unknown option: %s\n", argv[1]);
1584 			abort();
1585 		}
1586 	}
1587 
1588 	test_create();
1589 	test_basic();
1590 	test_exec_seal();
1591 	test_exec_no_seal();
1592 	test_noexec_seal();
1593 
1594 	test_seal_write();
1595 	test_seal_future_write();
1596 	test_seal_shrink();
1597 	test_seal_grow();
1598 	test_seal_resize();
1599 
1600 	if (pid_ns_supported()) {
1601 		test_sysctl_simple();
1602 		test_sysctl_nested();
1603 	} else {
1604 		printf("PID namespaces are not supported; skipping sysctl tests\n");
1605 	}
1606 
1607 	test_share_dup("SHARE-DUP", "");
1608 	test_share_mmap("SHARE-MMAP", "");
1609 	test_share_open("SHARE-OPEN", "");
1610 	test_share_fork("SHARE-FORK", "");
1611 
1612 	/* Run test-suite in a multi-threaded environment with a shared
1613 	 * file-table. */
1614 	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1615 	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1616 	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1617 	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1618 	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1619 	join_idle_thread(pid);
1620 
1621 	printf("memfd: DONE\n");
1622 
1623 	return 0;
1624 }
1625