xref: /linux/tools/testing/selftests/memfd/memfd_test.c (revision 69050f8d6d075dc01af7a5f2f550a8067510366f)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4 
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <signal.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 #include <sys/stat.h>
19 #include <sys/syscall.h>
20 #include <sys/wait.h>
21 #include <sys/types.h>
22 #include <sys/ipc.h>
23 #include <sys/sem.h>
24 #include <unistd.h>
25 #include <ctype.h>
26 
27 #include "common.h"
28 
29 #define MEMFD_STR	"memfd:"
30 #define MEMFD_HUGE_STR	"memfd-hugetlb:"
31 #define SHARED_FT_STR	"(shared file-table)"
32 
33 #define MFD_DEF_SIZE 8192
34 #define STACK_SIZE 65536
35 
36 #define F_SEAL_EXEC	0x0020
37 
38 #define F_WX_SEALS (F_SEAL_SHRINK | \
39 		    F_SEAL_GROW | \
40 		    F_SEAL_WRITE | \
41 		    F_SEAL_FUTURE_WRITE | \
42 		    F_SEAL_EXEC)
43 
44 #define MFD_NOEXEC_SEAL	0x0008U
/*
 * Argument union for semctl(2); the caller has to define it itself.
 * All members overlay the same storage.
 */
union semun {
	int val;
	struct semid_ds *buf;
	unsigned short *array;
	struct seminfo *__buf;
};
51 
/*
 * We use semaphores for the nested wait tasks because of CLONE_NEWPID: the
 * child will be PID 1 and can't send SIGSTOP to itself due to the special
 * treatment of the init task, so the SIGSTOP/SIGCONT synchronization
 * approach can't be used here.
 */
58 #define SEM_KEY 0xdeadbeef
59 
60 /*
61  * Default is not to test hugetlbfs
62  */
63 static size_t mfd_def_size = MFD_DEF_SIZE;
64 static const char *memfd_str = MEMFD_STR;
65 
66 static ssize_t fd2name(int fd, char *buf, size_t bufsize)
67 {
68 	char buf1[PATH_MAX];
69 	int size;
70 	ssize_t nbytes;
71 
72 	size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
73 	if (size < 0) {
74 		printf("snprintf(%d) failed on %m\n", fd);
75 		abort();
76 	}
77 
78 	/*
79 	 * reserver one byte for string termination.
80 	 */
81 	nbytes = readlink(buf1, buf, bufsize-1);
82 	if (nbytes == -1) {
83 		printf("readlink(%s) failed %m\n", buf1);
84 		abort();
85 	}
86 	buf[nbytes] = '\0';
87 	return nbytes;
88 }
89 
90 static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
91 {
92 	int r, fd;
93 
94 	fd = sys_memfd_create(name, flags);
95 	if (fd < 0) {
96 		printf("memfd_create(\"%s\", %u) failed: %m\n",
97 		       name, flags);
98 		abort();
99 	}
100 
101 	r = ftruncate(fd, sz);
102 	if (r < 0) {
103 		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
104 		abort();
105 	}
106 
107 	return fd;
108 }
109 
110 static void sysctl_assert_write(const char *val)
111 {
112 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
113 
114 	if (fd < 0) {
115 		printf("open sysctl failed: %m\n");
116 		abort();
117 	}
118 
119 	if (write(fd, val, strlen(val)) < 0) {
120 		printf("write sysctl %s failed: %m\n", val);
121 		abort();
122 	}
123 }
124 
125 static void sysctl_fail_write(const char *val)
126 {
127 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
128 
129 	if (fd < 0) {
130 		printf("open sysctl failed: %m\n");
131 		abort();
132 	}
133 
134 	if (write(fd, val, strlen(val)) >= 0) {
135 		printf("write sysctl %s succeeded, but failure expected\n",
136 				val);
137 		abort();
138 	}
139 }
140 
141 static void sysctl_assert_equal(const char *val)
142 {
143 	char *p, buf[128] = {};
144 	int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
145 
146 	if (fd < 0) {
147 		printf("open sysctl failed: %m\n");
148 		abort();
149 	}
150 
151 	if (read(fd, buf, sizeof(buf)) < 0) {
152 		printf("read sysctl failed: %m\n");
153 		abort();
154 	}
155 
156 	/* Strip trailing whitespace. */
157 	p = buf;
158 	while (!isspace(*p))
159 		p++;
160 	*p = '\0';
161 
162 	if (strcmp(buf, val) != 0) {
163 		printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
164 		abort();
165 	}
166 }
167 
/*
 * Open a second, read-write file descriptor for the file behind @fd_in
 * by going through /proc/self/fd. Returns the new fd; aborts on failure.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char proc_path[100];
	int new_fd;

	snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d", fd_in);

	new_fd = open(proc_path, O_RDWR);
	if (new_fd >= 0)
		return new_fd;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
183 
/*
 * Call memfd_create(@name, @flags) and abort if it succeeds.
 * @name may be NULL.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name ? name : "NULL", flags);
	close(fd);
	abort();
}
196 
197 static unsigned int mfd_assert_get_seals(int fd)
198 {
199 	int r;
200 
201 	r = fcntl(fd, F_GET_SEALS);
202 	if (r < 0) {
203 		printf("GET_SEALS(%d) failed: %m\n", fd);
204 		abort();
205 	}
206 
207 	return (unsigned int)r;
208 }
209 
210 static void mfd_assert_has_seals(int fd, unsigned int seals)
211 {
212 	char buf[PATH_MAX];
213 	unsigned int s;
214 	fd2name(fd, buf, PATH_MAX);
215 
216 	s = mfd_assert_get_seals(fd);
217 	if (s != seals) {
218 		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
219 		abort();
220 	}
221 }
222 
223 static void mfd_assert_add_seals(int fd, unsigned int seals)
224 {
225 	int r;
226 	unsigned int s;
227 
228 	s = mfd_assert_get_seals(fd);
229 	r = fcntl(fd, F_ADD_SEALS, seals);
230 	if (r < 0) {
231 		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
232 		abort();
233 	}
234 }
235 
236 static void mfd_fail_add_seals(int fd, unsigned int seals)
237 {
238 	int r;
239 	unsigned int s;
240 
241 	r = fcntl(fd, F_GET_SEALS);
242 	if (r < 0)
243 		s = 0;
244 	else
245 		s = (unsigned int)r;
246 
247 	r = fcntl(fd, F_ADD_SEALS, seals);
248 	if (r >= 0) {
249 		printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
250 				fd, s, seals);
251 		abort();
252 	}
253 }
254 
/*
 * Abort unless fstat() reports exactly @size bytes for @fd.
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat info;

	if (fstat(fd, &info) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (info.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)info.st_size, (long long)size);
		abort();
	}
}
270 
/*
 * Duplicate @fd with dup() and return the new descriptor; aborts on
 * failure.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy >= 0)
		return copy;

	printf("dup(%d) failed: %m\n", fd);
	abort();
}
283 
284 static void *mfd_assert_mmap_shared(int fd)
285 {
286 	void *p;
287 
288 	p = mmap(NULL,
289 		 mfd_def_size,
290 		 PROT_READ | PROT_WRITE,
291 		 MAP_SHARED,
292 		 fd,
293 		 0);
294 	if (p == MAP_FAILED) {
295 		printf("mmap() failed: %m\n");
296 		abort();
297 	}
298 
299 	return p;
300 }
301 
302 static void *mfd_assert_mmap_read_shared(int fd)
303 {
304 	void *p;
305 
306 	p = mmap(NULL,
307 		 mfd_def_size,
308 		 PROT_READ,
309 		 MAP_SHARED,
310 		 fd,
311 		 0);
312 	if (p == MAP_FAILED) {
313 		printf("mmap() failed: %m\n");
314 		abort();
315 	}
316 
317 	return p;
318 }
319 
320 static void *mfd_assert_mmap_private(int fd)
321 {
322 	void *p;
323 
324 	p = mmap(NULL,
325 		 mfd_def_size,
326 		 PROT_READ,
327 		 MAP_PRIVATE,
328 		 fd,
329 		 0);
330 	if (p == MAP_FAILED) {
331 		printf("mmap() failed: %m\n");
332 		abort();
333 	}
334 
335 	return p;
336 }
337 
/*
 * Open the file behind @fd again through /proc/self/fd using @flags and
 * @mode. Returns the new descriptor; aborts if open() fails.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];
	int new_fd;

	snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d", fd);

	new_fd = open(proc_path, flags, mode);
	if (new_fd < 0) {
		printf("open(%s) failed: %m\n", proc_path);
		abort();
	}

	return new_fd;
}
352 
/*
 * Try to open the file behind @fd through /proc/self/fd using @flags and
 * @mode, and abort if the open succeeds.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];

	snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d", fd);

	if (open(proc_path, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", proc_path);
	abort();
}
365 
366 static void mfd_assert_read(int fd)
367 {
368 	char buf[16];
369 	void *p;
370 	ssize_t l;
371 
372 	l = read(fd, buf, sizeof(buf));
373 	if (l != sizeof(buf)) {
374 		printf("read() failed: %m\n");
375 		abort();
376 	}
377 
378 	/* verify PROT_READ *is* allowed */
379 	p = mmap(NULL,
380 		 mfd_def_size,
381 		 PROT_READ,
382 		 MAP_PRIVATE,
383 		 fd,
384 		 0);
385 	if (p == MAP_FAILED) {
386 		printf("mmap() failed: %m\n");
387 		abort();
388 	}
389 	munmap(p, mfd_def_size);
390 
391 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
392 	p = mmap(NULL,
393 		 mfd_def_size,
394 		 PROT_READ | PROT_WRITE,
395 		 MAP_PRIVATE,
396 		 fd,
397 		 0);
398 	if (p == MAP_FAILED) {
399 		printf("mmap() failed: %m\n");
400 		abort();
401 	}
402 	munmap(p, mfd_def_size);
403 }
404 
405 /* Test that PROT_READ + MAP_SHARED mappings work. */
406 static void mfd_assert_read_shared(int fd)
407 {
408 	void *p;
409 
410 	/* verify PROT_READ and MAP_SHARED *is* allowed */
411 	p = mmap(NULL,
412 		 mfd_def_size,
413 		 PROT_READ,
414 		 MAP_SHARED,
415 		 fd,
416 		 0);
417 	if (p == MAP_FAILED) {
418 		printf("mmap() failed: %m\n");
419 		abort();
420 	}
421 	munmap(p, mfd_def_size);
422 }
423 
424 static void mfd_assert_fork_private_write(int fd)
425 {
426 	int *p;
427 	pid_t pid;
428 
429 	p = mmap(NULL,
430 		 mfd_def_size,
431 		 PROT_READ | PROT_WRITE,
432 		 MAP_PRIVATE,
433 		 fd,
434 		 0);
435 	if (p == MAP_FAILED) {
436 		printf("mmap() failed: %m\n");
437 		abort();
438 	}
439 
440 	p[0] = 22;
441 
442 	pid = fork();
443 	if (pid == 0) {
444 		p[0] = 33;
445 		exit(0);
446 	} else {
447 		waitpid(pid, NULL, 0);
448 
449 		if (p[0] != 22) {
450 			printf("MAP_PRIVATE copy-on-write failed: %m\n");
451 			abort();
452 		}
453 	}
454 
455 	munmap(p, mfd_def_size);
456 }
457 
458 static void mfd_assert_write(int fd)
459 {
460 	ssize_t l;
461 	void *p;
462 	int r;
463 
464 	/*
465 	 * huegtlbfs does not support write, but we want to
466 	 * verify everything else here.
467 	 */
468 	if (!hugetlbfs_test) {
469 		/* verify write() succeeds */
470 		l = write(fd, "\0\0\0\0", 4);
471 		if (l != 4) {
472 			printf("write() failed: %m\n");
473 			abort();
474 		}
475 	}
476 
477 	/* verify PROT_READ | PROT_WRITE is allowed */
478 	p = mmap(NULL,
479 		 mfd_def_size,
480 		 PROT_READ | PROT_WRITE,
481 		 MAP_SHARED,
482 		 fd,
483 		 0);
484 	if (p == MAP_FAILED) {
485 		printf("mmap() failed: %m\n");
486 		abort();
487 	}
488 	*(char *)p = 0;
489 	munmap(p, mfd_def_size);
490 
491 	/* verify PROT_WRITE is allowed */
492 	p = mmap(NULL,
493 		 mfd_def_size,
494 		 PROT_WRITE,
495 		 MAP_SHARED,
496 		 fd,
497 		 0);
498 	if (p == MAP_FAILED) {
499 		printf("mmap() failed: %m\n");
500 		abort();
501 	}
502 	*(char *)p = 0;
503 	munmap(p, mfd_def_size);
504 
505 	/* verify PROT_READ with MAP_SHARED is allowed and a following
506 	 * mprotect(PROT_WRITE) allows writing */
507 	p = mmap(NULL,
508 		 mfd_def_size,
509 		 PROT_READ,
510 		 MAP_SHARED,
511 		 fd,
512 		 0);
513 	if (p == MAP_FAILED) {
514 		printf("mmap() failed: %m\n");
515 		abort();
516 	}
517 
518 	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
519 	if (r < 0) {
520 		printf("mprotect() failed: %m\n");
521 		abort();
522 	}
523 
524 	*(char *)p = 0;
525 	munmap(p, mfd_def_size);
526 
527 	/* verify PUNCH_HOLE works */
528 	r = fallocate(fd,
529 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
530 		      0,
531 		      mfd_def_size);
532 	if (r < 0) {
533 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
534 		abort();
535 	}
536 }
537 
538 static void mfd_fail_write(int fd)
539 {
540 	ssize_t l;
541 	void *p;
542 	int r;
543 
544 	/* verify write() fails */
545 	l = write(fd, "data", 4);
546 	if (l != -EPERM) {
547 		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
548 		abort();
549 	}
550 
551 	/* verify PROT_READ | PROT_WRITE is not allowed */
552 	p = mmap(NULL,
553 		 mfd_def_size,
554 		 PROT_READ | PROT_WRITE,
555 		 MAP_SHARED,
556 		 fd,
557 		 0);
558 	if (p != MAP_FAILED) {
559 		printf("mmap() didn't fail as expected\n");
560 		abort();
561 	}
562 
563 	/* verify PROT_WRITE is not allowed */
564 	p = mmap(NULL,
565 		 mfd_def_size,
566 		 PROT_WRITE,
567 		 MAP_SHARED,
568 		 fd,
569 		 0);
570 	if (p != MAP_FAILED) {
571 		printf("mmap() didn't fail as expected\n");
572 		abort();
573 	}
574 
575 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
576 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
577 	p = mmap(NULL,
578 		 mfd_def_size,
579 		 PROT_READ,
580 		 MAP_SHARED,
581 		 fd,
582 		 0);
583 	if (p != MAP_FAILED) {
584 		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
585 		if (r >= 0) {
586 			printf("mmap()+mprotect() didn't fail as expected\n");
587 			abort();
588 		}
589 		munmap(p, mfd_def_size);
590 	}
591 
592 	/* verify PUNCH_HOLE fails */
593 	r = fallocate(fd,
594 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
595 		      0,
596 		      mfd_def_size);
597 	if (r >= 0) {
598 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
599 		abort();
600 	}
601 }
602 
603 static void mfd_assert_shrink(int fd)
604 {
605 	int r, fd2;
606 
607 	r = ftruncate(fd, mfd_def_size / 2);
608 	if (r < 0) {
609 		printf("ftruncate(SHRINK) failed: %m\n");
610 		abort();
611 	}
612 
613 	mfd_assert_size(fd, mfd_def_size / 2);
614 
615 	fd2 = mfd_assert_open(fd,
616 			      O_RDWR | O_CREAT | O_TRUNC,
617 			      S_IRUSR | S_IWUSR);
618 	close(fd2);
619 
620 	mfd_assert_size(fd, 0);
621 }
622 
623 static void mfd_fail_shrink(int fd)
624 {
625 	int r;
626 
627 	r = ftruncate(fd, mfd_def_size / 2);
628 	if (r >= 0) {
629 		printf("ftruncate(SHRINK) didn't fail as expected\n");
630 		abort();
631 	}
632 
633 	mfd_fail_open(fd,
634 		      O_RDWR | O_CREAT | O_TRUNC,
635 		      S_IRUSR | S_IWUSR);
636 }
637 
638 static void mfd_assert_grow(int fd)
639 {
640 	int r;
641 
642 	r = ftruncate(fd, mfd_def_size * 2);
643 	if (r < 0) {
644 		printf("ftruncate(GROW) failed: %m\n");
645 		abort();
646 	}
647 
648 	mfd_assert_size(fd, mfd_def_size * 2);
649 
650 	r = fallocate(fd,
651 		      0,
652 		      0,
653 		      mfd_def_size * 4);
654 	if (r < 0) {
655 		printf("fallocate(ALLOC) failed: %m\n");
656 		abort();
657 	}
658 
659 	mfd_assert_size(fd, mfd_def_size * 4);
660 }
661 
662 static void mfd_fail_grow(int fd)
663 {
664 	int r;
665 
666 	r = ftruncate(fd, mfd_def_size * 2);
667 	if (r >= 0) {
668 		printf("ftruncate(GROW) didn't fail as expected\n");
669 		abort();
670 	}
671 
672 	r = fallocate(fd,
673 		      0,
674 		      0,
675 		      mfd_def_size * 4);
676 	if (r >= 0) {
677 		printf("fallocate(ALLOC) didn't fail as expected\n");
678 		abort();
679 	}
680 }
681 
682 static void mfd_assert_grow_write(int fd)
683 {
684 	static char *buf;
685 	ssize_t l;
686 
687 	/* hugetlbfs does not support write */
688 	if (hugetlbfs_test)
689 		return;
690 
691 	buf = malloc(mfd_def_size * 8);
692 	if (!buf) {
693 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
694 		abort();
695 	}
696 
697 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
698 	if (l != (mfd_def_size * 8)) {
699 		printf("pwrite() failed: %m\n");
700 		abort();
701 	}
702 
703 	mfd_assert_size(fd, mfd_def_size * 8);
704 }
705 
706 static void mfd_fail_grow_write(int fd)
707 {
708 	static char *buf;
709 	ssize_t l;
710 
711 	/* hugetlbfs does not support write */
712 	if (hugetlbfs_test)
713 		return;
714 
715 	buf = malloc(mfd_def_size * 8);
716 	if (!buf) {
717 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
718 		abort();
719 	}
720 
721 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
722 	if (l == (mfd_def_size * 8)) {
723 		printf("pwrite() didn't fail as expected\n");
724 		abort();
725 	}
726 }
727 
728 static void mfd_assert_mode(int fd, int mode)
729 {
730 	struct stat st;
731 	char buf[PATH_MAX];
732 
733 	fd2name(fd, buf, PATH_MAX);
734 
735 	if (fstat(fd, &st) < 0) {
736 		printf("fstat(%s) failed: %m\n", buf);
737 		abort();
738 	}
739 
740 	if ((st.st_mode & 07777) != mode) {
741 		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
742 		       buf, (int)st.st_mode & 07777, mode);
743 		abort();
744 	}
745 }
746 
747 static void mfd_assert_chmod(int fd, int mode)
748 {
749 	char buf[PATH_MAX];
750 
751 	fd2name(fd, buf, PATH_MAX);
752 
753 	if (fchmod(fd, mode) < 0) {
754 		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
755 		abort();
756 	}
757 
758 	mfd_assert_mode(fd, mode);
759 }
760 
761 static void mfd_fail_chmod(int fd, int mode)
762 {
763 	struct stat st;
764 	char buf[PATH_MAX];
765 
766 	fd2name(fd, buf, PATH_MAX);
767 
768 	if (fstat(fd, &st) < 0) {
769 		printf("fstat(%s) failed: %m\n", buf);
770 		abort();
771 	}
772 
773 	if (fchmod(fd, mode) == 0) {
774 		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
775 		       buf, mode);
776 		abort();
777 	}
778 
779 	/* verify that file mode bits did not change */
780 	mfd_assert_mode(fd, st.st_mode & 07777);
781 }
782 
783 static int idle_thread_fn(void *arg)
784 {
785 	sigset_t set;
786 	int sig;
787 
788 	/* dummy waiter; SIGTERM terminates us anyway */
789 	sigemptyset(&set);
790 	sigaddset(&set, SIGTERM);
791 	sigwait(&set, &sig);
792 
793 	return 0;
794 }
795 
796 static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
797 {
798 	uint8_t *stack;
799 	pid_t pid;
800 
801 	stack = malloc(STACK_SIZE);
802 	if (!stack) {
803 		printf("malloc(STACK_SIZE) failed: %m\n");
804 		abort();
805 	}
806 
807 	pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
808 	if (pid < 0) {
809 		printf("clone() failed: %m\n");
810 		abort();
811 	}
812 
813 	return pid;
814 }
815 
/*
 * Wait for the task @pid and abort if waitpid() fails, if the task exited
 * with a non-zero status, or if it was killed by a signal.
 */
static void join_thread(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) < 0) {
		printf("newpid thread: waitpid() failed: %m\n");
		abort();
	}

	if (WIFEXITED(status)) {
		if (WEXITSTATUS(status) != 0) {
			printf("newpid thread: exited with non-zero error code %d\n",
			       WEXITSTATUS(status));
			abort();
		}
	}

	if (WIFSIGNALED(status)) {
		printf("newpid thread: killed by signal %d\n",
		       WTERMSIG(status));
		abort();
	}
}
837 
838 static pid_t spawn_idle_thread(unsigned int flags)
839 {
840 	return spawn_thread(flags, idle_thread_fn, NULL);
841 }
842 
843 static void join_idle_thread(pid_t pid)
844 {
845 	kill(pid, SIGTERM);
846 	waitpid(pid, NULL, 0);
847 }
848 
849 /*
850  * Test memfd_create() syscall
851  * Verify syscall-argument validation, including name checks, flag validation
852  * and more.
853  */
854 static void test_create(void)
855 {
856 	char buf[2048];
857 	int fd;
858 
859 	printf("%s CREATE\n", memfd_str);
860 
861 	/* test NULL name */
862 	mfd_fail_new(NULL, 0);
863 
864 	/* test over-long name (not zero-terminated) */
865 	memset(buf, 0xff, sizeof(buf));
866 	mfd_fail_new(buf, 0);
867 
868 	/* test over-long zero-terminated name */
869 	memset(buf, 0xff, sizeof(buf));
870 	buf[sizeof(buf) - 1] = 0;
871 	mfd_fail_new(buf, 0);
872 
873 	/* verify "" is a valid name */
874 	fd = mfd_assert_new("", 0, 0);
875 	close(fd);
876 
877 	/* verify invalid O_* open flags */
878 	mfd_fail_new("", 0x0100);
879 	mfd_fail_new("", ~MFD_CLOEXEC);
880 	mfd_fail_new("", ~MFD_ALLOW_SEALING);
881 	mfd_fail_new("", ~0);
882 	mfd_fail_new("", 0x80000000U);
883 
884 	/* verify EXEC and NOEXEC_SEAL can't both be set */
885 	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
886 
887 	/* verify MFD_CLOEXEC is allowed */
888 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
889 	close(fd);
890 
891 	/* verify MFD_ALLOW_SEALING is allowed */
892 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
893 	close(fd);
894 
895 	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
896 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
897 	close(fd);
898 }
899 
900 /*
901  * Test basic sealing
902  * A very basic sealing test to see whether setting/retrieving seals works.
903  */
904 static void test_basic(void)
905 {
906 	int fd;
907 
908 	printf("%s BASIC\n", memfd_str);
909 
910 	fd = mfd_assert_new("kern_memfd_basic",
911 			    mfd_def_size,
912 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
913 
914 	/* add basic seals */
915 	mfd_assert_has_seals(fd, 0);
916 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
917 				 F_SEAL_WRITE);
918 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
919 				 F_SEAL_WRITE);
920 
921 	/* add them again */
922 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
923 				 F_SEAL_WRITE);
924 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
925 				 F_SEAL_WRITE);
926 
927 	/* add more seals and seal against sealing */
928 	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
929 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
930 				 F_SEAL_GROW |
931 				 F_SEAL_WRITE |
932 				 F_SEAL_SEAL);
933 
934 	/* verify that sealing no longer works */
935 	mfd_fail_add_seals(fd, F_SEAL_GROW);
936 	mfd_fail_add_seals(fd, 0);
937 
938 	close(fd);
939 
940 	/* verify sealing does not work without MFD_ALLOW_SEALING */
941 	fd = mfd_assert_new("kern_memfd_basic",
942 			    mfd_def_size,
943 			    MFD_CLOEXEC);
944 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
945 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
946 			       F_SEAL_GROW |
947 			       F_SEAL_WRITE);
948 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
949 	close(fd);
950 }
951 
952 /*
953  * Test SEAL_WRITE
954  * Test whether SEAL_WRITE actually prevents modifications.
955  */
956 static void test_seal_write(void)
957 {
958 	int fd;
959 
960 	printf("%s SEAL-WRITE\n", memfd_str);
961 
962 	fd = mfd_assert_new("kern_memfd_seal_write",
963 			    mfd_def_size,
964 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
965 	mfd_assert_has_seals(fd, 0);
966 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
967 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
968 
969 	mfd_assert_read(fd);
970 	mfd_fail_write(fd);
971 	mfd_assert_shrink(fd);
972 	mfd_assert_grow(fd);
973 	mfd_fail_grow_write(fd);
974 
975 	close(fd);
976 }
977 
978 /*
979  * Test SEAL_FUTURE_WRITE
980  * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
981  */
982 static void test_seal_future_write(void)
983 {
984 	int fd, fd2;
985 	void *p;
986 
987 	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
988 
989 	fd = mfd_assert_new("kern_memfd_seal_future_write",
990 			    mfd_def_size,
991 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
992 
993 	p = mfd_assert_mmap_shared(fd);
994 
995 	mfd_assert_has_seals(fd, 0);
996 
997 	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
998 	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
999 
1000 	/* read should pass, writes should fail */
1001 	mfd_assert_read(fd);
1002 	mfd_assert_read_shared(fd);
1003 	mfd_fail_write(fd);
1004 
1005 	fd2 = mfd_assert_reopen_fd(fd);
1006 	/* read should pass, writes should still fail */
1007 	mfd_assert_read(fd2);
1008 	mfd_assert_read_shared(fd2);
1009 	mfd_fail_write(fd2);
1010 
1011 	mfd_assert_fork_private_write(fd);
1012 
1013 	munmap(p, mfd_def_size);
1014 	close(fd2);
1015 	close(fd);
1016 }
1017 
1018 static void test_seal_write_map_read_shared(void)
1019 {
1020 	int fd;
1021 	void *p;
1022 
1023 	printf("%s SEAL-WRITE-MAP-READ\n", memfd_str);
1024 
1025 	fd = mfd_assert_new("kern_memfd_seal_write_map_read",
1026 			    mfd_def_size,
1027 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1028 
1029 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1030 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1031 
1032 	p = mfd_assert_mmap_read_shared(fd);
1033 
1034 	mfd_assert_read(fd);
1035 	mfd_assert_read_shared(fd);
1036 	mfd_fail_write(fd);
1037 
1038 	munmap(p, mfd_def_size);
1039 	close(fd);
1040 }
1041 
1042 /*
1043  * Test SEAL_SHRINK
1044  * Test whether SEAL_SHRINK actually prevents shrinking
1045  */
1046 static void test_seal_shrink(void)
1047 {
1048 	int fd;
1049 
1050 	printf("%s SEAL-SHRINK\n", memfd_str);
1051 
1052 	fd = mfd_assert_new("kern_memfd_seal_shrink",
1053 			    mfd_def_size,
1054 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1055 	mfd_assert_has_seals(fd, 0);
1056 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1057 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1058 
1059 	mfd_assert_read(fd);
1060 	mfd_assert_write(fd);
1061 	mfd_fail_shrink(fd);
1062 	mfd_assert_grow(fd);
1063 	mfd_assert_grow_write(fd);
1064 
1065 	close(fd);
1066 }
1067 
1068 /*
1069  * Test SEAL_GROW
1070  * Test whether SEAL_GROW actually prevents growing
1071  */
1072 static void test_seal_grow(void)
1073 {
1074 	int fd;
1075 
1076 	printf("%s SEAL-GROW\n", memfd_str);
1077 
1078 	fd = mfd_assert_new("kern_memfd_seal_grow",
1079 			    mfd_def_size,
1080 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1081 	mfd_assert_has_seals(fd, 0);
1082 	mfd_assert_add_seals(fd, F_SEAL_GROW);
1083 	mfd_assert_has_seals(fd, F_SEAL_GROW);
1084 
1085 	mfd_assert_read(fd);
1086 	mfd_assert_write(fd);
1087 	mfd_assert_shrink(fd);
1088 	mfd_fail_grow(fd);
1089 	mfd_fail_grow_write(fd);
1090 
1091 	close(fd);
1092 }
1093 
1094 /*
1095  * Test SEAL_SHRINK | SEAL_GROW
1096  * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
1097  */
1098 static void test_seal_resize(void)
1099 {
1100 	int fd;
1101 
1102 	printf("%s SEAL-RESIZE\n", memfd_str);
1103 
1104 	fd = mfd_assert_new("kern_memfd_seal_resize",
1105 			    mfd_def_size,
1106 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1107 	mfd_assert_has_seals(fd, 0);
1108 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1109 	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1110 
1111 	mfd_assert_read(fd);
1112 	mfd_assert_write(fd);
1113 	mfd_fail_shrink(fd);
1114 	mfd_fail_grow(fd);
1115 	mfd_fail_grow_write(fd);
1116 
1117 	close(fd);
1118 }
1119 
1120 /*
1121  * Test SEAL_EXEC
1122  * Test fd is created with exec and allow sealing.
1123  * chmod() cannot change x bits after sealing.
1124  */
1125 static void test_exec_seal(void)
1126 {
1127 	int fd;
1128 
1129 	printf("%s SEAL-EXEC\n", memfd_str);
1130 
1131 	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1132 	fd = mfd_assert_new("kern_memfd_seal_exec",
1133 			    mfd_def_size,
1134 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1135 
1136 	mfd_assert_mode(fd, 0777);
1137 	mfd_assert_chmod(fd, 0644);
1138 
1139 	mfd_assert_has_seals(fd, 0);
1140 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1141 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1142 
1143 	mfd_assert_chmod(fd, 0600);
1144 	mfd_fail_chmod(fd, 0777);
1145 	mfd_fail_chmod(fd, 0670);
1146 	mfd_fail_chmod(fd, 0605);
1147 	mfd_fail_chmod(fd, 0700);
1148 	mfd_fail_chmod(fd, 0100);
1149 	mfd_assert_chmod(fd, 0666);
1150 	mfd_assert_write(fd);
1151 	close(fd);
1152 
1153 	printf("%s	Apply ALL_SEALS\n", memfd_str);
1154 	fd = mfd_assert_new("kern_memfd_seal_exec",
1155 			    mfd_def_size,
1156 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1157 
1158 	mfd_assert_mode(fd, 0777);
1159 	mfd_assert_chmod(fd, 0700);
1160 
1161 	mfd_assert_has_seals(fd, 0);
1162 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1163 	mfd_assert_has_seals(fd, F_WX_SEALS);
1164 
1165 	mfd_fail_chmod(fd, 0711);
1166 	mfd_fail_chmod(fd, 0600);
1167 	mfd_fail_write(fd);
1168 	close(fd);
1169 }
1170 
1171 /*
1172  * Test EXEC_NO_SEAL
1173  * Test fd is created with exec and not allow sealing.
1174  */
1175 static void test_exec_no_seal(void)
1176 {
1177 	int fd;
1178 
1179 	printf("%s EXEC_NO_SEAL\n", memfd_str);
1180 
1181 	/* Create with EXEC but without ALLOW_SEALING */
1182 	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1183 			    mfd_def_size,
1184 			    MFD_CLOEXEC | MFD_EXEC);
1185 	mfd_assert_mode(fd, 0777);
1186 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1187 	mfd_assert_chmod(fd, 0666);
1188 	close(fd);
1189 }
1190 
1191 /*
1192  * Test memfd_create with MFD_NOEXEC flag
1193  */
1194 static void test_noexec_seal(void)
1195 {
1196 	int fd;
1197 
1198 	printf("%s NOEXEC_SEAL\n", memfd_str);
1199 
1200 	/* Create with NOEXEC and ALLOW_SEALING */
1201 	fd = mfd_assert_new("kern_memfd_noexec",
1202 			    mfd_def_size,
1203 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1204 	mfd_assert_mode(fd, 0666);
1205 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1206 	mfd_fail_chmod(fd, 0777);
1207 	close(fd);
1208 
1209 	/* Create with NOEXEC but without ALLOW_SEALING */
1210 	fd = mfd_assert_new("kern_memfd_noexec",
1211 			    mfd_def_size,
1212 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1213 	mfd_assert_mode(fd, 0666);
1214 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1215 	mfd_fail_chmod(fd, 0777);
1216 	close(fd);
1217 }
1218 
1219 static void test_sysctl_sysctl0(void)
1220 {
1221 	int fd;
1222 
1223 	sysctl_assert_equal("0");
1224 
1225 	fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
1226 			    mfd_def_size,
1227 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1228 	mfd_assert_mode(fd, 0777);
1229 	mfd_assert_has_seals(fd, 0);
1230 	mfd_assert_chmod(fd, 0644);
1231 	close(fd);
1232 }
1233 
/*
 * Set the memfd_noexec sysctl to "0", then run the checks for that level.
 */
static void test_sysctl_set_sysctl0(void)
{
	/* the write must succeed before the level-0 checks make sense */
	sysctl_assert_write("0");

	test_sysctl_sysctl0();
}
1239 
1240 static void test_sysctl_sysctl1(void)
1241 {
1242 	int fd;
1243 
1244 	sysctl_assert_equal("1");
1245 
1246 	fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
1247 			    mfd_def_size,
1248 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1249 	mfd_assert_mode(fd, 0666);
1250 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1251 	mfd_fail_chmod(fd, 0777);
1252 	close(fd);
1253 
1254 	fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
1255 			    mfd_def_size,
1256 			    MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1257 	mfd_assert_mode(fd, 0777);
1258 	mfd_assert_has_seals(fd, 0);
1259 	mfd_assert_chmod(fd, 0644);
1260 	close(fd);
1261 
1262 	fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
1263 			    mfd_def_size,
1264 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1265 	mfd_assert_mode(fd, 0666);
1266 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1267 	mfd_fail_chmod(fd, 0777);
1268 	close(fd);
1269 }
1270 
/*
 * Set the memfd_noexec sysctl to "1", then run the checks for that level.
 */
static void test_sysctl_set_sysctl1(void)
{
	/* the write must succeed before the level-1 checks make sense */
	sysctl_assert_write("1");

	test_sysctl_sysctl1();
}
1276 
1277 static void test_sysctl_sysctl2(void)
1278 {
1279 	int fd;
1280 
1281 	sysctl_assert_equal("2");
1282 
1283 	fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
1284 			    mfd_def_size,
1285 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1286 	mfd_assert_mode(fd, 0666);
1287 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1288 	mfd_fail_chmod(fd, 0777);
1289 	close(fd);
1290 
1291 	mfd_fail_new("kern_memfd_sysctl_2_exec",
1292 		     MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1293 
1294 	fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
1295 			    mfd_def_size,
1296 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1297 	mfd_assert_mode(fd, 0666);
1298 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1299 	mfd_fail_chmod(fd, 0777);
1300 	close(fd);
1301 }
1302 
/*
 * Set the memfd_noexec sysctl to "2", then run the checks for that level.
 */
static void test_sysctl_set_sysctl2(void)
{
	/* the write must succeed before the level-2 checks make sense */
	sysctl_assert_write("2");

	test_sysctl_sysctl2();
}
1308 
1309 static int sysctl_simple_child(void *arg)
1310 {
1311 	printf("%s sysctl 0\n", memfd_str);
1312 	test_sysctl_set_sysctl0();
1313 
1314 	printf("%s sysctl 1\n", memfd_str);
1315 	test_sysctl_set_sysctl1();
1316 
1317 	printf("%s sysctl 0\n", memfd_str);
1318 	test_sysctl_set_sysctl0();
1319 
1320 	printf("%s sysctl 2\n", memfd_str);
1321 	test_sysctl_set_sysctl2();
1322 
1323 	printf("%s sysctl 1\n", memfd_str);
1324 	test_sysctl_set_sysctl1();
1325 
1326 	printf("%s sysctl 0\n", memfd_str);
1327 	test_sysctl_set_sysctl0();
1328 
1329 	return 0;
1330 }
1331 
1332 /*
1333  * Test sysctl
1334  * A very basic test to make sure the core sysctl semantics work.
1335  */
1336 static void test_sysctl_simple(void)
1337 {
1338 	int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1339 
1340 	join_thread(pid);
1341 }
1342 
/*
 * Task body that runs the test function passed in @arg, which is called
 * as void (*)(void). Returns 0 once that function returns.
 */
static int sysctl_nested(void *arg)
{
	void (*test_fn)(void) = arg;

	test_fn();

	return 0;
}
1350 
1351 static int sysctl_nested_wait(void *arg)
1352 {
1353 	int sem = semget(SEM_KEY, 1, 0600);
1354 	struct sembuf sembuf;
1355 
1356 	if (sem < 0) {
1357 		perror("semget:");
1358 		abort();
1359 	}
1360 	sembuf.sem_num = 0;
1361 	sembuf.sem_flg = 0;
1362 	sembuf.sem_op = 0;
1363 
1364 	if (semop(sem, &sembuf, 1) < 0) {
1365 		perror("semop:");
1366 		abort();
1367 	}
1368 
1369 	return sysctl_nested(arg);
1370 }
1371 
/*
 * Expect a write of "0" to be rejected (sysctl_fail_write), then run the
 * test_sysctl_sysctl1() checks.
 */
static void test_sysctl_sysctl1_failset(void)
{
	sysctl_fail_write("0");

	test_sysctl_sysctl1();
}
1377 
/*
 * Expect writes of "1" and then "0" to be rejected (sysctl_fail_write),
 * running the test_sysctl_sysctl2() checks after each rejected write.
 */
static void test_sysctl_sysctl2_failset(void)
{
	/* First rejected write: "1". */
	sysctl_fail_write("1");
	test_sysctl_sysctl2();

	/* Second rejected write: "0". */
	sysctl_fail_write("0");
	test_sysctl_sysctl2();
}
1386 
1387 static int sysctl_nested_child(void *arg)
1388 {
1389 	int pid, sem;
1390 	union semun semun;
1391 	struct sembuf sembuf;
1392 
1393 	printf("%s nested sysctl 0\n", memfd_str);
1394 	sysctl_assert_write("0");
1395 	/* A further nested pidns works the same. */
1396 	pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1397 	join_thread(pid);
1398 
1399 	printf("%s nested sysctl 1\n", memfd_str);
1400 	sysctl_assert_write("1");
1401 	/* Child inherits our setting. */
1402 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
1403 	join_thread(pid);
1404 	/* Child cannot raise the setting. */
1405 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1406 			   test_sysctl_sysctl1_failset);
1407 	join_thread(pid);
1408 	/* Child can lower the setting. */
1409 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1410 			   test_sysctl_set_sysctl2);
1411 	join_thread(pid);
1412 	/* Child lowering the setting has no effect on our setting. */
1413 	test_sysctl_sysctl1();
1414 
1415 	printf("%s nested sysctl 2\n", memfd_str);
1416 	sysctl_assert_write("2");
1417 	/* Child inherits our setting. */
1418 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
1419 	join_thread(pid);
1420 	/* Child cannot raise the setting. */
1421 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1422 			   test_sysctl_sysctl2_failset);
1423 	join_thread(pid);
1424 
1425 	sem = semget(SEM_KEY, 1, IPC_CREAT | 0600);
1426 	if (sem < 0) {
1427 		perror("semget:");
1428 		return 1;
1429 	}
1430 	semun.val = 1;
1431 	sembuf.sem_op = -1;
1432 	sembuf.sem_flg = 0;
1433 	sembuf.sem_num = 0;
1434 
1435 	/* Verify that the rules are actually inherited after fork. */
1436 	printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
1437 	sysctl_assert_write("0");
1438 
1439 	if (semctl(sem, 0, SETVAL, semun) < 0) {
1440 		perror("semctl:");
1441 		return 1;
1442 	}
1443 
1444 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1445 			   test_sysctl_sysctl1_failset);
1446 	sysctl_assert_write("1");
1447 
1448 	/* Allow child to continue */
1449 	if (semop(sem, &sembuf, 1) < 0) {
1450 		perror("semop:");
1451 		return 1;
1452 	}
1453 	join_thread(pid);
1454 
1455 	printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
1456 	sysctl_assert_write("0");
1457 
1458 	if (semctl(sem, 0, SETVAL, semun) < 0) {
1459 		perror("semctl:");
1460 		return 1;
1461 	}
1462 
1463 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1464 			   test_sysctl_sysctl2_failset);
1465 	sysctl_assert_write("2");
1466 
1467 	/* Allow child to continue */
1468 	if (semop(sem, &sembuf, 1) < 0) {
1469 		perror("semop:");
1470 		return 1;
1471 	}
1472 	join_thread(pid);
1473 
1474 	/*
1475 	 * Verify that the current effective setting is saved on fork, meaning
1476 	 * that the parent lowering the sysctl doesn't affect already-forked
1477 	 * children.
1478 	 */
1479 	printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
1480 	sysctl_assert_write("2");
1481 
1482 	if (semctl(sem, 0, SETVAL, semun) < 0) {
1483 		perror("semctl:");
1484 		return 1;
1485 	}
1486 
1487 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1488 			   test_sysctl_sysctl2);
1489 	sysctl_assert_write("1");
1490 
1491 	/* Allow child to continue */
1492 	if (semop(sem, &sembuf, 1) < 0) {
1493 		perror("semop:");
1494 		return 1;
1495 	}
1496 	join_thread(pid);
1497 
1498 	printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
1499 	sysctl_assert_write("2");
1500 
1501 	if (semctl(sem, 0, SETVAL, semun) < 0) {
1502 		perror("semctl:");
1503 		return 1;
1504 	}
1505 
1506 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1507 			   test_sysctl_sysctl2);
1508 	sysctl_assert_write("0");
1509 
1510 	/* Allow child to continue */
1511 	if (semop(sem, &sembuf, 1) < 0) {
1512 		perror("semop:");
1513 		return 1;
1514 	}
1515 	join_thread(pid);
1516 
1517 	printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
1518 	sysctl_assert_write("1");
1519 
1520 	if (semctl(sem, 0, SETVAL, semun) < 0) {
1521 		perror("semctl:");
1522 		return 1;
1523 	}
1524 
1525 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1526 			   test_sysctl_sysctl1);
1527 	sysctl_assert_write("0");
1528 	/* Allow child to continue */
1529 	if (semop(sem, &sembuf, 1) < 0) {
1530 		perror("semop:");
1531 		return 1;
1532 	}
1533 	join_thread(pid);
1534 
1535 	semctl(sem, 0, IPC_RMID);
1536 
1537 	return 0;
1538 }
1539 
1540 /*
1541  * Test sysctl with nested pid namespaces
1542  * Make sure that the sysctl nesting semantics work correctly.
1543  */
1544 static void test_sysctl_nested(void)
1545 {
1546 	int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
1547 
1548 	join_thread(pid);
1549 }
1550 
1551 /*
1552  * Test sharing via dup()
1553  * Test that seals are shared between dupped FDs and they're all equal.
1554  */
1555 static void test_share_dup(char *banner, char *b_suffix)
1556 {
1557 	int fd, fd2;
1558 
1559 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1560 
1561 	fd = mfd_assert_new("kern_memfd_share_dup",
1562 			    mfd_def_size,
1563 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1564 	mfd_assert_has_seals(fd, 0);
1565 
1566 	fd2 = mfd_assert_dup(fd);
1567 	mfd_assert_has_seals(fd2, 0);
1568 
1569 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1570 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1571 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1572 
1573 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1574 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1575 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1576 
1577 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1578 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1579 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1580 
1581 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1582 	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1583 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1584 	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1585 
1586 	close(fd2);
1587 
1588 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1589 	close(fd);
1590 }
1591 
1592 /*
1593  * Test sealing with active mmap()s
1594  * Modifying seals is only allowed if no other mmap() refs exist.
1595  */
1596 static void test_share_mmap(char *banner, char *b_suffix)
1597 {
1598 	int fd;
1599 	void *p;
1600 
1601 	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1602 
1603 	fd = mfd_assert_new("kern_memfd_share_mmap",
1604 			    mfd_def_size,
1605 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1606 	mfd_assert_has_seals(fd, 0);
1607 
1608 	/* shared/writable ref prevents sealing WRITE, but allows others */
1609 	p = mfd_assert_mmap_shared(fd);
1610 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1611 	mfd_assert_has_seals(fd, 0);
1612 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1613 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1614 	munmap(p, mfd_def_size);
1615 
1616 	/* readable ref allows sealing */
1617 	p = mfd_assert_mmap_private(fd);
1618 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1619 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1620 	munmap(p, mfd_def_size);
1621 
1622 	close(fd);
1623 }
1624 
1625 /*
1626  * Test sealing with open(/proc/self/fd/%d)
1627  * Via /proc we can get access to a separate file-context for the same memfd.
1628  * This is *not* like dup(), but like a real separate open(). Make sure the
1629  * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1630  */
1631 static void test_share_open(char *banner, char *b_suffix)
1632 {
1633 	int fd, fd2;
1634 
1635 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1636 
1637 	fd = mfd_assert_new("kern_memfd_share_open",
1638 			    mfd_def_size,
1639 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1640 	mfd_assert_has_seals(fd, 0);
1641 
1642 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1643 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1644 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1645 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1646 
1647 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1648 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1649 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1650 
1651 	close(fd);
1652 	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1653 
1654 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1655 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1656 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1657 
1658 	close(fd2);
1659 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1660 
1661 	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1662 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1663 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1664 
1665 	close(fd2);
1666 	close(fd);
1667 }
1668 
1669 /*
1670  * Test sharing via fork()
1671  * Test whether seal-modifications work as expected with forked children.
1672  */
1673 static void test_share_fork(char *banner, char *b_suffix)
1674 {
1675 	int fd;
1676 	pid_t pid;
1677 
1678 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1679 
1680 	fd = mfd_assert_new("kern_memfd_share_fork",
1681 			    mfd_def_size,
1682 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1683 	mfd_assert_has_seals(fd, 0);
1684 
1685 	pid = spawn_idle_thread(0);
1686 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1687 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1688 
1689 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1690 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1691 
1692 	join_idle_thread(pid);
1693 
1694 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1695 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1696 
1697 	close(fd);
1698 }
1699 
/*
 * Report whether /proc/self/ns/pid exists, which this test uses as the
 * indicator that PID namespaces are available.
 */
static bool pid_ns_supported(void)
{
	if (access("/proc/self/ns/pid", F_OK) != 0)
		return false;

	return true;
}
1704 
1705 int main(int argc, char **argv)
1706 {
1707 	pid_t pid;
1708 
1709 	if (argc == 2) {
1710 		if (!strcmp(argv[1], "hugetlbfs")) {
1711 			unsigned long hpage_size = default_huge_page_size();
1712 
1713 			if (!hpage_size) {
1714 				printf("Unable to determine huge page size\n");
1715 				abort();
1716 			}
1717 
1718 			hugetlbfs_test = 1;
1719 			memfd_str = MEMFD_HUGE_STR;
1720 			mfd_def_size = hpage_size * 2;
1721 		} else {
1722 			printf("Unknown option: %s\n", argv[1]);
1723 			abort();
1724 		}
1725 	}
1726 
1727 	test_create();
1728 	test_basic();
1729 	test_exec_seal();
1730 	test_exec_no_seal();
1731 	test_noexec_seal();
1732 
1733 	test_seal_write();
1734 	test_seal_future_write();
1735 	test_seal_write_map_read_shared();
1736 	test_seal_shrink();
1737 	test_seal_grow();
1738 	test_seal_resize();
1739 
1740 	if (pid_ns_supported()) {
1741 		test_sysctl_simple();
1742 		test_sysctl_nested();
1743 	} else {
1744 		printf("PID namespaces are not supported; skipping sysctl tests\n");
1745 	}
1746 
1747 	test_share_dup("SHARE-DUP", "");
1748 	test_share_mmap("SHARE-MMAP", "");
1749 	test_share_open("SHARE-OPEN", "");
1750 	test_share_fork("SHARE-FORK", "");
1751 
1752 	/* Run test-suite in a multi-threaded environment with a shared
1753 	 * file-table. */
1754 	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1755 	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1756 	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1757 	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1758 	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1759 	join_idle_thread(pid);
1760 
1761 	printf("memfd: DONE\n");
1762 
1763 	return 0;
1764 }
1765