xref: /linux/tools/testing/selftests/memfd/memfd_test.c (revision 5635d8bad221701188017a6087fbe25ab245c226)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4 
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <signal.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 #include <sys/stat.h>
19 #include <sys/syscall.h>
20 #include <sys/wait.h>
21 #include <unistd.h>
22 #include <ctype.h>
23 
24 #include "common.h"
25 
26 #define MEMFD_STR	"memfd:"
27 #define MEMFD_HUGE_STR	"memfd-hugetlb:"
28 #define SHARED_FT_STR	"(shared file-table)"
29 
30 #define MFD_DEF_SIZE 8192
31 #define STACK_SIZE 65536
32 
33 #define F_SEAL_EXEC	0x0020
34 
35 #define F_WX_SEALS (F_SEAL_SHRINK | \
36 		    F_SEAL_GROW | \
37 		    F_SEAL_WRITE | \
38 		    F_SEAL_FUTURE_WRITE | \
39 		    F_SEAL_EXEC)
40 
41 #define MFD_NOEXEC_SEAL	0x0008U
42 
43 /*
44  * Default is not to test hugetlbfs
45  */
46 static size_t mfd_def_size = MFD_DEF_SIZE;
47 static const char *memfd_str = MEMFD_STR;
48 
fd2name(int fd,char * buf,size_t bufsize)49 static ssize_t fd2name(int fd, char *buf, size_t bufsize)
50 {
51 	char buf1[PATH_MAX];
52 	int size;
53 	ssize_t nbytes;
54 
55 	size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
56 	if (size < 0) {
57 		printf("snprintf(%d) failed on %m\n", fd);
58 		abort();
59 	}
60 
61 	/*
62 	 * reserver one byte for string termination.
63 	 */
64 	nbytes = readlink(buf1, buf, bufsize-1);
65 	if (nbytes == -1) {
66 		printf("readlink(%s) failed %m\n", buf1);
67 		abort();
68 	}
69 	buf[nbytes] = '\0';
70 	return nbytes;
71 }
72 
/*
 * Create a memfd through sys_memfd_create() using @name and @flags, then
 * resize it to @sz bytes with ftruncate().
 *
 * Returns the new file descriptor. Aborts if either call fails.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int memfd = sys_memfd_create(name, flags);

	if (memfd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n",
		       name, flags);
		abort();
	}

	if (ftruncate(memfd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return memfd;
}
92 
sysctl_assert_write(const char * val)93 static void sysctl_assert_write(const char *val)
94 {
95 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
96 
97 	if (fd < 0) {
98 		printf("open sysctl failed: %m\n");
99 		abort();
100 	}
101 
102 	if (write(fd, val, strlen(val)) < 0) {
103 		printf("write sysctl %s failed: %m\n", val);
104 		abort();
105 	}
106 }
107 
sysctl_fail_write(const char * val)108 static void sysctl_fail_write(const char *val)
109 {
110 	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
111 
112 	if (fd < 0) {
113 		printf("open sysctl failed: %m\n");
114 		abort();
115 	}
116 
117 	if (write(fd, val, strlen(val)) >= 0) {
118 		printf("write sysctl %s succeeded, but failure expected\n",
119 				val);
120 		abort();
121 	}
122 }
123 
sysctl_assert_equal(const char * val)124 static void sysctl_assert_equal(const char *val)
125 {
126 	char *p, buf[128] = {};
127 	int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
128 
129 	if (fd < 0) {
130 		printf("open sysctl failed: %m\n");
131 		abort();
132 	}
133 
134 	if (read(fd, buf, sizeof(buf)) < 0) {
135 		printf("read sysctl failed: %m\n");
136 		abort();
137 	}
138 
139 	/* Strip trailing whitespace. */
140 	p = buf;
141 	while (!isspace(*p))
142 		p++;
143 	*p = '\0';
144 
145 	if (strcmp(buf, val) != 0) {
146 		printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
147 		abort();
148 	}
149 }
150 
/*
 * Open a second, independent read-write descriptor for @fd_in by going
 * through its /proc/self/fd entry.
 *
 * Returns the new descriptor; aborts if the open fails.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char proc_path[100];
	int reopened;

	sprintf(proc_path, "/proc/self/fd/%d", fd_in);

	reopened = open(proc_path, O_RDWR);
	if (reopened >= 0)
		return reopened;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
166 
/*
 * Expect sys_memfd_create(@name, @flags) to fail.
 *
 * If a descriptor is returned anyway it is closed and the test aborts.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int memfd = sys_memfd_create(name, flags);

	if (memfd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(memfd);
	abort();
}
179 
mfd_assert_get_seals(int fd)180 static unsigned int mfd_assert_get_seals(int fd)
181 {
182 	int r;
183 
184 	r = fcntl(fd, F_GET_SEALS);
185 	if (r < 0) {
186 		printf("GET_SEALS(%d) failed: %m\n", fd);
187 		abort();
188 	}
189 
190 	return (unsigned int)r;
191 }
192 
mfd_assert_has_seals(int fd,unsigned int seals)193 static void mfd_assert_has_seals(int fd, unsigned int seals)
194 {
195 	char buf[PATH_MAX];
196 	unsigned int s;
197 	fd2name(fd, buf, PATH_MAX);
198 
199 	s = mfd_assert_get_seals(fd);
200 	if (s != seals) {
201 		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
202 		abort();
203 	}
204 }
205 
mfd_assert_add_seals(int fd,unsigned int seals)206 static void mfd_assert_add_seals(int fd, unsigned int seals)
207 {
208 	int r;
209 	unsigned int s;
210 
211 	s = mfd_assert_get_seals(fd);
212 	r = fcntl(fd, F_ADD_SEALS, seals);
213 	if (r < 0) {
214 		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
215 		abort();
216 	}
217 }
218 
mfd_fail_add_seals(int fd,unsigned int seals)219 static void mfd_fail_add_seals(int fd, unsigned int seals)
220 {
221 	int r;
222 	unsigned int s;
223 
224 	r = fcntl(fd, F_GET_SEALS);
225 	if (r < 0)
226 		s = 0;
227 	else
228 		s = (unsigned int)r;
229 
230 	r = fcntl(fd, F_ADD_SEALS, seals);
231 	if (r >= 0) {
232 		printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
233 				fd, s, seals);
234 		abort();
235 	}
236 }
237 
/*
 * Abort unless fstat() reports that @fd is exactly @size bytes long
 * (or if fstat() itself fails).
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat info;

	if (fstat(fd, &info) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (info.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)info.st_size, (long long)size);
		abort();
	}
}
253 
/*
 * Duplicate @fd with dup().
 *
 * Returns the duplicate descriptor; aborts on failure.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy >= 0)
		return copy;

	printf("dup(%d) failed: %m\n", fd);
	abort();
}
266 
mfd_assert_mmap_shared(int fd)267 static void *mfd_assert_mmap_shared(int fd)
268 {
269 	void *p;
270 
271 	p = mmap(NULL,
272 		 mfd_def_size,
273 		 PROT_READ | PROT_WRITE,
274 		 MAP_SHARED,
275 		 fd,
276 		 0);
277 	if (p == MAP_FAILED) {
278 		printf("mmap() failed: %m\n");
279 		abort();
280 	}
281 
282 	return p;
283 }
284 
mfd_assert_mmap_read_shared(int fd)285 static void *mfd_assert_mmap_read_shared(int fd)
286 {
287 	void *p;
288 
289 	p = mmap(NULL,
290 		 mfd_def_size,
291 		 PROT_READ,
292 		 MAP_SHARED,
293 		 fd,
294 		 0);
295 	if (p == MAP_FAILED) {
296 		printf("mmap() failed: %m\n");
297 		abort();
298 	}
299 
300 	return p;
301 }
302 
mfd_assert_mmap_private(int fd)303 static void *mfd_assert_mmap_private(int fd)
304 {
305 	void *p;
306 
307 	p = mmap(NULL,
308 		 mfd_def_size,
309 		 PROT_READ,
310 		 MAP_PRIVATE,
311 		 fd,
312 		 0);
313 	if (p == MAP_FAILED) {
314 		printf("mmap() failed: %m\n");
315 		abort();
316 	}
317 
318 	return p;
319 }
320 
/*
 * Open /proc/self/fd/<fd> with the given open(2) @flags and @mode.
 *
 * Returns the resulting descriptor; aborts if the open fails.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];
	int opened;

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	opened = open(proc_path, flags, mode);
	if (opened >= 0)
		return opened;

	printf("open(%s) failed: %m\n", proc_path);
	abort();
}
335 
/*
 * Expect opening /proc/self/fd/<fd> with @flags and @mode to fail;
 * abort if the open succeeds.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char proc_path[512];

	sprintf(proc_path, "/proc/self/fd/%d", fd);

	if (open(proc_path, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", proc_path);
	abort();
}
348 
mfd_assert_read(int fd)349 static void mfd_assert_read(int fd)
350 {
351 	char buf[16];
352 	void *p;
353 	ssize_t l;
354 
355 	l = read(fd, buf, sizeof(buf));
356 	if (l != sizeof(buf)) {
357 		printf("read() failed: %m\n");
358 		abort();
359 	}
360 
361 	/* verify PROT_READ *is* allowed */
362 	p = mmap(NULL,
363 		 mfd_def_size,
364 		 PROT_READ,
365 		 MAP_PRIVATE,
366 		 fd,
367 		 0);
368 	if (p == MAP_FAILED) {
369 		printf("mmap() failed: %m\n");
370 		abort();
371 	}
372 	munmap(p, mfd_def_size);
373 
374 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
375 	p = mmap(NULL,
376 		 mfd_def_size,
377 		 PROT_READ | PROT_WRITE,
378 		 MAP_PRIVATE,
379 		 fd,
380 		 0);
381 	if (p == MAP_FAILED) {
382 		printf("mmap() failed: %m\n");
383 		abort();
384 	}
385 	munmap(p, mfd_def_size);
386 }
387 
388 /* Test that PROT_READ + MAP_SHARED mappings work. */
mfd_assert_read_shared(int fd)389 static void mfd_assert_read_shared(int fd)
390 {
391 	void *p;
392 
393 	/* verify PROT_READ and MAP_SHARED *is* allowed */
394 	p = mmap(NULL,
395 		 mfd_def_size,
396 		 PROT_READ,
397 		 MAP_SHARED,
398 		 fd,
399 		 0);
400 	if (p == MAP_FAILED) {
401 		printf("mmap() failed: %m\n");
402 		abort();
403 	}
404 	munmap(p, mfd_def_size);
405 }
406 
mfd_assert_fork_private_write(int fd)407 static void mfd_assert_fork_private_write(int fd)
408 {
409 	int *p;
410 	pid_t pid;
411 
412 	p = mmap(NULL,
413 		 mfd_def_size,
414 		 PROT_READ | PROT_WRITE,
415 		 MAP_PRIVATE,
416 		 fd,
417 		 0);
418 	if (p == MAP_FAILED) {
419 		printf("mmap() failed: %m\n");
420 		abort();
421 	}
422 
423 	p[0] = 22;
424 
425 	pid = fork();
426 	if (pid == 0) {
427 		p[0] = 33;
428 		exit(0);
429 	} else {
430 		waitpid(pid, NULL, 0);
431 
432 		if (p[0] != 22) {
433 			printf("MAP_PRIVATE copy-on-write failed: %m\n");
434 			abort();
435 		}
436 	}
437 
438 	munmap(p, mfd_def_size);
439 }
440 
mfd_assert_write(int fd)441 static void mfd_assert_write(int fd)
442 {
443 	ssize_t l;
444 	void *p;
445 	int r;
446 
447 	/*
448 	 * huegtlbfs does not support write, but we want to
449 	 * verify everything else here.
450 	 */
451 	if (!hugetlbfs_test) {
452 		/* verify write() succeeds */
453 		l = write(fd, "\0\0\0\0", 4);
454 		if (l != 4) {
455 			printf("write() failed: %m\n");
456 			abort();
457 		}
458 	}
459 
460 	/* verify PROT_READ | PROT_WRITE is allowed */
461 	p = mmap(NULL,
462 		 mfd_def_size,
463 		 PROT_READ | PROT_WRITE,
464 		 MAP_SHARED,
465 		 fd,
466 		 0);
467 	if (p == MAP_FAILED) {
468 		printf("mmap() failed: %m\n");
469 		abort();
470 	}
471 	*(char *)p = 0;
472 	munmap(p, mfd_def_size);
473 
474 	/* verify PROT_WRITE is allowed */
475 	p = mmap(NULL,
476 		 mfd_def_size,
477 		 PROT_WRITE,
478 		 MAP_SHARED,
479 		 fd,
480 		 0);
481 	if (p == MAP_FAILED) {
482 		printf("mmap() failed: %m\n");
483 		abort();
484 	}
485 	*(char *)p = 0;
486 	munmap(p, mfd_def_size);
487 
488 	/* verify PROT_READ with MAP_SHARED is allowed and a following
489 	 * mprotect(PROT_WRITE) allows writing */
490 	p = mmap(NULL,
491 		 mfd_def_size,
492 		 PROT_READ,
493 		 MAP_SHARED,
494 		 fd,
495 		 0);
496 	if (p == MAP_FAILED) {
497 		printf("mmap() failed: %m\n");
498 		abort();
499 	}
500 
501 	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
502 	if (r < 0) {
503 		printf("mprotect() failed: %m\n");
504 		abort();
505 	}
506 
507 	*(char *)p = 0;
508 	munmap(p, mfd_def_size);
509 
510 	/* verify PUNCH_HOLE works */
511 	r = fallocate(fd,
512 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
513 		      0,
514 		      mfd_def_size);
515 	if (r < 0) {
516 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
517 		abort();
518 	}
519 }
520 
mfd_fail_write(int fd)521 static void mfd_fail_write(int fd)
522 {
523 	ssize_t l;
524 	void *p;
525 	int r;
526 
527 	/* verify write() fails */
528 	l = write(fd, "data", 4);
529 	if (l != -EPERM) {
530 		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
531 		abort();
532 	}
533 
534 	/* verify PROT_READ | PROT_WRITE is not allowed */
535 	p = mmap(NULL,
536 		 mfd_def_size,
537 		 PROT_READ | PROT_WRITE,
538 		 MAP_SHARED,
539 		 fd,
540 		 0);
541 	if (p != MAP_FAILED) {
542 		printf("mmap() didn't fail as expected\n");
543 		abort();
544 	}
545 
546 	/* verify PROT_WRITE is not allowed */
547 	p = mmap(NULL,
548 		 mfd_def_size,
549 		 PROT_WRITE,
550 		 MAP_SHARED,
551 		 fd,
552 		 0);
553 	if (p != MAP_FAILED) {
554 		printf("mmap() didn't fail as expected\n");
555 		abort();
556 	}
557 
558 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
559 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
560 	p = mmap(NULL,
561 		 mfd_def_size,
562 		 PROT_READ,
563 		 MAP_SHARED,
564 		 fd,
565 		 0);
566 	if (p != MAP_FAILED) {
567 		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
568 		if (r >= 0) {
569 			printf("mmap()+mprotect() didn't fail as expected\n");
570 			abort();
571 		}
572 		munmap(p, mfd_def_size);
573 	}
574 
575 	/* verify PUNCH_HOLE fails */
576 	r = fallocate(fd,
577 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
578 		      0,
579 		      mfd_def_size);
580 	if (r >= 0) {
581 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
582 		abort();
583 	}
584 }
585 
mfd_assert_shrink(int fd)586 static void mfd_assert_shrink(int fd)
587 {
588 	int r, fd2;
589 
590 	r = ftruncate(fd, mfd_def_size / 2);
591 	if (r < 0) {
592 		printf("ftruncate(SHRINK) failed: %m\n");
593 		abort();
594 	}
595 
596 	mfd_assert_size(fd, mfd_def_size / 2);
597 
598 	fd2 = mfd_assert_open(fd,
599 			      O_RDWR | O_CREAT | O_TRUNC,
600 			      S_IRUSR | S_IWUSR);
601 	close(fd2);
602 
603 	mfd_assert_size(fd, 0);
604 }
605 
mfd_fail_shrink(int fd)606 static void mfd_fail_shrink(int fd)
607 {
608 	int r;
609 
610 	r = ftruncate(fd, mfd_def_size / 2);
611 	if (r >= 0) {
612 		printf("ftruncate(SHRINK) didn't fail as expected\n");
613 		abort();
614 	}
615 
616 	mfd_fail_open(fd,
617 		      O_RDWR | O_CREAT | O_TRUNC,
618 		      S_IRUSR | S_IWUSR);
619 }
620 
mfd_assert_grow(int fd)621 static void mfd_assert_grow(int fd)
622 {
623 	int r;
624 
625 	r = ftruncate(fd, mfd_def_size * 2);
626 	if (r < 0) {
627 		printf("ftruncate(GROW) failed: %m\n");
628 		abort();
629 	}
630 
631 	mfd_assert_size(fd, mfd_def_size * 2);
632 
633 	r = fallocate(fd,
634 		      0,
635 		      0,
636 		      mfd_def_size * 4);
637 	if (r < 0) {
638 		printf("fallocate(ALLOC) failed: %m\n");
639 		abort();
640 	}
641 
642 	mfd_assert_size(fd, mfd_def_size * 4);
643 }
644 
mfd_fail_grow(int fd)645 static void mfd_fail_grow(int fd)
646 {
647 	int r;
648 
649 	r = ftruncate(fd, mfd_def_size * 2);
650 	if (r >= 0) {
651 		printf("ftruncate(GROW) didn't fail as expected\n");
652 		abort();
653 	}
654 
655 	r = fallocate(fd,
656 		      0,
657 		      0,
658 		      mfd_def_size * 4);
659 	if (r >= 0) {
660 		printf("fallocate(ALLOC) didn't fail as expected\n");
661 		abort();
662 	}
663 }
664 
mfd_assert_grow_write(int fd)665 static void mfd_assert_grow_write(int fd)
666 {
667 	static char *buf;
668 	ssize_t l;
669 
670 	/* hugetlbfs does not support write */
671 	if (hugetlbfs_test)
672 		return;
673 
674 	buf = malloc(mfd_def_size * 8);
675 	if (!buf) {
676 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
677 		abort();
678 	}
679 
680 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
681 	if (l != (mfd_def_size * 8)) {
682 		printf("pwrite() failed: %m\n");
683 		abort();
684 	}
685 
686 	mfd_assert_size(fd, mfd_def_size * 8);
687 }
688 
mfd_fail_grow_write(int fd)689 static void mfd_fail_grow_write(int fd)
690 {
691 	static char *buf;
692 	ssize_t l;
693 
694 	/* hugetlbfs does not support write */
695 	if (hugetlbfs_test)
696 		return;
697 
698 	buf = malloc(mfd_def_size * 8);
699 	if (!buf) {
700 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
701 		abort();
702 	}
703 
704 	l = pwrite(fd, buf, mfd_def_size * 8, 0);
705 	if (l == (mfd_def_size * 8)) {
706 		printf("pwrite() didn't fail as expected\n");
707 		abort();
708 	}
709 }
710 
mfd_assert_mode(int fd,int mode)711 static void mfd_assert_mode(int fd, int mode)
712 {
713 	struct stat st;
714 	char buf[PATH_MAX];
715 
716 	fd2name(fd, buf, PATH_MAX);
717 
718 	if (fstat(fd, &st) < 0) {
719 		printf("fstat(%s) failed: %m\n", buf);
720 		abort();
721 	}
722 
723 	if ((st.st_mode & 07777) != mode) {
724 		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
725 		       buf, (int)st.st_mode & 07777, mode);
726 		abort();
727 	}
728 }
729 
mfd_assert_chmod(int fd,int mode)730 static void mfd_assert_chmod(int fd, int mode)
731 {
732 	char buf[PATH_MAX];
733 
734 	fd2name(fd, buf, PATH_MAX);
735 
736 	if (fchmod(fd, mode) < 0) {
737 		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
738 		abort();
739 	}
740 
741 	mfd_assert_mode(fd, mode);
742 }
743 
mfd_fail_chmod(int fd,int mode)744 static void mfd_fail_chmod(int fd, int mode)
745 {
746 	struct stat st;
747 	char buf[PATH_MAX];
748 
749 	fd2name(fd, buf, PATH_MAX);
750 
751 	if (fstat(fd, &st) < 0) {
752 		printf("fstat(%s) failed: %m\n", buf);
753 		abort();
754 	}
755 
756 	if (fchmod(fd, mode) == 0) {
757 		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
758 		       buf, mode);
759 		abort();
760 	}
761 
762 	/* verify that file mode bits did not change */
763 	mfd_assert_mode(fd, st.st_mode & 07777);
764 }
765 
idle_thread_fn(void * arg)766 static int idle_thread_fn(void *arg)
767 {
768 	sigset_t set;
769 	int sig;
770 
771 	/* dummy waiter; SIGTERM terminates us anyway */
772 	sigemptyset(&set);
773 	sigaddset(&set, SIGTERM);
774 	sigwait(&set, &sig);
775 
776 	return 0;
777 }
778 
spawn_thread(unsigned int flags,int (* fn)(void *),void * arg)779 static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
780 {
781 	uint8_t *stack;
782 	pid_t pid;
783 
784 	stack = malloc(STACK_SIZE);
785 	if (!stack) {
786 		printf("malloc(STACK_SIZE) failed: %m\n");
787 		abort();
788 	}
789 
790 	pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
791 	if (pid < 0) {
792 		printf("clone() failed: %m\n");
793 		abort();
794 	}
795 
796 	return pid;
797 }
798 
/*
 * Wait for the task @pid and abort unless it finished cleanly, i.e. if
 * waitpid() fails, the task exited with a non-zero status, or it was
 * killed by a signal.
 */
static void join_thread(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) < 0) {
		printf("newpid thread: waitpid() failed: %m\n");
		abort();
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
		printf("newpid thread: exited with non-zero error code %d\n",
		       WEXITSTATUS(status));
		abort();
	} else if (WIFSIGNALED(status)) {
		printf("newpid thread: killed by signal %d\n",
		       WTERMSIG(status));
		abort();
	}
}
820 
spawn_idle_thread(unsigned int flags)821 static pid_t spawn_idle_thread(unsigned int flags)
822 {
823 	return spawn_thread(flags, idle_thread_fn, NULL);
824 }
825 
join_idle_thread(pid_t pid)826 static void join_idle_thread(pid_t pid)
827 {
828 	kill(pid, SIGTERM);
829 	waitpid(pid, NULL, 0);
830 }
831 
832 /*
833  * Test memfd_create() syscall
834  * Verify syscall-argument validation, including name checks, flag validation
835  * and more.
836  */
test_create(void)837 static void test_create(void)
838 {
839 	char buf[2048];
840 	int fd;
841 
842 	printf("%s CREATE\n", memfd_str);
843 
844 	/* test NULL name */
845 	mfd_fail_new(NULL, 0);
846 
847 	/* test over-long name (not zero-terminated) */
848 	memset(buf, 0xff, sizeof(buf));
849 	mfd_fail_new(buf, 0);
850 
851 	/* test over-long zero-terminated name */
852 	memset(buf, 0xff, sizeof(buf));
853 	buf[sizeof(buf) - 1] = 0;
854 	mfd_fail_new(buf, 0);
855 
856 	/* verify "" is a valid name */
857 	fd = mfd_assert_new("", 0, 0);
858 	close(fd);
859 
860 	/* verify invalid O_* open flags */
861 	mfd_fail_new("", 0x0100);
862 	mfd_fail_new("", ~MFD_CLOEXEC);
863 	mfd_fail_new("", ~MFD_ALLOW_SEALING);
864 	mfd_fail_new("", ~0);
865 	mfd_fail_new("", 0x80000000U);
866 
867 	/* verify EXEC and NOEXEC_SEAL can't both be set */
868 	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
869 
870 	/* verify MFD_CLOEXEC is allowed */
871 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
872 	close(fd);
873 
874 	/* verify MFD_ALLOW_SEALING is allowed */
875 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
876 	close(fd);
877 
878 	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
879 	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
880 	close(fd);
881 }
882 
883 /*
884  * Test basic sealing
885  * A very basic sealing test to see whether setting/retrieving seals works.
886  */
test_basic(void)887 static void test_basic(void)
888 {
889 	int fd;
890 
891 	printf("%s BASIC\n", memfd_str);
892 
893 	fd = mfd_assert_new("kern_memfd_basic",
894 			    mfd_def_size,
895 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
896 
897 	/* add basic seals */
898 	mfd_assert_has_seals(fd, 0);
899 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
900 				 F_SEAL_WRITE);
901 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
902 				 F_SEAL_WRITE);
903 
904 	/* add them again */
905 	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
906 				 F_SEAL_WRITE);
907 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
908 				 F_SEAL_WRITE);
909 
910 	/* add more seals and seal against sealing */
911 	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
912 	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
913 				 F_SEAL_GROW |
914 				 F_SEAL_WRITE |
915 				 F_SEAL_SEAL);
916 
917 	/* verify that sealing no longer works */
918 	mfd_fail_add_seals(fd, F_SEAL_GROW);
919 	mfd_fail_add_seals(fd, 0);
920 
921 	close(fd);
922 
923 	/* verify sealing does not work without MFD_ALLOW_SEALING */
924 	fd = mfd_assert_new("kern_memfd_basic",
925 			    mfd_def_size,
926 			    MFD_CLOEXEC);
927 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
928 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
929 			       F_SEAL_GROW |
930 			       F_SEAL_WRITE);
931 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
932 	close(fd);
933 }
934 
935 /*
936  * Test SEAL_WRITE
937  * Test whether SEAL_WRITE actually prevents modifications.
938  */
test_seal_write(void)939 static void test_seal_write(void)
940 {
941 	int fd;
942 
943 	printf("%s SEAL-WRITE\n", memfd_str);
944 
945 	fd = mfd_assert_new("kern_memfd_seal_write",
946 			    mfd_def_size,
947 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
948 	mfd_assert_has_seals(fd, 0);
949 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
950 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
951 
952 	mfd_assert_read(fd);
953 	mfd_fail_write(fd);
954 	mfd_assert_shrink(fd);
955 	mfd_assert_grow(fd);
956 	mfd_fail_grow_write(fd);
957 
958 	close(fd);
959 }
960 
961 /*
962  * Test SEAL_FUTURE_WRITE
963  * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
964  */
test_seal_future_write(void)965 static void test_seal_future_write(void)
966 {
967 	int fd, fd2;
968 	void *p;
969 
970 	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
971 
972 	fd = mfd_assert_new("kern_memfd_seal_future_write",
973 			    mfd_def_size,
974 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
975 
976 	p = mfd_assert_mmap_shared(fd);
977 
978 	mfd_assert_has_seals(fd, 0);
979 
980 	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
981 	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
982 
983 	/* read should pass, writes should fail */
984 	mfd_assert_read(fd);
985 	mfd_assert_read_shared(fd);
986 	mfd_fail_write(fd);
987 
988 	fd2 = mfd_assert_reopen_fd(fd);
989 	/* read should pass, writes should still fail */
990 	mfd_assert_read(fd2);
991 	mfd_assert_read_shared(fd2);
992 	mfd_fail_write(fd2);
993 
994 	mfd_assert_fork_private_write(fd);
995 
996 	munmap(p, mfd_def_size);
997 	close(fd2);
998 	close(fd);
999 }
1000 
test_seal_write_map_read_shared(void)1001 static void test_seal_write_map_read_shared(void)
1002 {
1003 	int fd;
1004 	void *p;
1005 
1006 	printf("%s SEAL-WRITE-MAP-READ\n", memfd_str);
1007 
1008 	fd = mfd_assert_new("kern_memfd_seal_write_map_read",
1009 			    mfd_def_size,
1010 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1011 
1012 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1013 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1014 
1015 	p = mfd_assert_mmap_read_shared(fd);
1016 
1017 	mfd_assert_read(fd);
1018 	mfd_assert_read_shared(fd);
1019 	mfd_fail_write(fd);
1020 
1021 	munmap(p, mfd_def_size);
1022 	close(fd);
1023 }
1024 
1025 /*
1026  * Test SEAL_SHRINK
1027  * Test whether SEAL_SHRINK actually prevents shrinking
1028  */
test_seal_shrink(void)1029 static void test_seal_shrink(void)
1030 {
1031 	int fd;
1032 
1033 	printf("%s SEAL-SHRINK\n", memfd_str);
1034 
1035 	fd = mfd_assert_new("kern_memfd_seal_shrink",
1036 			    mfd_def_size,
1037 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1038 	mfd_assert_has_seals(fd, 0);
1039 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1040 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1041 
1042 	mfd_assert_read(fd);
1043 	mfd_assert_write(fd);
1044 	mfd_fail_shrink(fd);
1045 	mfd_assert_grow(fd);
1046 	mfd_assert_grow_write(fd);
1047 
1048 	close(fd);
1049 }
1050 
1051 /*
1052  * Test SEAL_GROW
1053  * Test whether SEAL_GROW actually prevents growing
1054  */
test_seal_grow(void)1055 static void test_seal_grow(void)
1056 {
1057 	int fd;
1058 
1059 	printf("%s SEAL-GROW\n", memfd_str);
1060 
1061 	fd = mfd_assert_new("kern_memfd_seal_grow",
1062 			    mfd_def_size,
1063 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1064 	mfd_assert_has_seals(fd, 0);
1065 	mfd_assert_add_seals(fd, F_SEAL_GROW);
1066 	mfd_assert_has_seals(fd, F_SEAL_GROW);
1067 
1068 	mfd_assert_read(fd);
1069 	mfd_assert_write(fd);
1070 	mfd_assert_shrink(fd);
1071 	mfd_fail_grow(fd);
1072 	mfd_fail_grow_write(fd);
1073 
1074 	close(fd);
1075 }
1076 
1077 /*
1078  * Test SEAL_SHRINK | SEAL_GROW
1079  * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
1080  */
test_seal_resize(void)1081 static void test_seal_resize(void)
1082 {
1083 	int fd;
1084 
1085 	printf("%s SEAL-RESIZE\n", memfd_str);
1086 
1087 	fd = mfd_assert_new("kern_memfd_seal_resize",
1088 			    mfd_def_size,
1089 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1090 	mfd_assert_has_seals(fd, 0);
1091 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1092 	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1093 
1094 	mfd_assert_read(fd);
1095 	mfd_assert_write(fd);
1096 	mfd_fail_shrink(fd);
1097 	mfd_fail_grow(fd);
1098 	mfd_fail_grow_write(fd);
1099 
1100 	close(fd);
1101 }
1102 
1103 /*
1104  * Test SEAL_EXEC
1105  * Test fd is created with exec and allow sealing.
1106  * chmod() cannot change x bits after sealing.
1107  */
test_exec_seal(void)1108 static void test_exec_seal(void)
1109 {
1110 	int fd;
1111 
1112 	printf("%s SEAL-EXEC\n", memfd_str);
1113 
1114 	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1115 	fd = mfd_assert_new("kern_memfd_seal_exec",
1116 			    mfd_def_size,
1117 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1118 
1119 	mfd_assert_mode(fd, 0777);
1120 	mfd_assert_chmod(fd, 0644);
1121 
1122 	mfd_assert_has_seals(fd, 0);
1123 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1124 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1125 
1126 	mfd_assert_chmod(fd, 0600);
1127 	mfd_fail_chmod(fd, 0777);
1128 	mfd_fail_chmod(fd, 0670);
1129 	mfd_fail_chmod(fd, 0605);
1130 	mfd_fail_chmod(fd, 0700);
1131 	mfd_fail_chmod(fd, 0100);
1132 	mfd_assert_chmod(fd, 0666);
1133 	mfd_assert_write(fd);
1134 	close(fd);
1135 
1136 	printf("%s	Apply ALL_SEALS\n", memfd_str);
1137 	fd = mfd_assert_new("kern_memfd_seal_exec",
1138 			    mfd_def_size,
1139 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1140 
1141 	mfd_assert_mode(fd, 0777);
1142 	mfd_assert_chmod(fd, 0700);
1143 
1144 	mfd_assert_has_seals(fd, 0);
1145 	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1146 	mfd_assert_has_seals(fd, F_WX_SEALS);
1147 
1148 	mfd_fail_chmod(fd, 0711);
1149 	mfd_fail_chmod(fd, 0600);
1150 	mfd_fail_write(fd);
1151 	close(fd);
1152 }
1153 
1154 /*
1155  * Test EXEC_NO_SEAL
1156  * Test fd is created with exec and not allow sealing.
1157  */
test_exec_no_seal(void)1158 static void test_exec_no_seal(void)
1159 {
1160 	int fd;
1161 
1162 	printf("%s EXEC_NO_SEAL\n", memfd_str);
1163 
1164 	/* Create with EXEC but without ALLOW_SEALING */
1165 	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1166 			    mfd_def_size,
1167 			    MFD_CLOEXEC | MFD_EXEC);
1168 	mfd_assert_mode(fd, 0777);
1169 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1170 	mfd_assert_chmod(fd, 0666);
1171 	close(fd);
1172 }
1173 
1174 /*
1175  * Test memfd_create with MFD_NOEXEC flag
1176  */
test_noexec_seal(void)1177 static void test_noexec_seal(void)
1178 {
1179 	int fd;
1180 
1181 	printf("%s NOEXEC_SEAL\n", memfd_str);
1182 
1183 	/* Create with NOEXEC and ALLOW_SEALING */
1184 	fd = mfd_assert_new("kern_memfd_noexec",
1185 			    mfd_def_size,
1186 			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1187 	mfd_assert_mode(fd, 0666);
1188 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1189 	mfd_fail_chmod(fd, 0777);
1190 	close(fd);
1191 
1192 	/* Create with NOEXEC but without ALLOW_SEALING */
1193 	fd = mfd_assert_new("kern_memfd_noexec",
1194 			    mfd_def_size,
1195 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1196 	mfd_assert_mode(fd, 0666);
1197 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1198 	mfd_fail_chmod(fd, 0777);
1199 	close(fd);
1200 }
1201 
test_sysctl_sysctl0(void)1202 static void test_sysctl_sysctl0(void)
1203 {
1204 	int fd;
1205 
1206 	sysctl_assert_equal("0");
1207 
1208 	fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
1209 			    mfd_def_size,
1210 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1211 	mfd_assert_mode(fd, 0777);
1212 	mfd_assert_has_seals(fd, 0);
1213 	mfd_assert_chmod(fd, 0644);
1214 	close(fd);
1215 }
1216 
/* Write "0" to the memfd_noexec sysctl, then run the checks for that value. */
static void test_sysctl_set_sysctl0(void)
{
	sysctl_assert_write("0");

	test_sysctl_sysctl0();
}
1222 
test_sysctl_sysctl1(void)1223 static void test_sysctl_sysctl1(void)
1224 {
1225 	int fd;
1226 
1227 	sysctl_assert_equal("1");
1228 
1229 	fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
1230 			    mfd_def_size,
1231 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1232 	mfd_assert_mode(fd, 0666);
1233 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1234 	mfd_fail_chmod(fd, 0777);
1235 	close(fd);
1236 
1237 	fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
1238 			    mfd_def_size,
1239 			    MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1240 	mfd_assert_mode(fd, 0777);
1241 	mfd_assert_has_seals(fd, 0);
1242 	mfd_assert_chmod(fd, 0644);
1243 	close(fd);
1244 
1245 	fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
1246 			    mfd_def_size,
1247 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1248 	mfd_assert_mode(fd, 0666);
1249 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1250 	mfd_fail_chmod(fd, 0777);
1251 	close(fd);
1252 }
1253 
/* Write "1" to the memfd_noexec sysctl, then run the checks for that value. */
static void test_sysctl_set_sysctl1(void)
{
	sysctl_assert_write("1");

	test_sysctl_sysctl1();
}
1259 
test_sysctl_sysctl2(void)1260 static void test_sysctl_sysctl2(void)
1261 {
1262 	int fd;
1263 
1264 	sysctl_assert_equal("2");
1265 
1266 	fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
1267 			    mfd_def_size,
1268 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1269 	mfd_assert_mode(fd, 0666);
1270 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1271 	mfd_fail_chmod(fd, 0777);
1272 	close(fd);
1273 
1274 	mfd_fail_new("kern_memfd_sysctl_2_exec",
1275 		     MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1276 
1277 	fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
1278 			    mfd_def_size,
1279 			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1280 	mfd_assert_mode(fd, 0666);
1281 	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1282 	mfd_fail_chmod(fd, 0777);
1283 	close(fd);
1284 }
1285 
/* Write "2" to the memfd_noexec sysctl, then run the checks for that value. */
static void test_sysctl_set_sysctl2(void)
{
	sysctl_assert_write("2");

	test_sysctl_sysctl2();
}
1291 
sysctl_simple_child(void * arg)1292 static int sysctl_simple_child(void *arg)
1293 {
1294 	printf("%s sysctl 0\n", memfd_str);
1295 	test_sysctl_set_sysctl0();
1296 
1297 	printf("%s sysctl 1\n", memfd_str);
1298 	test_sysctl_set_sysctl1();
1299 
1300 	printf("%s sysctl 0\n", memfd_str);
1301 	test_sysctl_set_sysctl0();
1302 
1303 	printf("%s sysctl 2\n", memfd_str);
1304 	test_sysctl_set_sysctl2();
1305 
1306 	printf("%s sysctl 1\n", memfd_str);
1307 	test_sysctl_set_sysctl1();
1308 
1309 	printf("%s sysctl 0\n", memfd_str);
1310 	test_sysctl_set_sysctl0();
1311 
1312 	return 0;
1313 }
1314 
1315 /*
1316  * Test sysctl
1317  * A very basic test to make sure the core sysctl semantics work.
1318  */
test_sysctl_simple(void)1319 static void test_sysctl_simple(void)
1320 {
1321 	int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1322 
1323 	join_thread(pid);
1324 }
1325 
/*
 * Thread body adapter: @arg carries a pointer to a `void fn(void)` test
 * routine, which is invoked once. Always returns 0.
 */
static int sysctl_nested(void *arg)
{
	void (*callback)(void);

	callback = (void (*)(void))arg;
	(*callback)();

	return 0;
}
1333 
/*
 * Like sysctl_nested(), but first sends SIGSTOP to the calling process.
 * The intent is to hold off running the routine in @arg until a SIGCONT
 * arrives. Returns whatever sysctl_nested() returns.
 */
static int sysctl_nested_wait(void *arg)
{
	pid_t self = getpid();

	/* Wait for a SIGCONT. */
	kill(self, SIGSTOP);

	return sysctl_nested(arg);
}
1340 
/*
 * With the sysctl at "1": writing "0" is expected to fail, and the level-1
 * expectations must still hold afterwards.
 */
static void test_sysctl_sysctl1_failset(void)
{
	sysctl_fail_write("0");

	test_sysctl_sysctl1();
}
1346 
/*
 * With the sysctl at "2": writing "1" and then "0" are both expected to
 * fail, and the level-2 expectations must still hold after each attempt.
 */
static void test_sysctl_sysctl2_failset(void)
{
	/* Attempt 2 -> 1. */
	sysctl_fail_write("1");
	test_sysctl_sysctl2();

	/* Attempt 2 -> 0. */
	sysctl_fail_write("0");
	test_sysctl_sysctl2();
}
1355 
sysctl_nested_child(void * arg)1356 static int sysctl_nested_child(void *arg)
1357 {
1358 	int pid;
1359 
1360 	printf("%s nested sysctl 0\n", memfd_str);
1361 	sysctl_assert_write("0");
1362 	/* A further nested pidns works the same. */
1363 	pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1364 	join_thread(pid);
1365 
1366 	printf("%s nested sysctl 1\n", memfd_str);
1367 	sysctl_assert_write("1");
1368 	/* Child inherits our setting. */
1369 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
1370 	join_thread(pid);
1371 	/* Child cannot raise the setting. */
1372 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1373 			   test_sysctl_sysctl1_failset);
1374 	join_thread(pid);
1375 	/* Child can lower the setting. */
1376 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1377 			   test_sysctl_set_sysctl2);
1378 	join_thread(pid);
1379 	/* Child lowering the setting has no effect on our setting. */
1380 	test_sysctl_sysctl1();
1381 
1382 	printf("%s nested sysctl 2\n", memfd_str);
1383 	sysctl_assert_write("2");
1384 	/* Child inherits our setting. */
1385 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
1386 	join_thread(pid);
1387 	/* Child cannot raise the setting. */
1388 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1389 			   test_sysctl_sysctl2_failset);
1390 	join_thread(pid);
1391 
1392 	/* Verify that the rules are actually inherited after fork. */
1393 	printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
1394 	sysctl_assert_write("0");
1395 
1396 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1397 			   test_sysctl_sysctl1_failset);
1398 	sysctl_assert_write("1");
1399 	kill(pid, SIGCONT);
1400 	join_thread(pid);
1401 
1402 	printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
1403 	sysctl_assert_write("0");
1404 
1405 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1406 			   test_sysctl_sysctl2_failset);
1407 	sysctl_assert_write("2");
1408 	kill(pid, SIGCONT);
1409 	join_thread(pid);
1410 
1411 	/*
1412 	 * Verify that the current effective setting is saved on fork, meaning
1413 	 * that the parent lowering the sysctl doesn't affect already-forked
1414 	 * children.
1415 	 */
1416 	printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
1417 	sysctl_assert_write("2");
1418 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1419 			   test_sysctl_sysctl2);
1420 	sysctl_assert_write("1");
1421 	kill(pid, SIGCONT);
1422 	join_thread(pid);
1423 
1424 	printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
1425 	sysctl_assert_write("2");
1426 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1427 			   test_sysctl_sysctl2);
1428 	sysctl_assert_write("0");
1429 	kill(pid, SIGCONT);
1430 	join_thread(pid);
1431 
1432 	printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
1433 	sysctl_assert_write("1");
1434 	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1435 			   test_sysctl_sysctl1);
1436 	sysctl_assert_write("0");
1437 	kill(pid, SIGCONT);
1438 	join_thread(pid);
1439 
1440 	return 0;
1441 }
1442 
1443 /*
1444  * Test sysctl with nested pid namespaces
1445  * Make sure that the sysctl nesting semantics work correctly.
1446  */
test_sysctl_nested(void)1447 static void test_sysctl_nested(void)
1448 {
1449 	int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
1450 
1451 	join_thread(pid);
1452 }
1453 
1454 /*
1455  * Test sharing via dup()
1456  * Test that seals are shared between dupped FDs and they're all equal.
1457  */
test_share_dup(char * banner,char * b_suffix)1458 static void test_share_dup(char *banner, char *b_suffix)
1459 {
1460 	int fd, fd2;
1461 
1462 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1463 
1464 	fd = mfd_assert_new("kern_memfd_share_dup",
1465 			    mfd_def_size,
1466 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1467 	mfd_assert_has_seals(fd, 0);
1468 
1469 	fd2 = mfd_assert_dup(fd);
1470 	mfd_assert_has_seals(fd2, 0);
1471 
1472 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1473 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1474 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1475 
1476 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1477 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1478 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1479 
1480 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1481 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1482 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1483 
1484 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1485 	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1486 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1487 	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1488 
1489 	close(fd2);
1490 
1491 	mfd_fail_add_seals(fd, F_SEAL_GROW);
1492 	close(fd);
1493 }
1494 
1495 /*
1496  * Test sealing with active mmap()s
1497  * Modifying seals is only allowed if no other mmap() refs exist.
1498  */
test_share_mmap(char * banner,char * b_suffix)1499 static void test_share_mmap(char *banner, char *b_suffix)
1500 {
1501 	int fd;
1502 	void *p;
1503 
1504 	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1505 
1506 	fd = mfd_assert_new("kern_memfd_share_mmap",
1507 			    mfd_def_size,
1508 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1509 	mfd_assert_has_seals(fd, 0);
1510 
1511 	/* shared/writable ref prevents sealing WRITE, but allows others */
1512 	p = mfd_assert_mmap_shared(fd);
1513 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1514 	mfd_assert_has_seals(fd, 0);
1515 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1516 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1517 	munmap(p, mfd_def_size);
1518 
1519 	/* readable ref allows sealing */
1520 	p = mfd_assert_mmap_private(fd);
1521 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1522 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1523 	munmap(p, mfd_def_size);
1524 
1525 	close(fd);
1526 }
1527 
1528 /*
1529  * Test sealing with open(/proc/self/fd/%d)
1530  * Via /proc we can get access to a separate file-context for the same memfd.
1531  * This is *not* like dup(), but like a real separate open(). Make sure the
1532  * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1533  */
test_share_open(char * banner,char * b_suffix)1534 static void test_share_open(char *banner, char *b_suffix)
1535 {
1536 	int fd, fd2;
1537 
1538 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1539 
1540 	fd = mfd_assert_new("kern_memfd_share_open",
1541 			    mfd_def_size,
1542 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1543 	mfd_assert_has_seals(fd, 0);
1544 
1545 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1546 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1547 	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1548 	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1549 
1550 	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1551 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1552 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1553 
1554 	close(fd);
1555 	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1556 
1557 	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1558 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1559 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1560 
1561 	close(fd2);
1562 	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1563 
1564 	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1565 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1566 	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1567 
1568 	close(fd2);
1569 	close(fd);
1570 }
1571 
1572 /*
1573  * Test sharing via fork()
1574  * Test whether seal-modifications work as expected with forked children.
1575  */
test_share_fork(char * banner,char * b_suffix)1576 static void test_share_fork(char *banner, char *b_suffix)
1577 {
1578 	int fd;
1579 	pid_t pid;
1580 
1581 	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1582 
1583 	fd = mfd_assert_new("kern_memfd_share_fork",
1584 			    mfd_def_size,
1585 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1586 	mfd_assert_has_seals(fd, 0);
1587 
1588 	pid = spawn_idle_thread(0);
1589 	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1590 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1591 
1592 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1593 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1594 
1595 	join_idle_thread(pid);
1596 
1597 	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1598 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1599 
1600 	close(fd);
1601 }
1602 
/*
 * Report whether /proc/self/ns/pid exists.
 *
 * Returns true when access() with F_OK succeeds on that path, false
 * otherwise. main() uses this to decide whether to run the sysctl tests.
 */
static bool pid_ns_supported(void)
{
	if (access("/proc/self/ns/pid", F_OK) != 0)
		return false;

	return true;
}
1607 
main(int argc,char ** argv)1608 int main(int argc, char **argv)
1609 {
1610 	pid_t pid;
1611 
1612 	if (argc == 2) {
1613 		if (!strcmp(argv[1], "hugetlbfs")) {
1614 			unsigned long hpage_size = default_huge_page_size();
1615 
1616 			if (!hpage_size) {
1617 				printf("Unable to determine huge page size\n");
1618 				abort();
1619 			}
1620 
1621 			hugetlbfs_test = 1;
1622 			memfd_str = MEMFD_HUGE_STR;
1623 			mfd_def_size = hpage_size * 2;
1624 		} else {
1625 			printf("Unknown option: %s\n", argv[1]);
1626 			abort();
1627 		}
1628 	}
1629 
1630 	test_create();
1631 	test_basic();
1632 	test_exec_seal();
1633 	test_exec_no_seal();
1634 	test_noexec_seal();
1635 
1636 	test_seal_write();
1637 	test_seal_future_write();
1638 	test_seal_write_map_read_shared();
1639 	test_seal_shrink();
1640 	test_seal_grow();
1641 	test_seal_resize();
1642 
1643 	if (pid_ns_supported()) {
1644 		test_sysctl_simple();
1645 		test_sysctl_nested();
1646 	} else {
1647 		printf("PID namespaces are not supported; skipping sysctl tests\n");
1648 	}
1649 
1650 	test_share_dup("SHARE-DUP", "");
1651 	test_share_mmap("SHARE-MMAP", "");
1652 	test_share_open("SHARE-OPEN", "");
1653 	test_share_fork("SHARE-FORK", "");
1654 
1655 	/* Run test-suite in a multi-threaded environment with a shared
1656 	 * file-table. */
1657 	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1658 	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1659 	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1660 	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1661 	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1662 	join_idle_thread(pid);
1663 
1664 	printf("memfd: DONE\n");
1665 
1666 	return 0;
1667 }
1668