// Tests of Linux-specific functionality
28ac5aef8SEnji Cooper #ifdef __linux__
38ac5aef8SEnji Cooper
48ac5aef8SEnji Cooper #include <sys/types.h>
58ac5aef8SEnji Cooper #include <sys/stat.h>
68ac5aef8SEnji Cooper #include <sys/socket.h>
78ac5aef8SEnji Cooper #include <sys/timerfd.h>
88ac5aef8SEnji Cooper #include <sys/signalfd.h>
98ac5aef8SEnji Cooper #include <sys/eventfd.h>
108ac5aef8SEnji Cooper #include <sys/epoll.h>
118ac5aef8SEnji Cooper #include <sys/inotify.h>
128ac5aef8SEnji Cooper #include <sys/fanotify.h>
138ac5aef8SEnji Cooper #include <sys/mman.h>
148ac5aef8SEnji Cooper #include <sys/capability.h> // Requires e.g. libcap-dev package for POSIX.1e capabilities headers
158ac5aef8SEnji Cooper #include <linux/aio_abi.h>
168ac5aef8SEnji Cooper #include <linux/filter.h>
178ac5aef8SEnji Cooper #include <linux/seccomp.h>
188ac5aef8SEnji Cooper #include <linux/version.h>
198ac5aef8SEnji Cooper #include <poll.h>
208ac5aef8SEnji Cooper #include <sched.h>
218ac5aef8SEnji Cooper #include <signal.h>
228ac5aef8SEnji Cooper #include <fcntl.h>
238ac5aef8SEnji Cooper #include <unistd.h>
248ac5aef8SEnji Cooper
258ac5aef8SEnji Cooper #include <string>
268ac5aef8SEnji Cooper
278ac5aef8SEnji Cooper #include "capsicum.h"
288ac5aef8SEnji Cooper #include "syscalls.h"
298ac5aef8SEnji Cooper #include "capsicum-test.h"
308ac5aef8SEnji Cooper
// Exercise a timerfd through capabilities holding different rights.  The
// expectations below are: timerfd_settime() succeeds with CAP_WRITE (and is
// refused on the write-only capability when the previous setting is also
// requested), timerfd_gettime() succeeds with CAP_READ, and poll() only
// reports the timer when CAP_EVENT is held.
TEST(Linux, TimerFD) {
  int fd = timerfd_create(CLOCK_MONOTONIC, 0);
  // Check the descriptor up front so a failed create is reported here rather
  // than indirectly through the dup() calls below.
  EXPECT_OK(fd);

  cap_rights_t r_ro;
  cap_rights_init(&r_ro, CAP_READ);
  cap_rights_t r_wo;
  cap_rights_init(&r_wo, CAP_WRITE);
  cap_rights_t r_rw;
  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
  cap_rights_t r_rwpoll;
  cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT);

  // One duplicate of the timer FD per rights combination under test.
  int cap_fd_ro = dup(fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro));
  int cap_fd_wo = dup(fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo));
  int cap_fd_rw = dup(fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw));
  int cap_fd_all = dup(fd);
  EXPECT_OK(cap_fd_all);
  EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll));

  // A one-shot timer (zero interval) with an initial expiry of 100ms.
  struct itimerspec old_ispec;
  struct itimerspec ispec;
  ispec.it_interval.tv_sec = 0;
  ispec.it_interval.tv_nsec = 0;
  ispec.it_value.tv_sec = 0;
  ispec.it_value.tv_nsec = 100000000;  // 100ms
  EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL));
  // Asking for the old setting via the write-only capability is refused...
  EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec));
  // ...but setting without retrieving the old value is allowed.
  EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL));
  EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL));
  EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL));

  EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));

  // To be able to poll() for the timer pop, still need CAP_EVENT.
  const int no_event_fds[] = {cap_fd_ro, cap_fd_wo, cap_fd_rw};
  struct pollfd poll_fd;
  for (int ii = 0; ii < 3; ii++) {
    poll_fd.revents = 0;
    poll_fd.events = POLLIN;
    poll_fd.fd = no_event_fds[ii];
    // Poll immediately returns with POLLNVAL
    EXPECT_OK(poll(&poll_fd, 1, 400));
    EXPECT_EQ(0, (poll_fd.revents & POLLIN));
    EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
  }

  // With CAP_EVENT the same request (events is still POLLIN) sees the timer.
  poll_fd.fd = cap_fd_all;
  EXPECT_OK(poll(&poll_fd, 1, 400));
  EXPECT_NE(0, (poll_fd.revents & POLLIN));
  EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));

  // The timer is expected to report no remaining time and no interval.
  EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
  EXPECT_EQ(0, old_ispec.it_value.tv_sec);
  EXPECT_EQ(0, old_ispec.it_value.tv_nsec);
  EXPECT_EQ(0, old_ispec.it_interval.tv_sec);
  EXPECT_EQ(0, old_ispec.it_interval.tv_nsec);

  close(cap_fd_all);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(fd);
}
1068ac5aef8SEnji Cooper
// Exercise a signalfd through capabilities with differing rights: reading
// signal information, changing the signal mask, and polling for a signal.
// The test is skipped when force_mt is set.
FORK_TEST(Linux, SignalFDIfSingleThreaded) {
  if (force_mt) {
    GTEST_SKIP() << "multi-threaded run clashes with signals";
  }
  const pid_t self = getpid();

  // The signal has to be blocked before it is attached to a new signal FD.
  sigset_t sigs;
  sigemptyset(&sigs);
  sigaddset(&sigs, SIGUSR1);
  EXPECT_OK(sigprocmask(SIG_BLOCK, &sigs, NULL));
  int sig_fd = signalfd(-1, &sigs, 0);
  EXPECT_OK(sig_fd);

  // One duplicate of the signal FD per rights combination under test.
  // Write+seek only: none of the rights exercised below.
  cap_rights_t rights_ws;
  cap_rights_init(&rights_ws, CAP_WRITE, CAP_SEEK);
  int cap_plain = dup(sig_fd);
  EXPECT_OK(cap_plain);
  EXPECT_OK(cap_rights_limit(cap_plain, &rights_ws));

  // Read+seek.
  cap_rights_t rights_rs;
  cap_rights_init(&rights_rs, CAP_READ, CAP_SEEK);
  int cap_readable = dup(sig_fd);
  EXPECT_OK(cap_readable);
  EXPECT_OK(cap_rights_limit(cap_readable, &rights_rs));

  // CAP_FSIGNAL only.
  cap_rights_t rights_sig;
  cap_rights_init(&rights_sig, CAP_FSIGNAL);
  int cap_signal = dup(sig_fd);
  EXPECT_OK(cap_signal);
  EXPECT_OK(cap_rights_limit(cap_signal, &rights_sig));

  // CAP_FSIGNAL plus read+seek.
  cap_rights_t rights_sig_rs;
  cap_rights_init(&rights_sig_rs, CAP_FSIGNAL, CAP_READ, CAP_SEEK);
  int cap_signal_readable = dup(sig_fd);
  EXPECT_OK(cap_signal_readable);
  EXPECT_OK(cap_rights_limit(cap_signal_readable, &rights_sig_rs));

  // Everything above plus CAP_EVENT.
  cap_rights_t rights_sig_rs_event;
  cap_rights_init(&rights_sig_rs_event, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT);
  int cap_everything = dup(sig_fd);
  EXPECT_OK(cap_everything);
  EXPECT_OK(cap_rights_limit(cap_everything, &rights_sig_rs_event));

  // Need CAP_READ to read the signal information
  struct signalfd_siginfo info;
  kill(self, SIGUSR1);
  EXPECT_NOTCAPABLE(read(cap_plain, &info, sizeof(info)));
  EXPECT_NOTCAPABLE(read(cap_signal, &info, sizeof(info)));
  int nread = read(cap_readable, &info, sizeof(info));
  EXPECT_OK(nread);
  EXPECT_EQ(sizeof(info), (size_t)nread);
  EXPECT_EQ(SIGUSR1, (int)info.ssi_signo);

  // Need CAP_FSIGNAL to modify the signal mask.
  sigemptyset(&sigs);
  sigaddset(&sigs, SIGUSR1);
  sigaddset(&sigs, SIGUSR2);
  EXPECT_OK(sigprocmask(SIG_BLOCK, &sigs, NULL));
  EXPECT_NOTCAPABLE(signalfd(cap_plain, &sigs, 0));
  EXPECT_NOTCAPABLE(signalfd(cap_readable, &sigs, 0));
  // On success the call is expected to hand back the descriptor it was given.
  EXPECT_EQ(cap_signal, signalfd(cap_signal, &sigs, 0));

  // Need CAP_EVENT to get notification of a signal in poll(2).
  kill(self, SIGUSR2);

  struct pollfd pfd;
  pfd.fd = cap_signal_readable;
  pfd.events = POLLIN;
  pfd.revents = 0;
  EXPECT_OK(poll(&pfd, 1, 400));
  EXPECT_EQ(0, (pfd.revents & POLLIN));
  EXPECT_NE(0, (pfd.revents & POLLNVAL));

  pfd.fd = cap_everything;
  EXPECT_OK(poll(&pfd, 1, 400));
  EXPECT_NE(0, (pfd.revents & POLLIN));
  EXPECT_EQ(0, (pfd.revents & POLLNVAL));
}
1858ac5aef8SEnji Cooper
// Exercise an eventfd through capabilities with differing rights: a forked
// child writes a counter value, and the parent polls for and reads it.
TEST(Linux, EventFD) {
  int event_fd = eventfd(0, 0);
  EXPECT_OK(event_fd);

  // One duplicate of the eventfd per rights combination under test.
  cap_rights_t rights_rs;
  cap_rights_init(&rights_rs, CAP_READ, CAP_SEEK);
  int cap_ro = dup(event_fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, &rights_rs));

  cap_rights_t rights_ws;
  cap_rights_init(&rights_ws, CAP_WRITE, CAP_SEEK);
  int cap_wo = dup(event_fd);
  EXPECT_OK(cap_wo);
  EXPECT_OK(cap_rights_limit(cap_wo, &rights_ws));

  cap_rights_t rights_rws;
  cap_rights_init(&rights_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  int cap_rw = dup(event_fd);
  EXPECT_OK(cap_rw);
  EXPECT_OK(cap_rights_limit(cap_rw, &rights_rws));

  cap_rights_t rights_rws_event;
  cap_rights_init(&rights_rws_event, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
  int cap_all = dup(event_fd);
  EXPECT_OK(cap_all);
  EXPECT_OK(cap_rights_limit(cap_all, &rights_rws_event));

  pid_t writer = fork();
  if (writer == 0) {
    // Child: write counter to eventfd; only the write-capable FD may do so.
    uint64_t counter = 42;
    EXPECT_NOTCAPABLE(write(cap_ro, &counter, sizeof(counter)));
    EXPECT_OK(write(cap_wo, &counter, sizeof(counter)));
    // The exit status tells the parent whether any expectation failed.
    exit(HasFailure());
  }

  sleep(1);  // Allow child to write

  // Polling without CAP_EVENT is expected to give POLLNVAL rather than POLLIN.
  struct pollfd pfd;
  pfd.fd = cap_rw;
  pfd.events = POLLIN;
  pfd.revents = 0;
  EXPECT_OK(poll(&pfd, 1, 400));
  EXPECT_EQ(0, (pfd.revents & POLLIN));
  EXPECT_NE(0, (pfd.revents & POLLNVAL));

  pfd.fd = cap_all;
  EXPECT_OK(poll(&pfd, 1, 400));
  EXPECT_NE(0, (pfd.revents & POLLIN));
  EXPECT_EQ(0, (pfd.revents & POLLNVAL));

  // Reading the counter back needs CAP_READ.
  uint64_t value;
  EXPECT_NOTCAPABLE(read(cap_wo, &value, sizeof(value)));
  EXPECT_OK(read(cap_ro, &value, sizeof(value)));
  EXPECT_EQ(42, (int)value);

  // Wait for the child.
  int status;
  EXPECT_EQ(writer, waitpid(writer, &status, 0));
  int exit_code;
  if (WIFEXITED(status)) {
    exit_code = WEXITSTATUS(status);
  } else {
    exit_code = -1;
  }
  EXPECT_EQ(0, exit_code);

  close(cap_all);
  close(cap_rw);
  close(cap_wo);
  close(cap_ro);
  close(event_fd);
}
2538ac5aef8SEnji Cooper
// Exercise an epoll FD, created after entering capability mode, through
// capabilities with differing rights: changing the monitored set with
// epoll_ctl() and waiting with epoll_pwait().
FORK_TEST(Linux, epoll) {
  int socks[2];
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, socks));
  // Queue some data.
  char payload[] = {1, 2, 3, 4};
  EXPECT_OK(write(socks[1], payload, sizeof(payload)));

  EXPECT_OK(cap_enter());  // Enter capability mode.

  int ep = epoll_create(1);
  EXPECT_OK(ep);

  // One duplicate of the epoll FD per rights combination under test.
  cap_rights_t rights_ws;
  cap_rights_init(&rights_ws, CAP_WRITE, CAP_SEEK);
  int ep_wo = dup(ep);
  EXPECT_OK(ep_wo);
  EXPECT_OK(cap_rights_limit(ep_wo, &rights_ws));

  cap_rights_t rights_rs;
  cap_rights_init(&rights_rs, CAP_READ, CAP_SEEK);
  int ep_ro = dup(ep);
  EXPECT_OK(ep_ro);
  EXPECT_OK(cap_rights_limit(ep_ro, &rights_rs));

  cap_rights_t rights_rws;
  cap_rights_init(&rights_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  int ep_rw = dup(ep);
  EXPECT_OK(ep_rw);
  EXPECT_OK(cap_rights_limit(ep_rw, &rights_rws));

  cap_rights_t rights_rws_event;
  cap_rights_init(&rights_rws_event, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
  int ep_event = dup(ep);
  EXPECT_OK(ep_event);
  EXPECT_OK(cap_rights_limit(ep_event, &rights_rws_event));

  cap_rights_t rights_ctl;
  cap_rights_init(&rights_ctl, CAP_EPOLL_CTL);
  int ep_ctl = dup(ep);
  EXPECT_OK(ep_ctl);
  EXPECT_OK(cap_rights_limit(ep_ctl, &rights_ctl));

  // Can only modify the FDs being monitored if the CAP_EPOLL_CTL right is present.
  struct epoll_event ev;
  memset(&ev, 0, sizeof(ev));
  ev.events = EPOLLIN|EPOLLOUT|EPOLLPRI;
  EXPECT_NOTCAPABLE(epoll_ctl(ep_ro, EPOLL_CTL_ADD, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_wo, EPOLL_CTL_ADD, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_rw, EPOLL_CTL_ADD, socks[0], &ev));
  EXPECT_OK(epoll_ctl(ep_ctl, EPOLL_CTL_ADD, socks[0], &ev));
  ev.events = EPOLLIN|EPOLLOUT;
  EXPECT_NOTCAPABLE(epoll_ctl(ep_ro, EPOLL_CTL_MOD, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_wo, EPOLL_CTL_MOD, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_rw, EPOLL_CTL_MOD, socks[0], &ev));
  EXPECT_OK(epoll_ctl(ep_ctl, EPOLL_CTL_MOD, socks[0], &ev));

  // Running epoll_pwait(2) requires CAP_EVENT.
  ev.events = 0;
  EXPECT_NOTCAPABLE(epoll_pwait(ep_ro, &ev, 1, 100, NULL));
  EXPECT_NOTCAPABLE(epoll_pwait(ep_wo, &ev, 1, 100, NULL));
  EXPECT_NOTCAPABLE(epoll_pwait(ep_rw, &ev, 1, 100, NULL));
  EXPECT_OK(epoll_pwait(ep_event, &ev, 1, 100, NULL));
  // The data queued on the socket pair should show up as readable.
  EXPECT_EQ(EPOLLIN, ev.events & EPOLLIN);

  // Removal is refused on the restricted FDs; the original FD does the removal.
  EXPECT_NOTCAPABLE(epoll_ctl(ep_ro, EPOLL_CTL_DEL, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_wo, EPOLL_CTL_DEL, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_rw, EPOLL_CTL_DEL, socks[0], &ev));
  EXPECT_OK(epoll_ctl(ep, EPOLL_CTL_DEL, socks[0], &ev));

  close(ep_ctl);
  close(ep_event);
  close(ep_rw);
  close(ep_ro);
  close(ep_wo);
  close(ep);
  close(socks[1]);
  close(socks[0]);
}
3298ac5aef8SEnji Cooper
// Check that fstatat() through a capability needs CAP_FSTAT, first on
// duplicates of a file descriptor and then on duplicates of the descriptor
// opened on tmpdir.
TEST(Linux, fstatat) {
  int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);
  unsigned char buffer[] = {1, 2, 3, 4};
  EXPECT_OK(write(fd, buffer, sizeof(buffer)));
  cap_rights_t rights;
  int cap_rf = dup(fd);
  EXPECT_OK(cap_rf);
  EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ)));

  // An empty path with AT_EMPTY_PATH operates on the descriptor itself.
  struct stat info;
  EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH));
  EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH));
  EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH));

  close(cap_ro);
  close(cap_rf);
  close(fd);

  int dir = open(tmpdir.c_str(), O_RDONLY);
  EXPECT_OK(dir);
  int dir_rf = dup(dir);
  EXPECT_OK(dir_rf);
  EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
  // Duplicate the directory descriptor.  fd was closed above, so duplicating
  // it here would only work if its number happened to be reused by dir.
  int dir_ro = dup(dir);
  EXPECT_OK(dir_ro);
  EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ)));

  // Same checks again, this time naming the file relative to the directory.
  EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH));
  EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH));
  EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH));

  close(dir_ro);
  close(dir_rf);
  close(dir);

  unlink(TmpFile("cap_fstatat"));
}
3718ac5aef8SEnji Cooper
// fanotify support may not be available at compile-time
#ifdef __NR_fanotify_init
// Exercise fanotify through capabilities with differing rights: adding marks
// with fanotify_mark(), polling for events and reading them.  A forked child
// creates a file in the monitored directory to generate an event.
TEST(Linux, FanotifyIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();
  int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR);
  EXPECT_OK(fa_fd);
  if (fa_fd < 0) return;  // May not be enabled

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rws;
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwspoll;
  cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
  cap_rights_t r_rwsnotify;
  cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
  cap_rights_t r_rsl;
  cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP);
  cap_rights_t r_rslstat;
  cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT);
  cap_rights_t r_rsstat;
  cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT);

  // Duplicates of the fanotify FD, one per rights combination under test.
  int cap_fd_ro = dup(fa_fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
  int cap_fd_wo = dup(fa_fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
  int cap_fd_rw = dup(fa_fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
  int cap_fd_poll = dup(fa_fd);
  EXPECT_OK(cap_fd_poll);
  EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll));
  int cap_fd_not = dup(fa_fd);
  EXPECT_OK(cap_fd_not);
  EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify));

  // Set up the directory to monitor, containing one pre-existing file.
  int rc = mkdir(TmpFile("cap_notify"), 0755);
  EXPECT_TRUE(rc == 0 || errno == EEXIST);
  int dfd = open(TmpFile("cap_notify"), O_RDONLY);
  EXPECT_OK(dfd);
  int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);  // The "file" marks below rely on this file existing.
  close(fd);

  // Duplicates of the directory FD, one per rights combination under test.
  int cap_dfd = dup(dfd);
  EXPECT_OK(cap_dfd);
  EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat));
  int cap_dfd_rs = dup(dfd);
  EXPECT_OK(cap_dfd_rs);
  EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs));
  int cap_dfd_rsstat = dup(dfd);
  EXPECT_OK(cap_dfd_rsstat);
  EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat));
  int cap_dfd_rsl = dup(dfd);
  EXPECT_OK(cap_dfd_rsl);
  EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl));

  // Need CAP_NOTIFY to change what's monitored.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));

  // Need CAP_FSTAT on the thing monitored.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rs, NULL));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rsstat, NULL));

  // To add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsstat, "file"));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsl, "file"));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd, "file"));

  pid_t child = fork();
  if (child == 0) {
    // Child: Perform activity in the directory under notify.
    sleep(1);
    unlink(TmpFile("cap_notify/temp"));
    int temp_fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644);
    close(temp_fd);
    exit(0);
  }

  // Need CAP_EVENT to poll.
  struct pollfd poll_fd;
  poll_fd.revents = 0;
  poll_fd.events = POLLIN;
  poll_fd.fd = cap_fd_rw;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_EQ(0, (poll_fd.revents & POLLIN));
  EXPECT_NE(0, (poll_fd.revents & POLLNVAL));

  poll_fd.fd = cap_fd_not;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_EQ(0, (poll_fd.revents & POLLIN));
  EXPECT_NE(0, (poll_fd.revents & POLLNVAL));

  // The 1400ms timeout leaves room for the child's 1s sleep before it acts.
  poll_fd.fd = cap_fd_poll;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_NE(0, (poll_fd.revents & POLLIN));
  EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));

  // Need CAP_READ to read.
  struct fanotify_event_metadata ev;
  memset(&ev, 0, sizeof(ev));
  EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev)));
  rc = read(fa_fd, &ev, sizeof(ev));
  EXPECT_OK(rc);
  EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc);
  // Remember the event's descriptor for cleanup, but only if a whole event
  // was read: ev was zeroed above, and FD 0 is not ours to close.
  const int event_fd = (rc == (int)sizeof(ev)) ? ev.fd : -1;
  EXPECT_EQ(child, ev.pid);
  EXPECT_NE(0, ev.fd);

  // TODO(drysdale): reinstate if/when capsicum-linux propagates rights
  // to fanotify-generated FDs.
#ifdef OMIT
  // fanotify(7) gives us a FD for the changed file.  This should
  // only have rights that are a subset of those for the original
  // monitored directory file descriptor.
  cap_rights_t rights;
  CAP_SET_ALL(&rights);
  EXPECT_OK(cap_rights_get(ev.fd, &rights));
  EXPECT_RIGHTS_IN(&rights, &r_rslstat);
#endif

  // Wait for the child.
  int status;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);

  // Release the descriptor that arrived with the event so it is not leaked.
  if (event_fd >= 0) close(event_fd);
  close(cap_dfd_rsstat);
  close(cap_dfd_rsl);
  close(cap_dfd_rs);
  close(cap_dfd);
  close(dfd);
  unlink(TmpFile("cap_notify/file"));
  unlink(TmpFile("cap_notify/temp"));
  rmdir(TmpFile("cap_notify"));
  close(cap_fd_not);
  close(cap_fd_poll);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(fa_fd);
}
#endif
5248ac5aef8SEnji Cooper
// Exercise inotify through capabilities with differing rights: adding and
// removing a watch, and reading the event generated by writing to the
// watched file.
TEST(Linux, inotify) {
  int i_fd = inotify_init();
  EXPECT_OK(i_fd);

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rws;
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwsnotify;
  cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);

  // Duplicates of the inotify FD, one per rights combination under test.
  int cap_fd_ro = dup(i_fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
  int cap_fd_wo = dup(i_fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
  int cap_fd_rw = dup(i_fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
  int cap_fd_all = dup(i_fd);
  EXPECT_OK(cap_fd_all);
  EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify));

  int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644);
  // Check the file was created before watching and writing to it.
  EXPECT_OK(fd);
  // Adding a watch is refused on the capability lacking CAP_NOTIFY.
  EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY));
  int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY);
  EXPECT_OK(wd);

  // Modify the watched file to generate an event.
  unsigned char buffer[] = {1, 2, 3, 4};
  EXPECT_OK(write(fd, buffer, sizeof(buffer)));

  // Reading the event needs CAP_READ.
  struct inotify_event iev;
  memset(&iev, 0, sizeof(iev));
  EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev)));
  int rc = read(cap_fd_ro, &iev, sizeof(iev));
  EXPECT_OK(rc);
  EXPECT_EQ((int)sizeof(iev), rc);
  EXPECT_EQ(wd, iev.wd);

  // Removing the watch is refused without CAP_NOTIFY.
  EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd));
  EXPECT_OK(inotify_rm_watch(cap_fd_all, wd));

  close(fd);
  close(cap_fd_all);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(i_fd);
  unlink(TmpFile("cap_inotify"));
}
5788ac5aef8SEnji Cooper
// Check that a process which has entered capability mode can still fexecve()
// helper binaries built for other ABIs (./mini-me.32, ./mini-me.x32,
// ./mini-me.64).  Each helper is run with "--capmode" and is expected to exit
// with status 0.  The test is skipped when none of the helpers can be opened.
TEST(Linux, ArchChangeIfAvailable) {
  const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"};
  const char* progs[] = {NULL, NULL, NULL};
  char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL};
  char* null_envp[] = {NULL};
  int fds[3];
  int count = 0;

  // Keep only the candidates that can be opened; progs[]/fds[] are packed so
  // that entries [0, count) are the usable ones.
  for (int ii = 0; ii < 3; ii++) {
    fds[count] = open(prog_candidates[ii], O_RDONLY);
    if (fds[count] >= 0) {
      progs[count] = prog_candidates[ii];
      count++;
    }
  }
  if (count == 0) {
    GTEST_SKIP() << "no different-architecture programs available";
  }

  for (int ii = 0; ii < count; ii++) {
    // Fork-and-exec a binary of this architecture.
    pid_t child = fork();
    EXPECT_OK(child);
    if (child < 0) {
      // fork() failed: waitpid(-1, ...) would wait for *any* child, so do not
      // fall through to the wait below.
      close(fds[ii]);
      continue;
    }
    if (child == 0) {
      EXPECT_OK(cap_enter());  // Enter capability mode
      if (verbose) fprintf(stderr, "[%d] call fexecve(%s, %s)\n",
                           getpid_(), progs[ii], argv_pass[1]);
      argv_pass[0] = (char *)progs[ii];
      int rc = fexecve_(fds[ii], argv_pass, null_envp);
      fprintf(stderr, "fexecve(%s) returned %d errno %d\n", progs[ii], rc, errno);
      exit(99);  // Should not reach here.
    }
    // -1 does not decode as a normal exit, so a failed waitpid() cannot be
    // mistaken for a successful child.
    int status = -1;
    EXPECT_EQ(child, waitpid(child, &status, 0));
    int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
    EXPECT_EQ(0, rc);
    close(fds[ii]);
  }
}
6178ac5aef8SEnji Cooper
// Check the interaction of setns(2) with Capsicum rights: setns() on a
// descriptor lacking CAP_SETNS is refused, it succeeds on one that has
// CAP_SETNS, and once in capability mode setns() is refused outright while
// unshare(2) still works.  Root only.
FORK_TEST(Linux, NamespaceIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();
  pid_t me = getpid_();

  // Create a new UTS namespace.
  EXPECT_OK(unshare(CLONE_NEWUTS));
  // Open an FD to its symlink.
  char buffer[256];
  snprintf(buffer, sizeof(buffer), "/proc/%d/ns/uts", me);
  int ns_fd = open(buffer, O_RDONLY);
  // Everything below dup()s this descriptor, so report a failed open here.
  EXPECT_OK(ns_fd);

  cap_rights_t r_rwlstat;
  cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT);
  cap_rights_t r_rwlstatns;
  cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS);

  // Same namespace file, once without and once with CAP_SETNS.
  int cap_fd = dup(ns_fd);
  EXPECT_OK(cap_fd);
  EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat));
  int cap_fd_setns = dup(ns_fd);
  EXPECT_OK(cap_fd_setns);
  EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns));
  EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS));
  EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS));

  EXPECT_OK(cap_enter());  // Enter capability mode.

  // No setns(2) but unshare(2) is allowed.
  EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS));
  EXPECT_OK(unshare(CLONE_NEWUTS));
}
6498ac5aef8SEnji Cooper
SendFD(int fd,int over)6508ac5aef8SEnji Cooper static void SendFD(int fd, int over) {
6518ac5aef8SEnji Cooper struct msghdr mh;
6528ac5aef8SEnji Cooper mh.msg_name = NULL; // No address needed
6538ac5aef8SEnji Cooper mh.msg_namelen = 0;
6548ac5aef8SEnji Cooper char buffer1[1024];
6558ac5aef8SEnji Cooper struct iovec iov[1];
6568ac5aef8SEnji Cooper iov[0].iov_base = buffer1;
6578ac5aef8SEnji Cooper iov[0].iov_len = sizeof(buffer1);
6588ac5aef8SEnji Cooper mh.msg_iov = iov;
6598ac5aef8SEnji Cooper mh.msg_iovlen = 1;
6608ac5aef8SEnji Cooper char buffer2[1024];
6618ac5aef8SEnji Cooper mh.msg_control = buffer2;
6628ac5aef8SEnji Cooper mh.msg_controllen = CMSG_LEN(sizeof(int));
6638ac5aef8SEnji Cooper struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
6648ac5aef8SEnji Cooper cmptr->cmsg_level = SOL_SOCKET;
6658ac5aef8SEnji Cooper cmptr->cmsg_type = SCM_RIGHTS;
6668ac5aef8SEnji Cooper cmptr->cmsg_len = CMSG_LEN(sizeof(int));
6678ac5aef8SEnji Cooper *(int *)CMSG_DATA(cmptr) = fd;
6688ac5aef8SEnji Cooper buffer1[0] = 0;
6698ac5aef8SEnji Cooper iov[0].iov_len = 1;
6708ac5aef8SEnji Cooper int rc = sendmsg(over, &mh, 0);
6718ac5aef8SEnji Cooper EXPECT_OK(rc);
6728ac5aef8SEnji Cooper }
6738ac5aef8SEnji Cooper
ReceiveFD(int over)6748ac5aef8SEnji Cooper static int ReceiveFD(int over) {
6758ac5aef8SEnji Cooper struct msghdr mh;
6768ac5aef8SEnji Cooper mh.msg_name = NULL; // No address needed
6778ac5aef8SEnji Cooper mh.msg_namelen = 0;
6788ac5aef8SEnji Cooper char buffer1[1024];
6798ac5aef8SEnji Cooper struct iovec iov[1];
6808ac5aef8SEnji Cooper iov[0].iov_base = buffer1;
6818ac5aef8SEnji Cooper iov[0].iov_len = sizeof(buffer1);
6828ac5aef8SEnji Cooper mh.msg_iov = iov;
6838ac5aef8SEnji Cooper mh.msg_iovlen = 1;
6848ac5aef8SEnji Cooper char buffer2[1024];
6858ac5aef8SEnji Cooper mh.msg_control = buffer2;
6868ac5aef8SEnji Cooper mh.msg_controllen = sizeof(buffer2);
6878ac5aef8SEnji Cooper int rc = recvmsg(over, &mh, 0);
6888ac5aef8SEnji Cooper EXPECT_OK(rc);
6898ac5aef8SEnji Cooper EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
6908ac5aef8SEnji Cooper struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
6918ac5aef8SEnji Cooper int fd = *(int*)CMSG_DATA(cmptr);
6928ac5aef8SEnji Cooper EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
6938ac5aef8SEnji Cooper cmptr = CMSG_NXTHDR(&mh, cmptr);
6948ac5aef8SEnji Cooper EXPECT_TRUE(cmptr == NULL);
6958ac5aef8SEnji Cooper return fd;
6968ac5aef8SEnji Cooper }
6978ac5aef8SEnji Cooper
// Process descriptor filled in by pdfork() in PidNamespacePdForkIfRoot and
// read by ChildFunc, which runs in the clone()d child.
static int shared_pd = -1;
// Socket pair created by the tests below: the parent uses element [0], the
// clone()d child (ChildFunc / NSInit) uses element [1].
static int shared_sock_fds[2];
7008ac5aef8SEnji Cooper
ChildFunc(void * arg)7018ac5aef8SEnji Cooper static int ChildFunc(void *arg) {
7028ac5aef8SEnji Cooper // This function is running in a new PID namespace, and so is pid 1.
7038ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
7048ac5aef8SEnji Cooper EXPECT_EQ(1, getpid_());
7058ac5aef8SEnji Cooper EXPECT_EQ(0, getppid());
7068ac5aef8SEnji Cooper
7078ac5aef8SEnji Cooper // The shared process descriptor is outside our namespace, so we cannot
7088ac5aef8SEnji Cooper // get its pid.
7098ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: shared_pd=%d\n", shared_pd);
7108ac5aef8SEnji Cooper pid_t shared_child = -1;
7118ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(shared_pd, &shared_child));
7128ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: corresponding pid=%d\n", shared_child);
7138ac5aef8SEnji Cooper EXPECT_EQ(0, shared_child);
7148ac5aef8SEnji Cooper
7158ac5aef8SEnji Cooper // But we can pdkill() it even so.
7168ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: call pdkill(pd=%d)\n", shared_pd);
7178ac5aef8SEnji Cooper EXPECT_OK(pdkill(shared_pd, SIGINT));
7188ac5aef8SEnji Cooper
7198ac5aef8SEnji Cooper int pd;
7208ac5aef8SEnji Cooper pid_t child = pdfork(&pd, 0);
7218ac5aef8SEnji Cooper EXPECT_OK(child);
7228ac5aef8SEnji Cooper if (child == 0) {
7238ac5aef8SEnji Cooper // Child: expect pid 2.
7248ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
7258ac5aef8SEnji Cooper EXPECT_EQ(2, getpid_());
7268ac5aef8SEnji Cooper EXPECT_EQ(1, getppid());
7278ac5aef8SEnji Cooper while (true) {
7288ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " child of ChildFunc: \"I aten't dead\"\n");
7298ac5aef8SEnji Cooper sleep(1);
7308ac5aef8SEnji Cooper }
7318ac5aef8SEnji Cooper exit(0);
7328ac5aef8SEnji Cooper }
7338ac5aef8SEnji Cooper EXPECT_EQ(2, child);
7348ac5aef8SEnji Cooper EXPECT_PID_ALIVE(child);
7358ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
7368ac5aef8SEnji Cooper pd, child, ProcessState(child));
7378ac5aef8SEnji Cooper
7388ac5aef8SEnji Cooper pid_t pid;
7398ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(pd, &pid));
7408ac5aef8SEnji Cooper EXPECT_EQ(child, pid);
7418ac5aef8SEnji Cooper
7428ac5aef8SEnji Cooper sleep(2);
7438ac5aef8SEnji Cooper
7448ac5aef8SEnji Cooper // Send the process descriptor over UNIX domain socket back to parent.
7458ac5aef8SEnji Cooper SendFD(pd, shared_sock_fds[1]);
7468ac5aef8SEnji Cooper
7478ac5aef8SEnji Cooper // Wait for death of (grand)child, killed by our parent.
7488ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: wait on pid=%d\n", child);
7498ac5aef8SEnji Cooper int status;
7508ac5aef8SEnji Cooper EXPECT_EQ(child, wait4(child, &status, __WALL, NULL));
7518ac5aef8SEnji Cooper
7528ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: return 0\n");
7538ac5aef8SEnji Cooper return 0;
7548ac5aef8SEnji Cooper }
7558ac5aef8SEnji Cooper
// Size in bytes of the stack handed to clone() by the namespace tests below.
#define STACK_SIZE (1024 * 1024)
// Stack for the clone()d child; callers pass child_stack + STACK_SIZE, i.e.
// the address just past the end of the array.
static char child_stack[STACK_SIZE];
7588ac5aef8SEnji Cooper
759*2d936e6cSAlex Richardson // TODO(drysdale): fork into a user namespace first so GTEST_SKIP_IF_NOT_ROOT can be removed.
TEST(Linux,PidNamespacePdForkIfRoot)760*2d936e6cSAlex Richardson TEST(Linux, PidNamespacePdForkIfRoot) {
761*2d936e6cSAlex Richardson GTEST_SKIP_IF_NOT_ROOT();
7628ac5aef8SEnji Cooper // Pass process descriptors in both directions across a PID namespace boundary.
7638ac5aef8SEnji Cooper // pdfork() off a child before we start, holding its process descriptor in a global
7648ac5aef8SEnji Cooper // variable that's accessible to children.
7658ac5aef8SEnji Cooper pid_t firstborn = pdfork(&shared_pd, 0);
7668ac5aef8SEnji Cooper EXPECT_OK(firstborn);
7678ac5aef8SEnji Cooper if (firstborn == 0) {
7688ac5aef8SEnji Cooper while (true) {
7698ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " Firstborn: \"I aten't dead\"\n");
7708ac5aef8SEnji Cooper sleep(1);
7718ac5aef8SEnji Cooper }
7728ac5aef8SEnji Cooper exit(0);
7738ac5aef8SEnji Cooper }
7748ac5aef8SEnji Cooper EXPECT_PID_ALIVE(firstborn);
7758ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n",
7768ac5aef8SEnji Cooper shared_pd, firstborn, ProcessState(firstborn));
7778ac5aef8SEnji Cooper sleep(2);
7788ac5aef8SEnji Cooper
7798ac5aef8SEnji Cooper // Prepare sockets to communicate with child process.
7808ac5aef8SEnji Cooper EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
7818ac5aef8SEnji Cooper
7828ac5aef8SEnji Cooper // Clone into a child process with a new pid namespace.
7838ac5aef8SEnji Cooper pid_t child = clone(ChildFunc, child_stack + STACK_SIZE,
7848ac5aef8SEnji Cooper CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
7858ac5aef8SEnji Cooper EXPECT_OK(child);
7868ac5aef8SEnji Cooper EXPECT_PID_ALIVE(child);
7878ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
7888ac5aef8SEnji Cooper
7898ac5aef8SEnji Cooper // Ensure the child runs. First thing it does is to kill our firstborn, using shared_pd.
7908ac5aef8SEnji Cooper sleep(1);
7918ac5aef8SEnji Cooper EXPECT_PID_DEAD(firstborn);
7928ac5aef8SEnji Cooper
7938ac5aef8SEnji Cooper // But we can still retrieve firstborn's PID, as it's not been reaped yet.
7948ac5aef8SEnji Cooper pid_t child0;
7958ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(shared_pd, &child0));
7968ac5aef8SEnji Cooper EXPECT_EQ(firstborn, child0);
7978ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n",
7988ac5aef8SEnji Cooper shared_pd, child0, ProcessState(child0));
7998ac5aef8SEnji Cooper
8008ac5aef8SEnji Cooper // Now reap it.
8018ac5aef8SEnji Cooper int status;
8028ac5aef8SEnji Cooper EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL));
8038ac5aef8SEnji Cooper
8048ac5aef8SEnji Cooper // Get the process descriptor of the child-of-child via socket transfer.
8058ac5aef8SEnji Cooper int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
8068ac5aef8SEnji Cooper
8078ac5aef8SEnji Cooper // Our notion of the pid associated with the grandchild is in the main PID namespace.
8088ac5aef8SEnji Cooper pid_t grandchild;
8098ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
8108ac5aef8SEnji Cooper EXPECT_NE(2, grandchild);
8118ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: pre-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
8128ac5aef8SEnji Cooper grandchild_pd, grandchild, ProcessState(grandchild));
8138ac5aef8SEnji Cooper EXPECT_PID_ALIVE(grandchild);
8148ac5aef8SEnji Cooper
8158ac5aef8SEnji Cooper // Kill the grandchild via the process descriptor.
8168ac5aef8SEnji Cooper EXPECT_OK(pdkill(grandchild_pd, SIGINT));
8178ac5aef8SEnji Cooper usleep(10000);
8188ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
8198ac5aef8SEnji Cooper grandchild_pd, grandchild, ProcessState(grandchild));
8208ac5aef8SEnji Cooper EXPECT_PID_DEAD(grandchild);
8218ac5aef8SEnji Cooper
8228ac5aef8SEnji Cooper sleep(2);
8238ac5aef8SEnji Cooper
8248ac5aef8SEnji Cooper // Wait for the child.
8258ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
8268ac5aef8SEnji Cooper int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
8278ac5aef8SEnji Cooper EXPECT_EQ(0, rc);
8288ac5aef8SEnji Cooper
8298ac5aef8SEnji Cooper close(shared_sock_fds[0]);
8308ac5aef8SEnji Cooper close(shared_sock_fds[1]);
8318ac5aef8SEnji Cooper close(shared_pd);
8328ac5aef8SEnji Cooper close(grandchild_pd);
8338ac5aef8SEnji Cooper }
8348ac5aef8SEnji Cooper
NSInit(void * data)8358ac5aef8SEnji Cooper int NSInit(void *data) {
8368ac5aef8SEnji Cooper // This function is running in a new PID namespace, and so is pid 1.
8378ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
8388ac5aef8SEnji Cooper EXPECT_EQ(1, getpid_());
8398ac5aef8SEnji Cooper EXPECT_EQ(0, getppid());
8408ac5aef8SEnji Cooper
8418ac5aef8SEnji Cooper int pd;
8428ac5aef8SEnji Cooper pid_t child = pdfork(&pd, 0);
8438ac5aef8SEnji Cooper EXPECT_OK(child);
8448ac5aef8SEnji Cooper if (child == 0) {
8458ac5aef8SEnji Cooper // Child: loop forever until terminated.
8468ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
8478ac5aef8SEnji Cooper while (true) {
8488ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " child of NSInit: \"I aten't dead\"\n");
8498ac5aef8SEnji Cooper usleep(100000);
8508ac5aef8SEnji Cooper }
8518ac5aef8SEnji Cooper exit(0);
8528ac5aef8SEnji Cooper }
8538ac5aef8SEnji Cooper EXPECT_EQ(2, child);
8548ac5aef8SEnji Cooper EXPECT_PID_ALIVE(child);
8558ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
8568ac5aef8SEnji Cooper pd, child, ProcessState(child));
8578ac5aef8SEnji Cooper sleep(1);
8588ac5aef8SEnji Cooper
8598ac5aef8SEnji Cooper // Send the process descriptor over UNIX domain socket back to parent.
8608ac5aef8SEnji Cooper SendFD(pd, shared_sock_fds[1]);
8618ac5aef8SEnji Cooper close(pd);
8628ac5aef8SEnji Cooper
8638ac5aef8SEnji Cooper // Wait for a byte back in the other direction.
8648ac5aef8SEnji Cooper int value;
8658ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " NSInit: block waiting for value\n");
8668ac5aef8SEnji Cooper read(shared_sock_fds[1], &value, sizeof(value));
8678ac5aef8SEnji Cooper
8688ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " NSInit: return 0\n");
8698ac5aef8SEnji Cooper return 0;
8708ac5aef8SEnji Cooper }
8718ac5aef8SEnji Cooper
// Start NSInit as pid 1 of a new PID namespace, obtain a process descriptor
// for its child (our grandchild), then let NSInit exit and check that the
// grandchild goes away with it even though we still hold its process
// descriptor.  Root only.
TEST(Linux, DeadNSInitIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();

  // Prepare sockets to communicate with child process.
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));

  // Clone into a child process with a new pid namespace.
  pid_t child = clone(NSInit, child_stack + STACK_SIZE,
                      CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
  usleep(10000);
  EXPECT_OK(child);
  EXPECT_PID_ALIVE(child);
  if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));

  // Get the process descriptor of the child-of-child via socket transfer.
  int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
  pid_t grandchild = -1;  // Defined value even if pdgetpid() fails.
  EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
  if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));

  // Send an int to the child to trigger its termination.  Grandchild should also
  // go, as its init process is gone.
  int zero = 0;
  if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
  // If this write fails the child never wakes up, so report it.
  EXPECT_OK(write(shared_sock_fds[0], &zero, sizeof(zero)));
  EXPECT_PID_ZOMBIE(child);
  EXPECT_PID_GONE(grandchild);

  // Wait for the child.
  // -1 does not decode as a normal exit, so a waitpid() that stores nothing
  // (WNOHANG with no exited child, or an error) is reported as a failure.
  int status = -1;
  EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);
  EXPECT_PID_GONE(child);

  close(shared_sock_fds[0]);
  close(shared_sock_fds[1]);
  close(grandchild_pd);

  if (verbose) {
    fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
    fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
  }
}
9168ac5aef8SEnji Cooper
// Variant of DeadNSInitIfRoot: the grandchild is killed through its process
// descriptor and that descriptor is closed *before* NSInit is told to exit,
// so no process-descriptor reference remains when the namespace's init dies.
// Root only.
TEST(Linux, DeadNSInit2IfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();

  // Prepare sockets to communicate with child process.
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));

  // Clone into a child process with a new pid namespace.
  pid_t child = clone(NSInit, child_stack + STACK_SIZE,
                      CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
  usleep(10000);
  EXPECT_OK(child);
  EXPECT_PID_ALIVE(child);
  if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));

  // Get the process descriptor of the child-of-child via socket transfer.
  int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
  pid_t grandchild = -1;  // Defined value even if pdgetpid() fails.
  EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
  if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));

  // Kill the grandchild
  EXPECT_OK(pdkill(grandchild_pd, SIGINT));
  usleep(10000);
  EXPECT_PID_ZOMBIE(grandchild);
  // Close the process descriptor, so there are now no procdesc references to grandchild.
  close(grandchild_pd);

  // Send an int to the child to trigger its termination.  Grandchild should also
  // go, as its init process is gone.
  int zero = 0;
  if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
  // If this write fails the child never wakes up, so report it.
  EXPECT_OK(write(shared_sock_fds[0], &zero, sizeof(zero)));
  EXPECT_PID_ZOMBIE(child);
  EXPECT_PID_GONE(grandchild);

  // Wait for the child.
  // -1 does not decode as a normal exit, so a waitpid() that stores nothing
  // (WNOHANG with no exited child, or an error) is reported as a failure.
  int status = -1;
  EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);

  close(shared_sock_fds[0]);
  close(shared_sock_fds[1]);

  if (verbose) {
    fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
    fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
  }
}
9668ac5aef8SEnji Cooper
9678ac5aef8SEnji Cooper #ifdef __x86_64__
FORK_TEST(Linux,CheckHighWord)9688ac5aef8SEnji Cooper FORK_TEST(Linux, CheckHighWord) {
9698ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); // Enter capability mode.
9708ac5aef8SEnji Cooper
9718ac5aef8SEnji Cooper int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
9728ac5aef8SEnji Cooper EXPECT_OK(rc);
9738ac5aef8SEnji Cooper EXPECT_EQ(1, rc); // no_new_privs = 1
9748ac5aef8SEnji Cooper
9758ac5aef8SEnji Cooper // Set some of the high 32-bits of argument zero.
9768ac5aef8SEnji Cooper uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL;
9778ac5aef8SEnji Cooper EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0));
9788ac5aef8SEnji Cooper }
9798ac5aef8SEnji Cooper #endif
9808ac5aef8SEnji Cooper
// Walk the openat-beneath prctl flag through its states: it can be set and
// cleared freely before capability mode; after cap_enter() it reads back as 1
// and an attempt to clear it fails with a capability-mode error.
FORK_TEST(Linux, PrctlOpenatBeneath) {
  // Turn on no_new_privs and confirm it reads back as 1.
  EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  const int no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(no_new_privs);
  EXPECT_EQ(1, no_new_privs);

  // Turn on openat-beneath mode and confirm it reads back as 1.
  EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0));
  const int beneath_after_set = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(beneath_after_set);
  EXPECT_EQ(1, beneath_after_set);

  // Turn it off again and confirm it reads back as 0.
  EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
  const int beneath_after_clear = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(beneath_after_clear);
  EXPECT_EQ(0, beneath_after_clear);

  EXPECT_OK(cap_enter());  // Enter capability mode

  // Although it was cleared above, the flag is expected to read as 1 now.
  const int beneath_in_capmode = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(beneath_in_capmode);
  EXPECT_EQ(1, beneath_in_capmode);

  // Clearing it must be refused, and the value must remain 1.
  EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
  const int beneath_after_refusal = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(beneath_after_refusal);
  EXPECT_EQ(1, beneath_after_refusal);
}
10148ac5aef8SEnji Cooper
// Installing a seccomp-bpf filter must fail with EACCES while no_new_privs is
// 0 and succeed once no_new_privs has been set to 1.  When running as root the
// CAP_SYS_ADMIN POSIX.1e capability is dropped first.
FORK_TEST(Linux, NoNewPrivs) {
  const bool running_as_root = (getuid() == 0);
  if (running_as_root) {
    // If root, drop CAP_SYS_ADMIN POSIX.1e capability.
    struct __user_cap_header_struct hdr;
    hdr.version = _LINUX_CAPABILITY_VERSION_3;
    hdr.pid = getpid_();
    struct __user_cap_data_struct data[3];
    EXPECT_OK(capget(&hdr, data));
    const int keep_mask = ~(1 << CAP_SYS_ADMIN);
    data[0].effective &= keep_mask;
    data[0].permitted &= keep_mask;
    data[0].inheritable &= keep_mask;
    EXPECT_OK(capset(&hdr, data));
  }

  // The flag starts out clear.
  int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(rc);
  EXPECT_EQ(0, rc);

  // A one-instruction filter that allows every syscall.
  struct sock_filter filter[] = {
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
  };
  struct sock_fprog bpf;
  bpf.len = (sizeof(filter) / sizeof(filter[0]));
  bpf.filter = filter;

  // Can't enter seccomp-bpf mode with no_new_privs == 0
  rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0);
  EXPECT_EQ(-1, rc);
  EXPECT_EQ(EACCES, errno);

  // Set the flag and confirm it reads back as 1.
  EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(rc);
  EXPECT_EQ(1, rc);

  // Can now turn on seccomp mode
  EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));
}
10528ac5aef8SEnji Cooper
/* Macros for BPF generation */

/* Return instruction: fail the syscall with errno `err` (low 16 bits only).
 * The argument is parenthesised so that expressions such as `a | b` are
 * masked as a whole. */
#define BPF_RETURN_ERRNO(err) \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | ((err) & 0xFFFF))
/* Return instruction: SECCOMP_RET_KILL. */
#define BPF_KILL_PROCESS \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
/* Return instruction: SECCOMP_RET_ALLOW. */
#define BPF_ALLOW \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
/* Load the syscall number (seccomp_data.nr) into the accumulator. */
#define EXAMINE_SYSCALL \
  BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr))
/* Each of the following expands to TWO instructions: a compare against
 * __NR_<name> that falls through on a match and skips one instruction
 * otherwise, followed by the action taken on a match. */
#define ALLOW_SYSCALL(name) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_ALLOW
#define KILL_SYSCALL(name) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_KILL_PROCESS
#define FAIL_SYSCALL(name, err) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_RETURN_ERRNO(err)
10718ac5aef8SEnji Cooper
// Combine a seccomp-bpf filter with capability mode in a forked child and
// check which mechanism decides the outcome of each syscall.  The child is
// expected to be terminated by SIGSYS when it finally calls fsync().
TEST(Linux, CapModeWithBPF) {
  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644);
    // Every check below operates on this descriptor, so report a failed open.
    EXPECT_OK(fd);
    cap_rights_t rights;
    cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
    EXPECT_OK(cap_rights_limit(fd, &rights));

    // Filter: fchmod -> ENOMEM, fstat -> ENOEXEC, close -> allow,
    // fsync -> kill, anything else -> allow.
    struct sock_filter filter[] = { EXAMINE_SYSCALL,
                                    FAIL_SYSCALL(fchmod, ENOMEM),
                                    FAIL_SYSCALL(fstat, ENOEXEC),
                                    ALLOW_SYSCALL(close),
                                    KILL_SYSCALL(fsync),
                                    BPF_ALLOW };
    // Plain member assignment (as in NoNewPrivs) instead of designated
    // initialisers, which are not standard C++ before C++20.
    struct sock_fprog bpf;
    bpf.len = (sizeof(filter) / sizeof(filter[0]));
    bpf.filter = filter;
    // Set up seccomp-bpf first.
    EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
    EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));

    EXPECT_OK(cap_enter());  // Enter capability mode.

    // fchmod is allowed by Capsicum, but failed by BPF.
    EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644));
    // open is allowed by BPF, but failed by Capsicum
    EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY));
    // fstat is failed by both BPF and Capsicum; tie-break is on errno
    struct stat buf;
    EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf));
    // fsync is allowed by Capsicum, but BPF's SIGSYS generation takes precedence
    fsync(fd);  // terminate with unhandled SIGSYS
    exit(0);
  }
  int status;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  EXPECT_TRUE(WIFSIGNALED(status));
  EXPECT_EQ(SIGSYS, WTERMSIG(status));
  unlink(TmpFile("cap_bpf_capmode"));
}
11128ac5aef8SEnji Cooper
// Submit Linux native AIO requests (raw io_setup / io_submit / io_destroy
// syscalls) against descriptors carrying different Capsicum rights, and check
// that write, sync and read requests are refused when the descriptor lacks
// the corresponding right.
TEST(Linux, AIO) {
  int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);

  // Read+seek only.
  cap_rights_t rights_read;
  cap_rights_init(&rights_read, CAP_READ, CAP_SEEK);
  // Write+seek only.
  cap_rights_t rights_write;
  cap_rights_init(&rights_write, CAP_WRITE, CAP_SEEK);
  // Read, write, seek and fsync.
  cap_rights_t rights_all;
  cap_rights_init(&rights_all, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);

  // One restricted duplicate of fd per rights set.
  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, &rights_read));
  EXPECT_OK(cap_ro);
  int cap_wo = dup(fd);
  EXPECT_OK(cap_wo);
  EXPECT_OK(cap_rights_limit(cap_wo, &rights_write));
  EXPECT_OK(cap_wo);
  int cap_all = dup(fd);
  EXPECT_OK(cap_all);
  EXPECT_OK(cap_rights_limit(cap_all, &rights_all));
  EXPECT_OK(cap_all);

  // Linux: io_setup, io_submit, io_getevents, io_cancel, io_destroy
  aio_context_t ctx = 0;
  EXPECT_OK(syscall(__NR_io_setup, 10, &ctx));

  // A single request over the first four bytes of `buffer` at file offset 0;
  // its opcode and target descriptor are changed before each submission.
  unsigned char buffer[32] = {1, 2, 3, 4};
  struct iocb req;
  memset(&req, 0, sizeof(req));
  req.aio_reqprio = 0;
  req.aio_fildes = fd;
  req.aio_buf = (__u64)(uintptr_t)buffer;
  req.aio_nbytes = 4;
  req.aio_offset = 0;
  struct iocb* reqs[1] = {&req};

  // Write: refused on the read-only duplicate, accepted on the write-only one.
  req.aio_lio_opcode = IOCB_CMD_PWRITE;
  req.aio_fildes = cap_ro;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  req.aio_fildes = cap_wo;
  EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs));

  // Sync (descriptor is still cap_wo, which lacks CAP_FSYNC): refused.
  req.aio_lio_opcode = IOCB_CMD_FSYNC;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  req.aio_lio_opcode = IOCB_CMD_FDSYNC;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented, so the
  // submission still fails -- just not with a capability error.
  req.aio_fildes = cap_all;
  EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  req.aio_lio_opcode = IOCB_CMD_FSYNC;
  EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));

  // Read: refused on the write-only duplicate, accepted on the read-only one.
  req.aio_lio_opcode = IOCB_CMD_PREAD;
  req.aio_fildes = cap_wo;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  req.aio_fildes = cap_ro;
  EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs));

  EXPECT_OK(syscall(__NR_io_destroy, ctx));

  // Release descriptors in reverse order of creation and remove the file.
  close(cap_all);
  close(cap_wo);
  close(cap_ro);
  close(fd);
  unlink(TmpFile("cap_aio"));
}
11858ac5aef8SEnji Cooper
11868ac5aef8SEnji Cooper #ifndef KCMP_FILE
11878ac5aef8SEnji Cooper #define KCMP_FILE 0
11888ac5aef8SEnji Cooper #endif
TEST(Linux,KcmpIfAvailable)1189*2d936e6cSAlex Richardson TEST(Linux, KcmpIfAvailable) {
11908ac5aef8SEnji Cooper // This requires CONFIG_CHECKPOINT_RESTORE in kernel config.
11918ac5aef8SEnji Cooper int fd = open("/etc/passwd", O_RDONLY);
11928ac5aef8SEnji Cooper EXPECT_OK(fd);
11938ac5aef8SEnji Cooper pid_t parent = getpid_();
11948ac5aef8SEnji Cooper
11958ac5aef8SEnji Cooper errno = 0;
11968ac5aef8SEnji Cooper int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd);
11978ac5aef8SEnji Cooper if (rc == -1 && errno == ENOSYS) {
1198*2d936e6cSAlex Richardson GTEST_SKIP() << "kcmp(2) gives -ENOSYS";
11998ac5aef8SEnji Cooper }
12008ac5aef8SEnji Cooper
12018ac5aef8SEnji Cooper pid_t child = fork();
12028ac5aef8SEnji Cooper if (child == 0) {
12038ac5aef8SEnji Cooper // Child: limit rights on FD.
12048ac5aef8SEnji Cooper child = getpid_();
12058ac5aef8SEnji Cooper EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
12068ac5aef8SEnji Cooper cap_rights_t rights;
12078ac5aef8SEnji Cooper cap_rights_init(&rights, CAP_READ, CAP_WRITE);
12088ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(fd, &rights));
12098ac5aef8SEnji Cooper // A capability wrapping a normal FD is different (from a kcmp(2) perspective)
12108ac5aef8SEnji Cooper // than the original file.
12118ac5aef8SEnji Cooper EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
12128ac5aef8SEnji Cooper exit(HasFailure());
12138ac5aef8SEnji Cooper }
12148ac5aef8SEnji Cooper // Wait for the child.
12158ac5aef8SEnji Cooper int status;
12168ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, 0));
12178ac5aef8SEnji Cooper rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
12188ac5aef8SEnji Cooper EXPECT_EQ(0, rc);
12198ac5aef8SEnji Cooper
12208ac5aef8SEnji Cooper close(fd);
12218ac5aef8SEnji Cooper }
12228ac5aef8SEnji Cooper
// Check that /proc/<pid>/fdinfo/<fd> for a capability can be read through a
// rights-limited descriptor, and that it reports both the file position of
// the underlying file and a "rights:" line.
TEST(Linux, ProcFS) {
  cap_rights_t rights;
  cap_rights_init(&rights, CAP_READ, CAP_SEEK);
  int fd = open("/etc/passwd", O_RDONLY);
  EXPECT_OK(fd);
  // Move the file position so that a recognisable value shows up in fdinfo.
  lseek(fd, 4, SEEK_SET);
  int cap = dup(fd);
  EXPECT_OK(cap);
  EXPECT_OK(cap_rights_limit(cap, &rights));
  pid_t me = getpid_();

  char buffer[1024];
  snprintf(buffer, sizeof(buffer), "/proc/%d/fdinfo/%d", me, cap);
  int procfd = open(buffer, O_RDONLY);
  EXPECT_OK(procfd) << " failed to open " << buffer;
  if (procfd < 0) {
    // Nothing more can be checked; do not leak the descriptors opened so far.
    close(cap);
    close(fd);
    return;
  }
  int proccap = dup(procfd);
  EXPECT_OK(proccap);
  EXPECT_OK(cap_rights_limit(proccap, &rights));

  // read() does not NUL-terminate, so leave room for a terminator and add it
  // explicitly before handing the buffer to strstr().
  ssize_t len = read(proccap, buffer, sizeof(buffer) - 1);
  EXPECT_OK(len);
  buffer[len > 0 ? len : 0] = '\0';
  // The fdinfo should include the file pos of the underlying file
  EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4"));
  // ...and the rights of the Capsicum capability.
  EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x"));

  close(procfd);
  close(proccap);
  close(cap);
  close(fd);
}
12548ac5aef8SEnji Cooper
// From inside capability mode, query the CPU-time clocks of another process
// (a forked child, then PID 1) via hand-built clock IDs.
FORK_TEST(Linux, ProcessClocks) {
  pid_t self = getpid_();
  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    // Child: stay alive briefly so the parent has a process to query.
    child = getpid_();
    usleep(100000);
    exit(0);
  }

  EXPECT_OK(cap_enter());  // Enter capability mode.

  // Nefariously build a clock ID for the child's CPU time.
  // This relies on knowledge of the internal layout of clock IDs.
  clockid_t child_cpu_clock = ((~child) << 3) | 0x0;
  struct timespec child_time;
  memset(&child_time, 0, sizeof(child_time));

  // TODO(drysdale): Should not be possible to retrieve info about a
  // different process, as the PID global namespace should be locked
  // down.
  EXPECT_OK(clock_gettime(child_cpu_clock, &child_time));
  if (verbose) {
    fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n",
            self, child, child_cpu_clock, (long)child_time.tv_sec, (long)child_time.tv_nsec);
  }

  // Same again for PID 1.
  clockid_t init_cpu_clock = ((~1) << 3) | 0x0;
  struct timespec init_time;
  memset(&init_time, 0, sizeof(init_time));
  EXPECT_OK(clock_gettime(init_cpu_clock, &init_time));
  if (verbose) {
    fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n",
            self, init_cpu_clock, (long)init_time.tv_sec, (long)init_time.tv_nsec);
  }

  // Orphan the child.
}
12898ac5aef8SEnji Cooper
// Check which rights the F_SETLEASE / F_GETLEASE fcntl operations need: a
// descriptor with only read/write is refused, while one that also carries
// CAP_FLOCK and CAP_FSIGNAL can set and query a lease.
TEST(Linux, SetLease) {
  int lease_fd = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(lease_fd);
  int plain_fd = dup(lease_fd);
  EXPECT_OK(plain_fd);

  // Full set of rights for the first descriptor.
  cap_rights_t lease_rights;
  cap_rights_init(&lease_rights, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL);
  EXPECT_OK(cap_rights_limit(lease_fd, &lease_rights));

  // Read/write only for the duplicate.
  cap_rights_t plain_rights;
  cap_rights_init(&plain_rights, CAP_READ, CAP_WRITE);
  EXPECT_OK(cap_rights_limit(plain_fd, &plain_rights));

  EXPECT_NOTCAPABLE(fcntl(plain_fd, F_SETLEASE, F_WRLCK));
  EXPECT_NOTCAPABLE(fcntl(plain_fd, F_GETLEASE));

  // tmpfs doesn't support leases
  const bool leases_supported = !tmpdir_on_tmpfs;
  if (leases_supported) {
    // Take a write lease and read it back...
    EXPECT_OK(fcntl(lease_fd, F_SETLEASE, F_WRLCK));
    EXPECT_EQ(F_WRLCK, fcntl(lease_fd, F_GETLEASE));

    // ...then drop it again.
    EXPECT_OK(fcntl(lease_fd, F_SETLEASE, F_UNLCK, 0));
    EXPECT_EQ(F_UNLCK, fcntl(lease_fd, F_GETLEASE));
  }
  close(lease_fd);
  close(plain_fd);
  unlink(TmpFile("cap_lease"));
}
13188ac5aef8SEnji Cooper
// Drive the raw cap_rights_limit syscall with a series of malformed argument
// combinations and check the errno reported for each.
TEST(Linux, InvalidRightsSyscall) {
  int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644);
  EXPECT_OK(fd);

  cap_rights_t limits;
  cap_rights_init(&limits, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT);
  // Single-entry ioctl list used by the "ioctl subright" case below.
  unsigned int single_ioctl = 1;

  // Use the raw syscall throughout.  First a well-formed call as a baseline.
  EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &limits, 0, 0, NULL, 0));

  // Directly access the syscall, and find all unseemly manner of use for it.

  // Case: invalid flags.
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &limits, 0, 0, NULL, 1));
  EXPECT_EQ(EINVAL, errno);

  // Case: an fcntl subright is given, but CAP_FCNTL is not set.
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &limits, CAP_FCNTL_GETFL, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // Case: an ioctl subright is given, but CAP_IOCTL is not set.
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &limits, 0, 1, &single_ioctl, 0));
  EXPECT_EQ(EINVAL, errno);

  // Case: N ioctls claimed, but a null pointer passed.
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &limits, 0, 1, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // Case: invalid nioctls.
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &limits, 0, -2, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // Case: null primary rights.
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0));
  EXPECT_EQ(EFAULT, errno);

  // Case: invalid index bitmask.
  limits.cr_rights[0] |= 3ULL << 57;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &limits, 0, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // Case: invalid version (applied on top of the corrupted index bits).
  limits.cr_rights[0] |= 2ULL << 62;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &limits, 0, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  close(fd);
  unlink(TmpFile("cap_invalid_rights"));
}
13618ac5aef8SEnji Cooper
1362*2d936e6cSAlex Richardson FORK_TEST_ON(Linux, OpenByHandleAtIfRoot, TmpFile("cap_openbyhandle_testfile")) {
1363*2d936e6cSAlex Richardson GTEST_SKIP_IF_NOT_ROOT();
13648ac5aef8SEnji Cooper int dir = open(tmpdir.c_str(), O_RDONLY);
13658ac5aef8SEnji Cooper EXPECT_OK(dir);
13668ac5aef8SEnji Cooper int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644);
13678ac5aef8SEnji Cooper EXPECT_OK(fd);
13688ac5aef8SEnji Cooper const char* message = "Saved text";
13698ac5aef8SEnji Cooper EXPECT_OK(write(fd, message, strlen(message)));
13708ac5aef8SEnji Cooper close(fd);
13718ac5aef8SEnji Cooper
13728ac5aef8SEnji Cooper struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
13738ac5aef8SEnji Cooper fhandle->handle_bytes = MAX_HANDLE_SZ;
13748ac5aef8SEnji Cooper int mount_id;
13758ac5aef8SEnji Cooper EXPECT_OK(name_to_handle_at(dir, "cap_openbyhandle_testfile", fhandle, &mount_id, 0));
13768ac5aef8SEnji Cooper
13778ac5aef8SEnji Cooper fd = open_by_handle_at(dir, fhandle, O_RDONLY);
13788ac5aef8SEnji Cooper EXPECT_OK(fd);
13798ac5aef8SEnji Cooper char buffer[200];
1380*2d936e6cSAlex Richardson ssize_t len = read(fd, buffer, 199);
1381*2d936e6cSAlex Richardson EXPECT_OK(len);
1382*2d936e6cSAlex Richardson EXPECT_EQ(std::string(message), std::string(buffer, len));
13838ac5aef8SEnji Cooper close(fd);
13848ac5aef8SEnji Cooper
13858ac5aef8SEnji Cooper // Cannot issue open_by_handle_at after entering capability mode.
13868ac5aef8SEnji Cooper cap_enter();
13878ac5aef8SEnji Cooper EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY));
13888ac5aef8SEnji Cooper
13898ac5aef8SEnji Cooper close(dir);
13908ac5aef8SEnji Cooper }
13918ac5aef8SEnji Cooper
// Thin wrapper around the getrandom syscall.  When the build headers provide
// no __NR_getrandom, behaves like an unimplemented syscall: sets errno to
// ENOSYS and returns -1.
int getrandom_(void *buf, size_t buflen, unsigned int flags) {
#if !defined(__NR_getrandom)
  errno = ENOSYS;
  return -1;
#else
  return syscall(__NR_getrandom, buf, buflen, flags);
#endif
}
14008ac5aef8SEnji Cooper
14018ac5aef8SEnji Cooper #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
14028ac5aef8SEnji Cooper #include <linux/random.h> // Requires 3.17 kernel
FORK_TEST(Linux,GetRandom)14038ac5aef8SEnji Cooper FORK_TEST(Linux, GetRandom) {
14048ac5aef8SEnji Cooper EXPECT_OK(cap_enter());
14058ac5aef8SEnji Cooper unsigned char buffer[1024];
14068ac5aef8SEnji Cooper unsigned char buffer2[1024];
14078ac5aef8SEnji Cooper EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK));
14088ac5aef8SEnji Cooper EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK));
14098ac5aef8SEnji Cooper EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer)));
14108ac5aef8SEnji Cooper }
14118ac5aef8SEnji Cooper #endif
14128ac5aef8SEnji Cooper
// Thin wrapper around the memfd_create syscall.  When the build headers
// provide no __NR_memfd_create, behaves like an unimplemented syscall: sets
// errno to ENOSYS and returns -1.
int memfd_create_(const char *name, unsigned int flags) {
#if !defined(__NR_memfd_create)
  errno = ENOSYS;
  return -1;
#else
  return syscall(__NR_memfd_create, name, flags);
#endif
}
14218ac5aef8SEnji Cooper
14228ac5aef8SEnji Cooper #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
14238ac5aef8SEnji Cooper #include <linux/memfd.h> // Requires 3.17 kernel
TEST(Linux,MemFDDeathTestIfAvailable)1424*2d936e6cSAlex Richardson TEST(Linux, MemFDDeathTestIfAvailable) {
14258ac5aef8SEnji Cooper int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING);
14268ac5aef8SEnji Cooper if (memfd == -1 && errno == ENOSYS) {
1427*2d936e6cSAlex Richardson GTEST_SKIP() << "memfd_create(2) gives -ENOSYS";
14288ac5aef8SEnji Cooper }
14298ac5aef8SEnji Cooper const int LEN = 16;
14308ac5aef8SEnji Cooper EXPECT_OK(ftruncate(memfd, LEN));
14318ac5aef8SEnji Cooper int memfd_ro = dup(memfd);
14328ac5aef8SEnji Cooper int memfd_rw = dup(memfd);
14338ac5aef8SEnji Cooper EXPECT_OK(memfd_ro);
14348ac5aef8SEnji Cooper EXPECT_OK(memfd_rw);
14358ac5aef8SEnji Cooper cap_rights_t rights;
14368ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT)));
14378ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD)));
14388ac5aef8SEnji Cooper
14398ac5aef8SEnji Cooper unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0);
14408ac5aef8SEnji Cooper EXPECT_NE((unsigned char *)MAP_FAILED, p_ro);
14418ac5aef8SEnji Cooper unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0);
14428ac5aef8SEnji Cooper EXPECT_NE((unsigned char *)MAP_FAILED, p_rw);
14438ac5aef8SEnji Cooper EXPECT_EQ(MAP_FAILED,
14448ac5aef8SEnji Cooper mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0));
14458ac5aef8SEnji Cooper
14468ac5aef8SEnji Cooper *p_rw = 42;
14478ac5aef8SEnji Cooper EXPECT_EQ(42, *p_ro);
14488ac5aef8SEnji Cooper EXPECT_DEATH(*p_ro = 42, "");
14498ac5aef8SEnji Cooper
14508ac5aef8SEnji Cooper #ifndef F_ADD_SEALS
14518ac5aef8SEnji Cooper // Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17
14528ac5aef8SEnji Cooper #define _F_LINUX_SPECIFIC_BASE F_SETLEASE
14538ac5aef8SEnji Cooper #define F_ADD_SEALS (_F_LINUX_SPECIFIC_BASE + 9)
14548ac5aef8SEnji Cooper #define F_GET_SEALS (_F_LINUX_SPECIFIC_BASE + 10)
14558ac5aef8SEnji Cooper #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
14568ac5aef8SEnji Cooper #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
14578ac5aef8SEnji Cooper #define F_SEAL_GROW 0x0004 /* prevent file from growing */
14588ac5aef8SEnji Cooper #define F_SEAL_WRITE 0x0008 /* prevent writes */
14598ac5aef8SEnji Cooper #endif
14608ac5aef8SEnji Cooper
14618ac5aef8SEnji Cooper // Reading the seal information requires CAP_FSTAT.
14628ac5aef8SEnji Cooper int seals = fcntl(memfd, F_GET_SEALS);
14638ac5aef8SEnji Cooper EXPECT_OK(seals);
14648ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
14658ac5aef8SEnji Cooper int seals_ro = fcntl(memfd_ro, F_GET_SEALS);
14668ac5aef8SEnji Cooper EXPECT_EQ(seals, seals_ro);
14678ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
14688ac5aef8SEnji Cooper int seals_rw = fcntl(memfd_rw, F_GET_SEALS);
14698ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(seals_rw);
14708ac5aef8SEnji Cooper
14718ac5aef8SEnji Cooper // Fail to seal as a writable mapping exists.
14728ac5aef8SEnji Cooper EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
14738ac5aef8SEnji Cooper EXPECT_EQ(EBUSY, errno);
14748ac5aef8SEnji Cooper *p_rw = 42;
14758ac5aef8SEnji Cooper
14768ac5aef8SEnji Cooper // Seal the rw version; need to unmap first.
14778ac5aef8SEnji Cooper munmap(p_rw, LEN);
14788ac5aef8SEnji Cooper munmap(p_ro, LEN);
14798ac5aef8SEnji Cooper EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
14808ac5aef8SEnji Cooper
14818ac5aef8SEnji Cooper seals = fcntl(memfd, F_GET_SEALS);
14828ac5aef8SEnji Cooper EXPECT_OK(seals);
14838ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
14848ac5aef8SEnji Cooper seals_ro = fcntl(memfd_ro, F_GET_SEALS);
14858ac5aef8SEnji Cooper EXPECT_EQ(seals, seals_ro);
14868ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
14878ac5aef8SEnji Cooper
14888ac5aef8SEnji Cooper // Remove the CAP_FCHMOD right, can no longer add seals.
14898ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW)));
14908ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
14918ac5aef8SEnji Cooper
14928ac5aef8SEnji Cooper close(memfd);
14938ac5aef8SEnji Cooper close(memfd_ro);
14948ac5aef8SEnji Cooper close(memfd_rw);
14958ac5aef8SEnji Cooper }
14968ac5aef8SEnji Cooper #endif
14978ac5aef8SEnji Cooper
14988ac5aef8SEnji Cooper #else
// Placeholder definition for builds where __linux__ is not defined.
void noop() {
}
15008ac5aef8SEnji Cooper #endif
1501