1*8ac5aef8SEnji Cooper // Tests of Linux-specific functionality 2*8ac5aef8SEnji Cooper #ifdef __linux__ 3*8ac5aef8SEnji Cooper 4*8ac5aef8SEnji Cooper #include <sys/types.h> 5*8ac5aef8SEnji Cooper #include <sys/stat.h> 6*8ac5aef8SEnji Cooper #include <sys/socket.h> 7*8ac5aef8SEnji Cooper #include <sys/timerfd.h> 8*8ac5aef8SEnji Cooper #include <sys/signalfd.h> 9*8ac5aef8SEnji Cooper #include <sys/eventfd.h> 10*8ac5aef8SEnji Cooper #include <sys/epoll.h> 11*8ac5aef8SEnji Cooper #include <sys/inotify.h> 12*8ac5aef8SEnji Cooper #include <sys/fanotify.h> 13*8ac5aef8SEnji Cooper #include <sys/mman.h> 14*8ac5aef8SEnji Cooper #include <sys/capability.h> // Requires e.g. libcap-dev package for POSIX.1e capabilities headers 15*8ac5aef8SEnji Cooper #include <linux/aio_abi.h> 16*8ac5aef8SEnji Cooper #include <linux/filter.h> 17*8ac5aef8SEnji Cooper #include <linux/seccomp.h> 18*8ac5aef8SEnji Cooper #include <linux/version.h> 19*8ac5aef8SEnji Cooper #include <poll.h> 20*8ac5aef8SEnji Cooper #include <sched.h> 21*8ac5aef8SEnji Cooper #include <signal.h> 22*8ac5aef8SEnji Cooper #include <fcntl.h> 23*8ac5aef8SEnji Cooper #include <unistd.h> 24*8ac5aef8SEnji Cooper 25*8ac5aef8SEnji Cooper #include <string> 26*8ac5aef8SEnji Cooper 27*8ac5aef8SEnji Cooper #include "capsicum.h" 28*8ac5aef8SEnji Cooper #include "syscalls.h" 29*8ac5aef8SEnji Cooper #include "capsicum-test.h" 30*8ac5aef8SEnji Cooper 31*8ac5aef8SEnji Cooper TEST(Linux, TimerFD) { 32*8ac5aef8SEnji Cooper int fd = timerfd_create(CLOCK_MONOTONIC, 0); 33*8ac5aef8SEnji Cooper 34*8ac5aef8SEnji Cooper cap_rights_t r_ro; 35*8ac5aef8SEnji Cooper cap_rights_init(&r_ro, CAP_READ); 36*8ac5aef8SEnji Cooper cap_rights_t r_wo; 37*8ac5aef8SEnji Cooper cap_rights_init(&r_wo, CAP_WRITE); 38*8ac5aef8SEnji Cooper cap_rights_t r_rw; 39*8ac5aef8SEnji Cooper cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); 40*8ac5aef8SEnji Cooper cap_rights_t r_rwpoll; 41*8ac5aef8SEnji Cooper cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT); 42*8ac5aef8SEnji Cooper 43*8ac5aef8SEnji Cooper int cap_fd_ro = dup(fd); 44*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_ro); 45*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro)); 46*8ac5aef8SEnji Cooper int cap_fd_wo = dup(fd); 47*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_wo); 48*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo)); 49*8ac5aef8SEnji Cooper int cap_fd_rw = dup(fd); 50*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_rw); 51*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw)); 52*8ac5aef8SEnji Cooper int cap_fd_all = dup(fd); 53*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_all); 54*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll)); 55*8ac5aef8SEnji Cooper 56*8ac5aef8SEnji Cooper struct itimerspec old_ispec; 57*8ac5aef8SEnji Cooper struct itimerspec ispec; 58*8ac5aef8SEnji Cooper ispec.it_interval.tv_sec = 0; 59*8ac5aef8SEnji Cooper ispec.it_interval.tv_nsec = 0; 60*8ac5aef8SEnji Cooper ispec.it_value.tv_sec = 0; 61*8ac5aef8SEnji Cooper ispec.it_value.tv_nsec = 100000000; // 100ms 62*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL)); 63*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec)); 64*8ac5aef8SEnji Cooper EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL)); 65*8ac5aef8SEnji Cooper EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL)); 66*8ac5aef8SEnji Cooper EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL)); 67*8ac5aef8SEnji Cooper 68*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec)); 69*8ac5aef8SEnji Cooper EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec)); 70*8ac5aef8SEnji Cooper EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec)); 71*8ac5aef8SEnji Cooper EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec)); 72*8ac5aef8SEnji Cooper 73*8ac5aef8SEnji Cooper // To be able to poll() for the timer pop, still need CAP_EVENT. 74*8ac5aef8SEnji Cooper struct pollfd poll_fd; 75*8ac5aef8SEnji Cooper for (int ii = 0; ii < 3; ii++) { 76*8ac5aef8SEnji Cooper poll_fd.revents = 0; 77*8ac5aef8SEnji Cooper poll_fd.events = POLLIN; 78*8ac5aef8SEnji Cooper switch (ii) { 79*8ac5aef8SEnji Cooper case 0: poll_fd.fd = cap_fd_ro; break; 80*8ac5aef8SEnji Cooper case 1: poll_fd.fd = cap_fd_wo; break; 81*8ac5aef8SEnji Cooper case 2: poll_fd.fd = cap_fd_rw; break; 82*8ac5aef8SEnji Cooper } 83*8ac5aef8SEnji Cooper // Poll immediately returns with POLLNVAL 84*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 400)); 85*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 86*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 87*8ac5aef8SEnji Cooper } 88*8ac5aef8SEnji Cooper 89*8ac5aef8SEnji Cooper poll_fd.fd = cap_fd_all; 90*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 400)); 91*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLIN)); 92*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); 93*8ac5aef8SEnji Cooper 94*8ac5aef8SEnji Cooper EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec)); 95*8ac5aef8SEnji Cooper EXPECT_EQ(0, old_ispec.it_value.tv_sec); 96*8ac5aef8SEnji Cooper EXPECT_EQ(0, old_ispec.it_value.tv_nsec); 97*8ac5aef8SEnji Cooper EXPECT_EQ(0, old_ispec.it_interval.tv_sec); 98*8ac5aef8SEnji Cooper EXPECT_EQ(0, old_ispec.it_interval.tv_nsec); 99*8ac5aef8SEnji Cooper 100*8ac5aef8SEnji Cooper close(cap_fd_all); 101*8ac5aef8SEnji Cooper close(cap_fd_rw); 102*8ac5aef8SEnji Cooper close(cap_fd_wo); 103*8ac5aef8SEnji Cooper close(cap_fd_ro); 104*8ac5aef8SEnji Cooper close(fd); 105*8ac5aef8SEnji Cooper } 106*8ac5aef8SEnji Cooper 107*8ac5aef8SEnji Cooper FORK_TEST(Linux, SignalFD) { 108*8ac5aef8SEnji Cooper if (force_mt) { 109*8ac5aef8SEnji Cooper TEST_SKIPPED("multi-threaded run clashes with signals"); 110*8ac5aef8SEnji Cooper return; 111*8ac5aef8SEnji Cooper } 112*8ac5aef8SEnji Cooper pid_t me = getpid(); 113*8ac5aef8SEnji Cooper sigset_t mask; 114*8ac5aef8SEnji Cooper sigemptyset(&mask); 115*8ac5aef8SEnji Cooper sigaddset(&mask, SIGUSR1); 116*8ac5aef8SEnji Cooper 117*8ac5aef8SEnji Cooper // Block signals before registering against a new signal FD. 118*8ac5aef8SEnji Cooper EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL)); 119*8ac5aef8SEnji Cooper int fd = signalfd(-1, &mask, 0); 120*8ac5aef8SEnji Cooper EXPECT_OK(fd); 121*8ac5aef8SEnji Cooper 122*8ac5aef8SEnji Cooper cap_rights_t r_rs; 123*8ac5aef8SEnji Cooper cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 124*8ac5aef8SEnji Cooper cap_rights_t r_ws; 125*8ac5aef8SEnji Cooper cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 126*8ac5aef8SEnji Cooper cap_rights_t r_sig; 127*8ac5aef8SEnji Cooper cap_rights_init(&r_sig, CAP_FSIGNAL); 128*8ac5aef8SEnji Cooper cap_rights_t r_rssig; 129*8ac5aef8SEnji Cooper cap_rights_init(&r_rssig, CAP_FSIGNAL, CAP_READ, CAP_SEEK); 130*8ac5aef8SEnji Cooper cap_rights_t r_rssig_poll; 131*8ac5aef8SEnji Cooper cap_rights_init(&r_rssig_poll, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT); 132*8ac5aef8SEnji Cooper 133*8ac5aef8SEnji Cooper // Various capability variants. 134*8ac5aef8SEnji Cooper int cap_fd_none = dup(fd); 135*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_none); 136*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_none, &r_ws)); 137*8ac5aef8SEnji Cooper int cap_fd_read = dup(fd); 138*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_read); 139*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_read, &r_rs)); 140*8ac5aef8SEnji Cooper int cap_fd_sig = dup(fd); 141*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_sig); 142*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_sig, &r_sig)); 143*8ac5aef8SEnji Cooper int cap_fd_sig_read = dup(fd); 144*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_sig_read); 145*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_sig_read, &r_rssig)); 146*8ac5aef8SEnji Cooper int cap_fd_all = dup(fd); 147*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_all); 148*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rssig_poll)); 149*8ac5aef8SEnji Cooper 150*8ac5aef8SEnji Cooper struct signalfd_siginfo fdsi; 151*8ac5aef8SEnji Cooper 152*8ac5aef8SEnji Cooper // Need CAP_READ to read the signal information 153*8ac5aef8SEnji Cooper kill(me, SIGUSR1); 154*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(read(cap_fd_none, &fdsi, sizeof(struct signalfd_siginfo))); 155*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(read(cap_fd_sig, &fdsi, sizeof(struct signalfd_siginfo))); 156*8ac5aef8SEnji Cooper int len = read(cap_fd_read, &fdsi, sizeof(struct signalfd_siginfo)); 157*8ac5aef8SEnji Cooper EXPECT_OK(len); 158*8ac5aef8SEnji Cooper EXPECT_EQ(sizeof(struct signalfd_siginfo), (size_t)len); 159*8ac5aef8SEnji Cooper EXPECT_EQ(SIGUSR1, (int)fdsi.ssi_signo); 160*8ac5aef8SEnji Cooper 161*8ac5aef8SEnji Cooper // Need CAP_FSIGNAL to modify the signal mask. 162*8ac5aef8SEnji Cooper sigemptyset(&mask); 163*8ac5aef8SEnji Cooper sigaddset(&mask, SIGUSR1); 164*8ac5aef8SEnji Cooper sigaddset(&mask, SIGUSR2); 165*8ac5aef8SEnji Cooper EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL)); 166*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(signalfd(cap_fd_none, &mask, 0)); 167*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(signalfd(cap_fd_read, &mask, 0)); 168*8ac5aef8SEnji Cooper EXPECT_EQ(cap_fd_sig, signalfd(cap_fd_sig, &mask, 0)); 169*8ac5aef8SEnji Cooper 170*8ac5aef8SEnji Cooper // Need CAP_EVENT to get notification of a signal in poll(2). 171*8ac5aef8SEnji Cooper kill(me, SIGUSR2); 172*8ac5aef8SEnji Cooper 173*8ac5aef8SEnji Cooper struct pollfd poll_fd; 174*8ac5aef8SEnji Cooper poll_fd.revents = 0; 175*8ac5aef8SEnji Cooper poll_fd.events = POLLIN; 176*8ac5aef8SEnji Cooper poll_fd.fd = cap_fd_sig_read; 177*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 400)); 178*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 179*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 180*8ac5aef8SEnji Cooper 181*8ac5aef8SEnji Cooper poll_fd.fd = cap_fd_all; 182*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 400)); 183*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLIN)); 184*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); 185*8ac5aef8SEnji Cooper } 186*8ac5aef8SEnji Cooper 187*8ac5aef8SEnji Cooper TEST(Linux, EventFD) { 188*8ac5aef8SEnji Cooper int fd = eventfd(0, 0); 189*8ac5aef8SEnji Cooper EXPECT_OK(fd); 190*8ac5aef8SEnji Cooper 191*8ac5aef8SEnji Cooper cap_rights_t r_rs; 192*8ac5aef8SEnji Cooper cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 193*8ac5aef8SEnji Cooper cap_rights_t r_ws; 194*8ac5aef8SEnji Cooper cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 195*8ac5aef8SEnji Cooper cap_rights_t r_rws; 196*8ac5aef8SEnji Cooper cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); 197*8ac5aef8SEnji Cooper cap_rights_t r_rwspoll; 198*8ac5aef8SEnji Cooper cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); 199*8ac5aef8SEnji Cooper 200*8ac5aef8SEnji Cooper int cap_ro = dup(fd); 201*8ac5aef8SEnji Cooper EXPECT_OK(cap_ro); 202*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_ro, &r_rs)); 203*8ac5aef8SEnji Cooper int cap_wo = dup(fd); 204*8ac5aef8SEnji Cooper EXPECT_OK(cap_wo); 205*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_wo, &r_ws)); 206*8ac5aef8SEnji Cooper int cap_rw = dup(fd); 207*8ac5aef8SEnji Cooper EXPECT_OK(cap_rw); 208*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_rw, &r_rws)); 209*8ac5aef8SEnji Cooper int cap_all = dup(fd); 210*8ac5aef8SEnji Cooper EXPECT_OK(cap_all); 211*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_all, &r_rwspoll)); 212*8ac5aef8SEnji Cooper 213*8ac5aef8SEnji Cooper pid_t child = fork(); 214*8ac5aef8SEnji Cooper if (child == 0) { 215*8ac5aef8SEnji Cooper // Child: write counter to eventfd 216*8ac5aef8SEnji Cooper uint64_t u = 42; 217*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(write(cap_ro, &u, sizeof(u))); 218*8ac5aef8SEnji Cooper EXPECT_OK(write(cap_wo, &u, sizeof(u))); 219*8ac5aef8SEnji Cooper exit(HasFailure()); 220*8ac5aef8SEnji Cooper } 221*8ac5aef8SEnji Cooper 222*8ac5aef8SEnji Cooper sleep(1); // Allow child to write 223*8ac5aef8SEnji Cooper 224*8ac5aef8SEnji Cooper struct pollfd poll_fd; 225*8ac5aef8SEnji Cooper poll_fd.revents = 0; 226*8ac5aef8SEnji Cooper poll_fd.events = POLLIN; 227*8ac5aef8SEnji Cooper poll_fd.fd = cap_rw; 228*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 400)); 229*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 230*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 231*8ac5aef8SEnji Cooper 232*8ac5aef8SEnji Cooper poll_fd.fd = cap_all; 233*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 400)); 234*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLIN)); 235*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); 236*8ac5aef8SEnji Cooper 237*8ac5aef8SEnji Cooper uint64_t u; 238*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(read(cap_wo, &u, sizeof(u))); 239*8ac5aef8SEnji Cooper EXPECT_OK(read(cap_ro, &u, sizeof(u))); 240*8ac5aef8SEnji Cooper EXPECT_EQ(42, (int)u); 241*8ac5aef8SEnji Cooper 242*8ac5aef8SEnji Cooper // Wait for the child. 243*8ac5aef8SEnji Cooper int status; 244*8ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, 0)); 245*8ac5aef8SEnji Cooper int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 246*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); 247*8ac5aef8SEnji Cooper 248*8ac5aef8SEnji Cooper close(cap_all); 249*8ac5aef8SEnji Cooper close(cap_rw); 250*8ac5aef8SEnji Cooper close(cap_wo); 251*8ac5aef8SEnji Cooper close(cap_ro); 252*8ac5aef8SEnji Cooper close(fd); 253*8ac5aef8SEnji Cooper } 254*8ac5aef8SEnji Cooper 255*8ac5aef8SEnji Cooper FORK_TEST(Linux, epoll) { 256*8ac5aef8SEnji Cooper int sock_fds[2]; 257*8ac5aef8SEnji Cooper EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds)); 258*8ac5aef8SEnji Cooper // Queue some data. 259*8ac5aef8SEnji Cooper char buffer[4] = {1, 2, 3, 4}; 260*8ac5aef8SEnji Cooper EXPECT_OK(write(sock_fds[1], buffer, sizeof(buffer))); 261*8ac5aef8SEnji Cooper 262*8ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); // Enter capability mode. 263*8ac5aef8SEnji Cooper 264*8ac5aef8SEnji Cooper int epoll_fd = epoll_create(1); 265*8ac5aef8SEnji Cooper EXPECT_OK(epoll_fd); 266*8ac5aef8SEnji Cooper 267*8ac5aef8SEnji Cooper cap_rights_t r_rs; 268*8ac5aef8SEnji Cooper cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 269*8ac5aef8SEnji Cooper cap_rights_t r_ws; 270*8ac5aef8SEnji Cooper cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 271*8ac5aef8SEnji Cooper cap_rights_t r_rws; 272*8ac5aef8SEnji Cooper cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); 273*8ac5aef8SEnji Cooper cap_rights_t r_rwspoll; 274*8ac5aef8SEnji Cooper cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); 275*8ac5aef8SEnji Cooper cap_rights_t r_epoll; 276*8ac5aef8SEnji Cooper cap_rights_init(&r_epoll, CAP_EPOLL_CTL); 277*8ac5aef8SEnji Cooper 278*8ac5aef8SEnji Cooper int cap_epoll_wo = dup(epoll_fd); 279*8ac5aef8SEnji Cooper EXPECT_OK(cap_epoll_wo); 280*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_epoll_wo, &r_ws)); 281*8ac5aef8SEnji Cooper int cap_epoll_ro = dup(epoll_fd); 282*8ac5aef8SEnji Cooper EXPECT_OK(cap_epoll_ro); 283*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_epoll_ro, &r_rs)); 284*8ac5aef8SEnji Cooper int cap_epoll_rw = dup(epoll_fd); 285*8ac5aef8SEnji Cooper EXPECT_OK(cap_epoll_rw); 286*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_epoll_rw, &r_rws)); 287*8ac5aef8SEnji Cooper int cap_epoll_poll = dup(epoll_fd); 288*8ac5aef8SEnji Cooper EXPECT_OK(cap_epoll_poll); 289*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_epoll_poll, &r_rwspoll)); 290*8ac5aef8SEnji Cooper int cap_epoll_ctl = dup(epoll_fd); 291*8ac5aef8SEnji Cooper EXPECT_OK(cap_epoll_ctl); 292*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_epoll_ctl, &r_epoll)); 293*8ac5aef8SEnji Cooper 294*8ac5aef8SEnji Cooper // Can only modify the FDs being monitored if the CAP_EPOLL_CTL right is present. 295*8ac5aef8SEnji Cooper struct epoll_event eev; 296*8ac5aef8SEnji Cooper memset(&eev, 0, sizeof(eev)); 297*8ac5aef8SEnji Cooper eev.events = EPOLLIN|EPOLLOUT|EPOLLPRI; 298*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_ADD, sock_fds[0], &eev)); 299*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_ADD, sock_fds[0], &eev)); 300*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_ADD, sock_fds[0], &eev)); 301*8ac5aef8SEnji Cooper EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_ADD, sock_fds[0], &eev)); 302*8ac5aef8SEnji Cooper eev.events = EPOLLIN|EPOLLOUT; 303*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_MOD, sock_fds[0], &eev)); 304*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_MOD, sock_fds[0], &eev)); 305*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_MOD, sock_fds[0], &eev)); 306*8ac5aef8SEnji Cooper EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_MOD, sock_fds[0], &eev)); 307*8ac5aef8SEnji Cooper 308*8ac5aef8SEnji Cooper // Running epoll_pwait(2) requires CAP_EVENT. 309*8ac5aef8SEnji Cooper eev.events = 0; 310*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_ro, &eev, 1, 100, NULL)); 311*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_wo, &eev, 1, 100, NULL)); 312*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_rw, &eev, 1, 100, NULL)); 313*8ac5aef8SEnji Cooper EXPECT_OK(epoll_pwait(cap_epoll_poll, &eev, 1, 100, NULL)); 314*8ac5aef8SEnji Cooper EXPECT_EQ(EPOLLIN, eev.events & EPOLLIN); 315*8ac5aef8SEnji Cooper 316*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_DEL, sock_fds[0], &eev)); 317*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_DEL, sock_fds[0], &eev)); 318*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_DEL, sock_fds[0], &eev)); 319*8ac5aef8SEnji Cooper EXPECT_OK(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, sock_fds[0], &eev)); 320*8ac5aef8SEnji Cooper 321*8ac5aef8SEnji Cooper close(cap_epoll_ctl); 322*8ac5aef8SEnji Cooper close(cap_epoll_poll); 323*8ac5aef8SEnji Cooper close(cap_epoll_rw); 324*8ac5aef8SEnji Cooper close(cap_epoll_ro); 325*8ac5aef8SEnji Cooper close(cap_epoll_wo); 326*8ac5aef8SEnji Cooper close(epoll_fd); 327*8ac5aef8SEnji Cooper close(sock_fds[1]); 328*8ac5aef8SEnji Cooper close(sock_fds[0]); 329*8ac5aef8SEnji Cooper } 330*8ac5aef8SEnji Cooper 331*8ac5aef8SEnji Cooper TEST(Linux, fstatat) { 332*8ac5aef8SEnji Cooper int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644); 333*8ac5aef8SEnji Cooper EXPECT_OK(fd); 334*8ac5aef8SEnji Cooper unsigned char buffer[] = {1, 2, 3, 4}; 335*8ac5aef8SEnji Cooper EXPECT_OK(write(fd, buffer, sizeof(buffer))); 336*8ac5aef8SEnji Cooper cap_rights_t rights; 337*8ac5aef8SEnji Cooper int cap_rf = dup(fd); 338*8ac5aef8SEnji Cooper EXPECT_OK(cap_rf); 339*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT))); 340*8ac5aef8SEnji Cooper int cap_ro = dup(fd); 341*8ac5aef8SEnji Cooper EXPECT_OK(cap_ro); 342*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ))); 343*8ac5aef8SEnji Cooper 344*8ac5aef8SEnji Cooper struct stat info; 345*8ac5aef8SEnji Cooper EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH)); 346*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH)); 347*8ac5aef8SEnji Cooper EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH)); 348*8ac5aef8SEnji Cooper 349*8ac5aef8SEnji Cooper close(cap_ro); 350*8ac5aef8SEnji Cooper close(cap_rf); 351*8ac5aef8SEnji Cooper close(fd); 352*8ac5aef8SEnji Cooper 353*8ac5aef8SEnji Cooper int dir = open(tmpdir.c_str(), O_RDONLY); 354*8ac5aef8SEnji Cooper EXPECT_OK(dir); 355*8ac5aef8SEnji Cooper int dir_rf = dup(dir); 356*8ac5aef8SEnji Cooper EXPECT_OK(dir_rf); 357*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT))); 358*8ac5aef8SEnji Cooper int dir_ro = dup(fd); 359*8ac5aef8SEnji Cooper EXPECT_OK(dir_ro); 360*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ))); 361*8ac5aef8SEnji Cooper 362*8ac5aef8SEnji Cooper EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH)); 363*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH)); 364*8ac5aef8SEnji Cooper EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH)); 365*8ac5aef8SEnji Cooper 366*8ac5aef8SEnji Cooper close(dir_ro); 367*8ac5aef8SEnji Cooper close(dir_rf); 368*8ac5aef8SEnji Cooper close(dir); 369*8ac5aef8SEnji Cooper 370*8ac5aef8SEnji Cooper unlink(TmpFile("cap_fstatat")); 371*8ac5aef8SEnji Cooper } 372*8ac5aef8SEnji Cooper 373*8ac5aef8SEnji Cooper // fanotify support may not be available at compile-time 374*8ac5aef8SEnji Cooper #ifdef __NR_fanotify_init 375*8ac5aef8SEnji Cooper TEST(Linux, fanotify) { 376*8ac5aef8SEnji Cooper REQUIRE_ROOT(); 377*8ac5aef8SEnji Cooper int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR); 378*8ac5aef8SEnji Cooper EXPECT_OK(fa_fd); 379*8ac5aef8SEnji Cooper if (fa_fd < 0) return; // May not be enabled 380*8ac5aef8SEnji Cooper 381*8ac5aef8SEnji Cooper cap_rights_t r_rs; 382*8ac5aef8SEnji Cooper cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 383*8ac5aef8SEnji Cooper cap_rights_t r_ws; 384*8ac5aef8SEnji Cooper cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 385*8ac5aef8SEnji Cooper cap_rights_t r_rws; 386*8ac5aef8SEnji Cooper cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); 387*8ac5aef8SEnji Cooper cap_rights_t r_rwspoll; 388*8ac5aef8SEnji Cooper cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); 389*8ac5aef8SEnji Cooper cap_rights_t r_rwsnotify; 390*8ac5aef8SEnji Cooper cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY); 391*8ac5aef8SEnji Cooper cap_rights_t r_rsl; 392*8ac5aef8SEnji Cooper cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP); 393*8ac5aef8SEnji Cooper cap_rights_t r_rslstat; 394*8ac5aef8SEnji Cooper cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT); 395*8ac5aef8SEnji Cooper cap_rights_t r_rsstat; 396*8ac5aef8SEnji Cooper cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT); 397*8ac5aef8SEnji Cooper 398*8ac5aef8SEnji Cooper int cap_fd_ro = dup(fa_fd); 399*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_ro); 400*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs)); 401*8ac5aef8SEnji Cooper int cap_fd_wo = dup(fa_fd); 402*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_wo); 403*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws)); 404*8ac5aef8SEnji Cooper int cap_fd_rw = dup(fa_fd); 405*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_rw); 406*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws)); 407*8ac5aef8SEnji Cooper int cap_fd_poll = dup(fa_fd); 408*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_poll); 409*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll)); 410*8ac5aef8SEnji Cooper int cap_fd_not = dup(fa_fd); 411*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_not); 412*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify)); 413*8ac5aef8SEnji Cooper 414*8ac5aef8SEnji Cooper int rc = mkdir(TmpFile("cap_notify"), 0755); 415*8ac5aef8SEnji Cooper EXPECT_TRUE(rc == 0 || errno == EEXIST); 416*8ac5aef8SEnji Cooper int dfd = open(TmpFile("cap_notify"), O_RDONLY); 417*8ac5aef8SEnji Cooper EXPECT_OK(dfd); 418*8ac5aef8SEnji Cooper int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644); 419*8ac5aef8SEnji Cooper close(fd); 420*8ac5aef8SEnji Cooper int cap_dfd = dup(dfd); 421*8ac5aef8SEnji Cooper EXPECT_OK(cap_dfd); 422*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat)); 423*8ac5aef8SEnji Cooper EXPECT_OK(cap_dfd); 424*8ac5aef8SEnji Cooper int cap_dfd_rs = dup(dfd); 425*8ac5aef8SEnji Cooper EXPECT_OK(cap_dfd_rs); 426*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs)); 427*8ac5aef8SEnji Cooper EXPECT_OK(cap_dfd_rs); 428*8ac5aef8SEnji Cooper int cap_dfd_rsstat = dup(dfd); 429*8ac5aef8SEnji Cooper EXPECT_OK(cap_dfd_rsstat); 430*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat)); 431*8ac5aef8SEnji Cooper EXPECT_OK(cap_dfd_rsstat); 432*8ac5aef8SEnji Cooper int cap_dfd_rsl = dup(dfd); 433*8ac5aef8SEnji Cooper EXPECT_OK(cap_dfd_rsl); 434*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl)); 435*8ac5aef8SEnji Cooper EXPECT_OK(cap_dfd_rsl); 436*8ac5aef8SEnji Cooper 437*8ac5aef8SEnji Cooper // Need CAP_NOTIFY to change what's monitored. 438*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); 439*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); 440*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); 441*8ac5aef8SEnji Cooper EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); 442*8ac5aef8SEnji Cooper 443*8ac5aef8SEnji Cooper // Need CAP_FSTAT on the thing monitored. 444*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rs, NULL)); 445*8ac5aef8SEnji Cooper EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rsstat, NULL)); 446*8ac5aef8SEnji Cooper 447*8ac5aef8SEnji Cooper // Too add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd. 448*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsstat, "file")); 449*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsl, "file")); 450*8ac5aef8SEnji Cooper EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd, "file")); 451*8ac5aef8SEnji Cooper 452*8ac5aef8SEnji Cooper pid_t child = fork(); 453*8ac5aef8SEnji Cooper if (child == 0) { 454*8ac5aef8SEnji Cooper // Child: Perform activity in the directory under notify. 455*8ac5aef8SEnji Cooper sleep(1); 456*8ac5aef8SEnji Cooper unlink(TmpFile("cap_notify/temp")); 457*8ac5aef8SEnji Cooper int fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644); 458*8ac5aef8SEnji Cooper close(fd); 459*8ac5aef8SEnji Cooper exit(0); 460*8ac5aef8SEnji Cooper } 461*8ac5aef8SEnji Cooper 462*8ac5aef8SEnji Cooper // Need CAP_EVENT to poll. 463*8ac5aef8SEnji Cooper struct pollfd poll_fd; 464*8ac5aef8SEnji Cooper poll_fd.revents = 0; 465*8ac5aef8SEnji Cooper poll_fd.events = POLLIN; 466*8ac5aef8SEnji Cooper poll_fd.fd = cap_fd_rw; 467*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 1400)); 468*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 469*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 470*8ac5aef8SEnji Cooper 471*8ac5aef8SEnji Cooper poll_fd.fd = cap_fd_not; 472*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 1400)); 473*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 474*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 475*8ac5aef8SEnji Cooper 476*8ac5aef8SEnji Cooper poll_fd.fd = cap_fd_poll; 477*8ac5aef8SEnji Cooper EXPECT_OK(poll(&poll_fd, 1, 1400)); 478*8ac5aef8SEnji Cooper EXPECT_NE(0, (poll_fd.revents & POLLIN)); 479*8ac5aef8SEnji Cooper EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); 480*8ac5aef8SEnji Cooper 481*8ac5aef8SEnji Cooper // Need CAP_READ to read. 482*8ac5aef8SEnji Cooper struct fanotify_event_metadata ev; 483*8ac5aef8SEnji Cooper memset(&ev, 0, sizeof(ev)); 484*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev))); 485*8ac5aef8SEnji Cooper rc = read(fa_fd, &ev, sizeof(ev)); 486*8ac5aef8SEnji Cooper EXPECT_OK(rc); 487*8ac5aef8SEnji Cooper EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc); 488*8ac5aef8SEnji Cooper EXPECT_EQ(child, ev.pid); 489*8ac5aef8SEnji Cooper EXPECT_NE(0, ev.fd); 490*8ac5aef8SEnji Cooper 491*8ac5aef8SEnji Cooper // TODO(drysdale): reinstate if/when capsicum-linux propagates rights 492*8ac5aef8SEnji Cooper // to fanotify-generated FDs. 493*8ac5aef8SEnji Cooper #ifdef OMIT 494*8ac5aef8SEnji Cooper // fanotify(7) gives us a FD for the changed file. This should 495*8ac5aef8SEnji Cooper // only have rights that are a subset of those for the original 496*8ac5aef8SEnji Cooper // monitored directory file descriptor. 497*8ac5aef8SEnji Cooper cap_rights_t rights; 498*8ac5aef8SEnji Cooper CAP_SET_ALL(&rights); 499*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_get(ev.fd, &rights)); 500*8ac5aef8SEnji Cooper EXPECT_RIGHTS_IN(&rights, &r_rslstat); 501*8ac5aef8SEnji Cooper #endif 502*8ac5aef8SEnji Cooper 503*8ac5aef8SEnji Cooper // Wait for the child. 504*8ac5aef8SEnji Cooper int status; 505*8ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, 0)); 506*8ac5aef8SEnji Cooper rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 507*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); 508*8ac5aef8SEnji Cooper 509*8ac5aef8SEnji Cooper close(cap_dfd_rsstat); 510*8ac5aef8SEnji Cooper close(cap_dfd_rsl); 511*8ac5aef8SEnji Cooper close(cap_dfd_rs); 512*8ac5aef8SEnji Cooper close(cap_dfd); 513*8ac5aef8SEnji Cooper close(dfd); 514*8ac5aef8SEnji Cooper unlink(TmpFile("cap_notify/file")); 515*8ac5aef8SEnji Cooper unlink(TmpFile("cap_notify/temp")); 516*8ac5aef8SEnji Cooper rmdir(TmpFile("cap_notify")); 517*8ac5aef8SEnji Cooper close(cap_fd_not); 518*8ac5aef8SEnji Cooper close(cap_fd_poll); 519*8ac5aef8SEnji Cooper close(cap_fd_rw); 520*8ac5aef8SEnji Cooper close(cap_fd_wo); 521*8ac5aef8SEnji Cooper close(cap_fd_ro); 522*8ac5aef8SEnji Cooper close(fa_fd); 523*8ac5aef8SEnji Cooper } 524*8ac5aef8SEnji Cooper #endif 525*8ac5aef8SEnji Cooper 526*8ac5aef8SEnji Cooper TEST(Linux, inotify) { 527*8ac5aef8SEnji Cooper int i_fd = inotify_init(); 528*8ac5aef8SEnji Cooper EXPECT_OK(i_fd); 529*8ac5aef8SEnji Cooper 530*8ac5aef8SEnji Cooper cap_rights_t r_rs; 531*8ac5aef8SEnji Cooper cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 532*8ac5aef8SEnji Cooper cap_rights_t r_ws; 533*8ac5aef8SEnji Cooper cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 534*8ac5aef8SEnji Cooper cap_rights_t r_rws; 535*8ac5aef8SEnji Cooper cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); 536*8ac5aef8SEnji Cooper cap_rights_t r_rwsnotify; 537*8ac5aef8SEnji Cooper cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY); 538*8ac5aef8SEnji Cooper 539*8ac5aef8SEnji Cooper int cap_fd_ro = dup(i_fd); 540*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_ro); 541*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs)); 542*8ac5aef8SEnji Cooper int cap_fd_wo = dup(i_fd); 543*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_wo); 544*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws)); 545*8ac5aef8SEnji Cooper int cap_fd_rw = dup(i_fd); 546*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_rw); 547*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws)); 548*8ac5aef8SEnji Cooper int cap_fd_all = dup(i_fd); 549*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_all); 550*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify)); 551*8ac5aef8SEnji Cooper 552*8ac5aef8SEnji Cooper int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644); 553*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY)); 554*8ac5aef8SEnji Cooper int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY); 555*8ac5aef8SEnji Cooper EXPECT_OK(wd); 556*8ac5aef8SEnji Cooper 557*8ac5aef8SEnji Cooper unsigned char buffer[] = {1, 2, 3, 4}; 558*8ac5aef8SEnji Cooper EXPECT_OK(write(fd, buffer, sizeof(buffer))); 559*8ac5aef8SEnji Cooper 560*8ac5aef8SEnji Cooper struct inotify_event iev; 561*8ac5aef8SEnji Cooper memset(&iev, 0, sizeof(iev)); 562*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev))); 563*8ac5aef8SEnji Cooper int rc = read(cap_fd_ro, &iev, sizeof(iev)); 564*8ac5aef8SEnji Cooper EXPECT_OK(rc); 565*8ac5aef8SEnji Cooper EXPECT_EQ((int)sizeof(iev), rc); 566*8ac5aef8SEnji Cooper EXPECT_EQ(wd, iev.wd); 567*8ac5aef8SEnji Cooper 568*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd)); 569*8ac5aef8SEnji Cooper EXPECT_OK(inotify_rm_watch(cap_fd_all, wd)); 570*8ac5aef8SEnji Cooper 571*8ac5aef8SEnji Cooper close(fd); 572*8ac5aef8SEnji Cooper close(cap_fd_all); 573*8ac5aef8SEnji Cooper close(cap_fd_rw); 574*8ac5aef8SEnji Cooper close(cap_fd_wo); 575*8ac5aef8SEnji Cooper close(cap_fd_ro); 576*8ac5aef8SEnji Cooper close(i_fd); 577*8ac5aef8SEnji Cooper unlink(TmpFile("cap_inotify")); 578*8ac5aef8SEnji Cooper } 579*8ac5aef8SEnji Cooper 580*8ac5aef8SEnji Cooper TEST(Linux, ArchChange) { 581*8ac5aef8SEnji Cooper const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"}; 582*8ac5aef8SEnji Cooper const char* progs[] = {NULL, NULL, NULL}; 583*8ac5aef8SEnji Cooper char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL}; 584*8ac5aef8SEnji Cooper char* null_envp[] = {NULL}; 585*8ac5aef8SEnji Cooper int fds[3]; 586*8ac5aef8SEnji Cooper int count = 0; 587*8ac5aef8SEnji Cooper 588*8ac5aef8SEnji Cooper for (int ii = 0; ii < 3; ii++) { 589*8ac5aef8SEnji Cooper fds[count] = open(prog_candidates[ii], O_RDONLY); 590*8ac5aef8SEnji Cooper if (fds[count] >= 0) { 591*8ac5aef8SEnji Cooper progs[count] = prog_candidates[ii]; 592*8ac5aef8SEnji Cooper count++; 593*8ac5aef8SEnji Cooper } 594*8ac5aef8SEnji Cooper } 595*8ac5aef8SEnji Cooper if (count == 0) { 596*8ac5aef8SEnji Cooper TEST_SKIPPED("no different-architecture programs available"); 597*8ac5aef8SEnji Cooper return; 598*8ac5aef8SEnji Cooper } 599*8ac5aef8SEnji Cooper 600*8ac5aef8SEnji Cooper for (int ii = 0; ii < count; ii++) { 601*8ac5aef8SEnji Cooper // Fork-and-exec a binary of this architecture. 602*8ac5aef8SEnji Cooper pid_t child = fork(); 603*8ac5aef8SEnji Cooper if (child == 0) { 604*8ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); // Enter capability mode 605*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "[%d] call fexecve(%s, %s)\n", 606*8ac5aef8SEnji Cooper getpid_(), progs[ii], argv_pass[1]); 607*8ac5aef8SEnji Cooper argv_pass[0] = (char *)progs[ii]; 608*8ac5aef8SEnji Cooper int rc = fexecve_(fds[ii], argv_pass, null_envp); 609*8ac5aef8SEnji Cooper fprintf(stderr, "fexecve(%s) returned %d errno %d\n", progs[ii], rc, errno); 610*8ac5aef8SEnji Cooper exit(99); // Should not reach here. 611*8ac5aef8SEnji Cooper } 612*8ac5aef8SEnji Cooper int status; 613*8ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, 0)); 614*8ac5aef8SEnji Cooper int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 615*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); 616*8ac5aef8SEnji Cooper close(fds[ii]); 617*8ac5aef8SEnji Cooper } 618*8ac5aef8SEnji Cooper } 619*8ac5aef8SEnji Cooper 620*8ac5aef8SEnji Cooper FORK_TEST(Linux, Namespace) { 621*8ac5aef8SEnji Cooper REQUIRE_ROOT(); 622*8ac5aef8SEnji Cooper pid_t me = getpid_(); 623*8ac5aef8SEnji Cooper 624*8ac5aef8SEnji Cooper // Create a new UTS namespace. 625*8ac5aef8SEnji Cooper EXPECT_OK(unshare(CLONE_NEWUTS)); 626*8ac5aef8SEnji Cooper // Open an FD to its symlink. 627*8ac5aef8SEnji Cooper char buffer[256]; 628*8ac5aef8SEnji Cooper sprintf(buffer, "/proc/%d/ns/uts", me); 629*8ac5aef8SEnji Cooper int ns_fd = open(buffer, O_RDONLY); 630*8ac5aef8SEnji Cooper 631*8ac5aef8SEnji Cooper cap_rights_t r_rwlstat; 632*8ac5aef8SEnji Cooper cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT); 633*8ac5aef8SEnji Cooper cap_rights_t r_rwlstatns; 634*8ac5aef8SEnji Cooper cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS); 635*8ac5aef8SEnji Cooper 636*8ac5aef8SEnji Cooper int cap_fd = dup(ns_fd); 637*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd); 638*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat)); 639*8ac5aef8SEnji Cooper int cap_fd_setns = dup(ns_fd); 640*8ac5aef8SEnji Cooper EXPECT_OK(cap_fd_setns); 641*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns)); 642*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS)); 643*8ac5aef8SEnji Cooper EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS)); 644*8ac5aef8SEnji Cooper 645*8ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); // Enter capability mode. 646*8ac5aef8SEnji Cooper 647*8ac5aef8SEnji Cooper // No setns(2) but unshare(2) is allowed. 648*8ac5aef8SEnji Cooper EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS)); 649*8ac5aef8SEnji Cooper EXPECT_OK(unshare(CLONE_NEWUTS)); 650*8ac5aef8SEnji Cooper } 651*8ac5aef8SEnji Cooper 652*8ac5aef8SEnji Cooper static void SendFD(int fd, int over) { 653*8ac5aef8SEnji Cooper struct msghdr mh; 654*8ac5aef8SEnji Cooper mh.msg_name = NULL; // No address needed 655*8ac5aef8SEnji Cooper mh.msg_namelen = 0; 656*8ac5aef8SEnji Cooper char buffer1[1024]; 657*8ac5aef8SEnji Cooper struct iovec iov[1]; 658*8ac5aef8SEnji Cooper iov[0].iov_base = buffer1; 659*8ac5aef8SEnji Cooper iov[0].iov_len = sizeof(buffer1); 660*8ac5aef8SEnji Cooper mh.msg_iov = iov; 661*8ac5aef8SEnji Cooper mh.msg_iovlen = 1; 662*8ac5aef8SEnji Cooper char buffer2[1024]; 663*8ac5aef8SEnji Cooper mh.msg_control = buffer2; 664*8ac5aef8SEnji Cooper mh.msg_controllen = CMSG_LEN(sizeof(int)); 665*8ac5aef8SEnji Cooper struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh); 666*8ac5aef8SEnji Cooper cmptr->cmsg_level = SOL_SOCKET; 667*8ac5aef8SEnji Cooper cmptr->cmsg_type = SCM_RIGHTS; 668*8ac5aef8SEnji Cooper cmptr->cmsg_len = CMSG_LEN(sizeof(int)); 669*8ac5aef8SEnji Cooper *(int *)CMSG_DATA(cmptr) = fd; 670*8ac5aef8SEnji Cooper buffer1[0] = 0; 671*8ac5aef8SEnji Cooper iov[0].iov_len = 1; 672*8ac5aef8SEnji Cooper int rc = sendmsg(over, &mh, 0); 673*8ac5aef8SEnji Cooper EXPECT_OK(rc); 674*8ac5aef8SEnji Cooper } 675*8ac5aef8SEnji Cooper 676*8ac5aef8SEnji Cooper static int ReceiveFD(int over) { 677*8ac5aef8SEnji Cooper struct msghdr mh; 678*8ac5aef8SEnji Cooper mh.msg_name = NULL; // No address needed 679*8ac5aef8SEnji Cooper mh.msg_namelen = 0; 680*8ac5aef8SEnji Cooper char buffer1[1024]; 681*8ac5aef8SEnji Cooper struct iovec iov[1]; 682*8ac5aef8SEnji Cooper iov[0].iov_base = buffer1; 683*8ac5aef8SEnji Cooper iov[0].iov_len = sizeof(buffer1); 684*8ac5aef8SEnji Cooper mh.msg_iov = iov; 685*8ac5aef8SEnji Cooper mh.msg_iovlen = 1; 686*8ac5aef8SEnji Cooper char buffer2[1024]; 687*8ac5aef8SEnji Cooper mh.msg_control = buffer2; 688*8ac5aef8SEnji Cooper mh.msg_controllen = sizeof(buffer2); 689*8ac5aef8SEnji Cooper int rc = recvmsg(over, &mh, 0); 690*8ac5aef8SEnji Cooper EXPECT_OK(rc); 691*8ac5aef8SEnji Cooper EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen); 692*8ac5aef8SEnji Cooper struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh); 693*8ac5aef8SEnji Cooper int fd = *(int*)CMSG_DATA(cmptr); 694*8ac5aef8SEnji Cooper EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len); 695*8ac5aef8SEnji Cooper cmptr = CMSG_NXTHDR(&mh, cmptr); 696*8ac5aef8SEnji Cooper EXPECT_TRUE(cmptr == NULL); 697*8ac5aef8SEnji Cooper return fd; 698*8ac5aef8SEnji Cooper } 699*8ac5aef8SEnji Cooper 700*8ac5aef8SEnji Cooper static int shared_pd = -1; 701*8ac5aef8SEnji Cooper static int shared_sock_fds[2]; 702*8ac5aef8SEnji Cooper 703*8ac5aef8SEnji Cooper static int ChildFunc(void *arg) { 704*8ac5aef8SEnji Cooper // This function is running in a new PID namespace, and so is pid 1. 705*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid()); 706*8ac5aef8SEnji Cooper EXPECT_EQ(1, getpid_()); 707*8ac5aef8SEnji Cooper EXPECT_EQ(0, getppid()); 708*8ac5aef8SEnji Cooper 709*8ac5aef8SEnji Cooper // The shared process descriptor is outside our namespace, so we cannot 710*8ac5aef8SEnji Cooper // get its pid. 711*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: shared_pd=%d\n", shared_pd); 712*8ac5aef8SEnji Cooper pid_t shared_child = -1; 713*8ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(shared_pd, &shared_child)); 714*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: corresponding pid=%d\n", shared_child); 715*8ac5aef8SEnji Cooper EXPECT_EQ(0, shared_child); 716*8ac5aef8SEnji Cooper 717*8ac5aef8SEnji Cooper // But we can pdkill() it even so. 718*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: call pdkill(pd=%d)\n", shared_pd); 719*8ac5aef8SEnji Cooper EXPECT_OK(pdkill(shared_pd, SIGINT)); 720*8ac5aef8SEnji Cooper 721*8ac5aef8SEnji Cooper int pd; 722*8ac5aef8SEnji Cooper pid_t child = pdfork(&pd, 0); 723*8ac5aef8SEnji Cooper EXPECT_OK(child); 724*8ac5aef8SEnji Cooper if (child == 0) { 725*8ac5aef8SEnji Cooper // Child: expect pid 2. 726*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid()); 727*8ac5aef8SEnji Cooper EXPECT_EQ(2, getpid_()); 728*8ac5aef8SEnji Cooper EXPECT_EQ(1, getppid()); 729*8ac5aef8SEnji Cooper while (true) { 730*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " child of ChildFunc: \"I aten't dead\"\n"); 731*8ac5aef8SEnji Cooper sleep(1); 732*8ac5aef8SEnji Cooper } 733*8ac5aef8SEnji Cooper exit(0); 734*8ac5aef8SEnji Cooper } 735*8ac5aef8SEnji Cooper EXPECT_EQ(2, child); 736*8ac5aef8SEnji Cooper EXPECT_PID_ALIVE(child); 737*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n", 738*8ac5aef8SEnji Cooper pd, child, ProcessState(child)); 739*8ac5aef8SEnji Cooper 740*8ac5aef8SEnji Cooper pid_t pid; 741*8ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(pd, &pid)); 742*8ac5aef8SEnji Cooper EXPECT_EQ(child, pid); 743*8ac5aef8SEnji Cooper 744*8ac5aef8SEnji Cooper sleep(2); 745*8ac5aef8SEnji Cooper 746*8ac5aef8SEnji Cooper // Send the process descriptor over UNIX domain socket back to parent. 747*8ac5aef8SEnji Cooper SendFD(pd, shared_sock_fds[1]); 748*8ac5aef8SEnji Cooper 749*8ac5aef8SEnji Cooper // Wait for death of (grand)child, killed by our parent. 750*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: wait on pid=%d\n", child); 751*8ac5aef8SEnji Cooper int status; 752*8ac5aef8SEnji Cooper EXPECT_EQ(child, wait4(child, &status, __WALL, NULL)); 753*8ac5aef8SEnji Cooper 754*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " ChildFunc: return 0\n"); 755*8ac5aef8SEnji Cooper return 0; 756*8ac5aef8SEnji Cooper } 757*8ac5aef8SEnji Cooper 758*8ac5aef8SEnji Cooper #define STACK_SIZE (1024 * 1024) 759*8ac5aef8SEnji Cooper static char child_stack[STACK_SIZE]; 760*8ac5aef8SEnji Cooper 761*8ac5aef8SEnji Cooper // TODO(drysdale): fork into a user namespace first so REQUIRE_ROOT can be removed. 762*8ac5aef8SEnji Cooper TEST(Linux, PidNamespacePdFork) { 763*8ac5aef8SEnji Cooper REQUIRE_ROOT(); 764*8ac5aef8SEnji Cooper // Pass process descriptors in both directions across a PID namespace boundary. 765*8ac5aef8SEnji Cooper // pdfork() off a child before we start, holding its process descriptor in a global 766*8ac5aef8SEnji Cooper // variable that's accessible to children. 767*8ac5aef8SEnji Cooper pid_t firstborn = pdfork(&shared_pd, 0); 768*8ac5aef8SEnji Cooper EXPECT_OK(firstborn); 769*8ac5aef8SEnji Cooper if (firstborn == 0) { 770*8ac5aef8SEnji Cooper while (true) { 771*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " Firstborn: \"I aten't dead\"\n"); 772*8ac5aef8SEnji Cooper sleep(1); 773*8ac5aef8SEnji Cooper } 774*8ac5aef8SEnji Cooper exit(0); 775*8ac5aef8SEnji Cooper } 776*8ac5aef8SEnji Cooper EXPECT_PID_ALIVE(firstborn); 777*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n", 778*8ac5aef8SEnji Cooper shared_pd, firstborn, ProcessState(firstborn)); 779*8ac5aef8SEnji Cooper sleep(2); 780*8ac5aef8SEnji Cooper 781*8ac5aef8SEnji Cooper // Prepare sockets to communicate with child process. 782*8ac5aef8SEnji Cooper EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); 783*8ac5aef8SEnji Cooper 784*8ac5aef8SEnji Cooper // Clone into a child process with a new pid namespace. 785*8ac5aef8SEnji Cooper pid_t child = clone(ChildFunc, child_stack + STACK_SIZE, 786*8ac5aef8SEnji Cooper CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); 787*8ac5aef8SEnji Cooper EXPECT_OK(child); 788*8ac5aef8SEnji Cooper EXPECT_PID_ALIVE(child); 789*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); 790*8ac5aef8SEnji Cooper 791*8ac5aef8SEnji Cooper // Ensure the child runs. First thing it does is to kill our firstborn, using shared_pd. 792*8ac5aef8SEnji Cooper sleep(1); 793*8ac5aef8SEnji Cooper EXPECT_PID_DEAD(firstborn); 794*8ac5aef8SEnji Cooper 795*8ac5aef8SEnji Cooper // But we can still retrieve firstborn's PID, as it's not been reaped yet. 796*8ac5aef8SEnji Cooper pid_t child0; 797*8ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(shared_pd, &child0)); 798*8ac5aef8SEnji Cooper EXPECT_EQ(firstborn, child0); 799*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n", 800*8ac5aef8SEnji Cooper shared_pd, child0, ProcessState(child0)); 801*8ac5aef8SEnji Cooper 802*8ac5aef8SEnji Cooper // Now reap it. 803*8ac5aef8SEnji Cooper int status; 804*8ac5aef8SEnji Cooper EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL)); 805*8ac5aef8SEnji Cooper 806*8ac5aef8SEnji Cooper // Get the process descriptor of the child-of-child via socket transfer. 807*8ac5aef8SEnji Cooper int grandchild_pd = ReceiveFD(shared_sock_fds[0]); 808*8ac5aef8SEnji Cooper 809*8ac5aef8SEnji Cooper // Our notion of the pid associated with the grandchild is in the main PID namespace. 810*8ac5aef8SEnji Cooper pid_t grandchild; 811*8ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); 812*8ac5aef8SEnji Cooper EXPECT_NE(2, grandchild); 813*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: pre-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n", 814*8ac5aef8SEnji Cooper grandchild_pd, grandchild, ProcessState(grandchild)); 815*8ac5aef8SEnji Cooper EXPECT_PID_ALIVE(grandchild); 816*8ac5aef8SEnji Cooper 817*8ac5aef8SEnji Cooper // Kill the grandchild via the process descriptor. 818*8ac5aef8SEnji Cooper EXPECT_OK(pdkill(grandchild_pd, SIGINT)); 819*8ac5aef8SEnji Cooper usleep(10000); 820*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n", 821*8ac5aef8SEnji Cooper grandchild_pd, grandchild, ProcessState(grandchild)); 822*8ac5aef8SEnji Cooper EXPECT_PID_DEAD(grandchild); 823*8ac5aef8SEnji Cooper 824*8ac5aef8SEnji Cooper sleep(2); 825*8ac5aef8SEnji Cooper 826*8ac5aef8SEnji Cooper // Wait for the child. 827*8ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); 828*8ac5aef8SEnji Cooper int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 829*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); 830*8ac5aef8SEnji Cooper 831*8ac5aef8SEnji Cooper close(shared_sock_fds[0]); 832*8ac5aef8SEnji Cooper close(shared_sock_fds[1]); 833*8ac5aef8SEnji Cooper close(shared_pd); 834*8ac5aef8SEnji Cooper close(grandchild_pd); 835*8ac5aef8SEnji Cooper } 836*8ac5aef8SEnji Cooper 837*8ac5aef8SEnji Cooper int NSInit(void *data) { 838*8ac5aef8SEnji Cooper // This function is running in a new PID namespace, and so is pid 1. 839*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " NSInit: pid=%d, ppid=%d\n", getpid_(), getppid()); 840*8ac5aef8SEnji Cooper EXPECT_EQ(1, getpid_()); 841*8ac5aef8SEnji Cooper EXPECT_EQ(0, getppid()); 842*8ac5aef8SEnji Cooper 843*8ac5aef8SEnji Cooper int pd; 844*8ac5aef8SEnji Cooper pid_t child = pdfork(&pd, 0); 845*8ac5aef8SEnji Cooper EXPECT_OK(child); 846*8ac5aef8SEnji Cooper if (child == 0) { 847*8ac5aef8SEnji Cooper // Child: loop forever until terminated. 848*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid()); 849*8ac5aef8SEnji Cooper while (true) { 850*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " child of NSInit: \"I aten't dead\"\n"); 851*8ac5aef8SEnji Cooper usleep(100000); 852*8ac5aef8SEnji Cooper } 853*8ac5aef8SEnji Cooper exit(0); 854*8ac5aef8SEnji Cooper } 855*8ac5aef8SEnji Cooper EXPECT_EQ(2, child); 856*8ac5aef8SEnji Cooper EXPECT_PID_ALIVE(child); 857*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n", 858*8ac5aef8SEnji Cooper pd, child, ProcessState(child)); 859*8ac5aef8SEnji Cooper sleep(1); 860*8ac5aef8SEnji Cooper 861*8ac5aef8SEnji Cooper // Send the process descriptor over UNIX domain socket back to parent. 862*8ac5aef8SEnji Cooper SendFD(pd, shared_sock_fds[1]); 863*8ac5aef8SEnji Cooper close(pd); 864*8ac5aef8SEnji Cooper 865*8ac5aef8SEnji Cooper // Wait for a byte back in the other direction. 866*8ac5aef8SEnji Cooper int value; 867*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " NSInit: block waiting for value\n"); 868*8ac5aef8SEnji Cooper read(shared_sock_fds[1], &value, sizeof(value)); 869*8ac5aef8SEnji Cooper 870*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, " NSInit: return 0\n"); 871*8ac5aef8SEnji Cooper return 0; 872*8ac5aef8SEnji Cooper } 873*8ac5aef8SEnji Cooper 874*8ac5aef8SEnji Cooper TEST(Linux, DeadNSInit) { 875*8ac5aef8SEnji Cooper REQUIRE_ROOT(); 876*8ac5aef8SEnji Cooper 877*8ac5aef8SEnji Cooper // Prepare sockets to communicate with child process. 878*8ac5aef8SEnji Cooper EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); 879*8ac5aef8SEnji Cooper 880*8ac5aef8SEnji Cooper // Clone into a child process with a new pid namespace. 881*8ac5aef8SEnji Cooper pid_t child = clone(NSInit, child_stack + STACK_SIZE, 882*8ac5aef8SEnji Cooper CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); 883*8ac5aef8SEnji Cooper usleep(10000); 884*8ac5aef8SEnji Cooper EXPECT_OK(child); 885*8ac5aef8SEnji Cooper EXPECT_PID_ALIVE(child); 886*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); 887*8ac5aef8SEnji Cooper 888*8ac5aef8SEnji Cooper // Get the process descriptor of the child-of-child via socket transfer. 889*8ac5aef8SEnji Cooper int grandchild_pd = ReceiveFD(shared_sock_fds[0]); 890*8ac5aef8SEnji Cooper pid_t grandchild; 891*8ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); 892*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild)); 893*8ac5aef8SEnji Cooper 894*8ac5aef8SEnji Cooper // Send an int to the child to trigger its termination. Grandchild should also 895*8ac5aef8SEnji Cooper // go, as its init process is gone. 896*8ac5aef8SEnji Cooper int zero = 0; 897*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n"); 898*8ac5aef8SEnji Cooper write(shared_sock_fds[0], &zero, sizeof(zero)); 899*8ac5aef8SEnji Cooper EXPECT_PID_ZOMBIE(child); 900*8ac5aef8SEnji Cooper EXPECT_PID_GONE(grandchild); 901*8ac5aef8SEnji Cooper 902*8ac5aef8SEnji Cooper // Wait for the child. 903*8ac5aef8SEnji Cooper int status; 904*8ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); 905*8ac5aef8SEnji Cooper int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 906*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); 907*8ac5aef8SEnji Cooper EXPECT_PID_GONE(child); 908*8ac5aef8SEnji Cooper 909*8ac5aef8SEnji Cooper close(shared_sock_fds[0]); 910*8ac5aef8SEnji Cooper close(shared_sock_fds[1]); 911*8ac5aef8SEnji Cooper close(grandchild_pd); 912*8ac5aef8SEnji Cooper 913*8ac5aef8SEnji Cooper if (verbose) { 914*8ac5aef8SEnji Cooper fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child)); 915*8ac5aef8SEnji Cooper fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild)); 916*8ac5aef8SEnji Cooper } 917*8ac5aef8SEnji Cooper } 918*8ac5aef8SEnji Cooper 919*8ac5aef8SEnji Cooper TEST(Linux, DeadNSInit2) { 920*8ac5aef8SEnji Cooper REQUIRE_ROOT(); 921*8ac5aef8SEnji Cooper 922*8ac5aef8SEnji Cooper // Prepare sockets to communicate with child process. 923*8ac5aef8SEnji Cooper EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); 924*8ac5aef8SEnji Cooper 925*8ac5aef8SEnji Cooper // Clone into a child process with a new pid namespace. 926*8ac5aef8SEnji Cooper pid_t child = clone(NSInit, child_stack + STACK_SIZE, 927*8ac5aef8SEnji Cooper CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); 928*8ac5aef8SEnji Cooper usleep(10000); 929*8ac5aef8SEnji Cooper EXPECT_OK(child); 930*8ac5aef8SEnji Cooper EXPECT_PID_ALIVE(child); 931*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); 932*8ac5aef8SEnji Cooper 933*8ac5aef8SEnji Cooper // Get the process descriptor of the child-of-child via socket transfer. 934*8ac5aef8SEnji Cooper int grandchild_pd = ReceiveFD(shared_sock_fds[0]); 935*8ac5aef8SEnji Cooper pid_t grandchild; 936*8ac5aef8SEnji Cooper EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); 937*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild)); 938*8ac5aef8SEnji Cooper 939*8ac5aef8SEnji Cooper // Kill the grandchild 940*8ac5aef8SEnji Cooper EXPECT_OK(pdkill(grandchild_pd, SIGINT)); 941*8ac5aef8SEnji Cooper usleep(10000); 942*8ac5aef8SEnji Cooper EXPECT_PID_ZOMBIE(grandchild); 943*8ac5aef8SEnji Cooper // Close the process descriptor, so there are now no procdesc references to grandchild. 944*8ac5aef8SEnji Cooper close(grandchild_pd); 945*8ac5aef8SEnji Cooper 946*8ac5aef8SEnji Cooper // Send an int to the child to trigger its termination. Grandchild should also 947*8ac5aef8SEnji Cooper // go, as its init process is gone. 948*8ac5aef8SEnji Cooper int zero = 0; 949*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n"); 950*8ac5aef8SEnji Cooper write(shared_sock_fds[0], &zero, sizeof(zero)); 951*8ac5aef8SEnji Cooper EXPECT_PID_ZOMBIE(child); 952*8ac5aef8SEnji Cooper EXPECT_PID_GONE(grandchild); 953*8ac5aef8SEnji Cooper 954*8ac5aef8SEnji Cooper // Wait for the child. 955*8ac5aef8SEnji Cooper int status; 956*8ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); 957*8ac5aef8SEnji Cooper int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 958*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); 959*8ac5aef8SEnji Cooper 960*8ac5aef8SEnji Cooper close(shared_sock_fds[0]); 961*8ac5aef8SEnji Cooper close(shared_sock_fds[1]); 962*8ac5aef8SEnji Cooper 963*8ac5aef8SEnji Cooper if (verbose) { 964*8ac5aef8SEnji Cooper fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child)); 965*8ac5aef8SEnji Cooper fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild)); 966*8ac5aef8SEnji Cooper } 967*8ac5aef8SEnji Cooper } 968*8ac5aef8SEnji Cooper 969*8ac5aef8SEnji Cooper #ifdef __x86_64__ 970*8ac5aef8SEnji Cooper FORK_TEST(Linux, CheckHighWord) { 971*8ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); // Enter capability mode. 972*8ac5aef8SEnji Cooper 973*8ac5aef8SEnji Cooper int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 974*8ac5aef8SEnji Cooper EXPECT_OK(rc); 975*8ac5aef8SEnji Cooper EXPECT_EQ(1, rc); // no_new_privs = 1 976*8ac5aef8SEnji Cooper 977*8ac5aef8SEnji Cooper // Set some of the high 32-bits of argument zero. 978*8ac5aef8SEnji Cooper uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL; 979*8ac5aef8SEnji Cooper EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0)); 980*8ac5aef8SEnji Cooper } 981*8ac5aef8SEnji Cooper #endif 982*8ac5aef8SEnji Cooper 983*8ac5aef8SEnji Cooper FORK_TEST(Linux, PrctlOpenatBeneath) { 984*8ac5aef8SEnji Cooper // Set no_new_privs = 1 985*8ac5aef8SEnji Cooper EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 986*8ac5aef8SEnji Cooper int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 987*8ac5aef8SEnji Cooper EXPECT_OK(rc); 988*8ac5aef8SEnji Cooper EXPECT_EQ(1, rc); // no_new_privs = 1 989*8ac5aef8SEnji Cooper 990*8ac5aef8SEnji Cooper // Set openat-beneath mode 991*8ac5aef8SEnji Cooper EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0)); 992*8ac5aef8SEnji Cooper rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); 993*8ac5aef8SEnji Cooper EXPECT_OK(rc); 994*8ac5aef8SEnji Cooper EXPECT_EQ(1, rc); // openat_beneath = 1 995*8ac5aef8SEnji Cooper 996*8ac5aef8SEnji Cooper // Clear openat-beneath mode 997*8ac5aef8SEnji Cooper EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0)); 998*8ac5aef8SEnji Cooper rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); 999*8ac5aef8SEnji Cooper EXPECT_OK(rc); 1000*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); // openat_beneath = 0 1001*8ac5aef8SEnji Cooper 1002*8ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); // Enter capability mode 1003*8ac5aef8SEnji Cooper 1004*8ac5aef8SEnji Cooper // Expect to be in openat_beneath mode 1005*8ac5aef8SEnji Cooper rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); 1006*8ac5aef8SEnji Cooper EXPECT_OK(rc); 1007*8ac5aef8SEnji Cooper EXPECT_EQ(1, rc); // openat_beneath = 1 1008*8ac5aef8SEnji Cooper 1009*8ac5aef8SEnji Cooper // Expect this to be immutable. 1010*8ac5aef8SEnji Cooper EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0)); 1011*8ac5aef8SEnji Cooper rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); 1012*8ac5aef8SEnji Cooper EXPECT_OK(rc); 1013*8ac5aef8SEnji Cooper EXPECT_EQ(1, rc); // openat_beneath = 1 1014*8ac5aef8SEnji Cooper 1015*8ac5aef8SEnji Cooper } 1016*8ac5aef8SEnji Cooper 1017*8ac5aef8SEnji Cooper FORK_TEST(Linux, NoNewPrivs) { 1018*8ac5aef8SEnji Cooper if (getuid() == 0) { 1019*8ac5aef8SEnji Cooper // If root, drop CAP_SYS_ADMIN POSIX.1e capability. 1020*8ac5aef8SEnji Cooper struct __user_cap_header_struct hdr; 1021*8ac5aef8SEnji Cooper hdr.version = _LINUX_CAPABILITY_VERSION_3; 1022*8ac5aef8SEnji Cooper hdr.pid = getpid_(); 1023*8ac5aef8SEnji Cooper struct __user_cap_data_struct data[3]; 1024*8ac5aef8SEnji Cooper EXPECT_OK(capget(&hdr, &data[0])); 1025*8ac5aef8SEnji Cooper data[0].effective &= ~(1 << CAP_SYS_ADMIN); 1026*8ac5aef8SEnji Cooper data[0].permitted &= ~(1 << CAP_SYS_ADMIN); 1027*8ac5aef8SEnji Cooper data[0].inheritable &= ~(1 << CAP_SYS_ADMIN); 1028*8ac5aef8SEnji Cooper EXPECT_OK(capset(&hdr, &data[0])); 1029*8ac5aef8SEnji Cooper } 1030*8ac5aef8SEnji Cooper int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 1031*8ac5aef8SEnji Cooper EXPECT_OK(rc); 1032*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); // no_new_privs == 0 1033*8ac5aef8SEnji Cooper 1034*8ac5aef8SEnji Cooper // Can't enter seccomp-bpf mode with no_new_privs == 0 1035*8ac5aef8SEnji Cooper struct sock_filter filter[] = { 1036*8ac5aef8SEnji Cooper BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 1037*8ac5aef8SEnji Cooper }; 1038*8ac5aef8SEnji Cooper struct sock_fprog bpf; 1039*8ac5aef8SEnji Cooper bpf.len = (sizeof(filter) / sizeof(filter[0])); 1040*8ac5aef8SEnji Cooper bpf.filter = filter; 1041*8ac5aef8SEnji Cooper rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0); 1042*8ac5aef8SEnji Cooper EXPECT_EQ(-1, rc); 1043*8ac5aef8SEnji Cooper EXPECT_EQ(EACCES, errno); 1044*8ac5aef8SEnji Cooper 1045*8ac5aef8SEnji Cooper // Set no_new_privs = 1 1046*8ac5aef8SEnji Cooper EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 1047*8ac5aef8SEnji Cooper rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 1048*8ac5aef8SEnji Cooper EXPECT_OK(rc); 1049*8ac5aef8SEnji Cooper EXPECT_EQ(1, rc); // no_new_privs = 1 1050*8ac5aef8SEnji Cooper 1051*8ac5aef8SEnji Cooper // Can now turn on seccomp mode 1052*8ac5aef8SEnji Cooper EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0)); 1053*8ac5aef8SEnji Cooper } 1054*8ac5aef8SEnji Cooper 1055*8ac5aef8SEnji Cooper /* Macros for BPF generation */ 1056*8ac5aef8SEnji Cooper #define BPF_RETURN_ERRNO(err) \ 1057*8ac5aef8SEnji Cooper BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | (err & 0xFFFF)) 1058*8ac5aef8SEnji Cooper #define BPF_KILL_PROCESS \ 1059*8ac5aef8SEnji Cooper BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) 1060*8ac5aef8SEnji Cooper #define BPF_ALLOW \ 1061*8ac5aef8SEnji Cooper BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 1062*8ac5aef8SEnji Cooper #define EXAMINE_SYSCALL \ 1063*8ac5aef8SEnji Cooper BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)) 1064*8ac5aef8SEnji Cooper #define ALLOW_SYSCALL(name) \ 1065*8ac5aef8SEnji Cooper BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ 1066*8ac5aef8SEnji Cooper BPF_ALLOW 1067*8ac5aef8SEnji Cooper #define KILL_SYSCALL(name) \ 1068*8ac5aef8SEnji Cooper BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ 1069*8ac5aef8SEnji Cooper BPF_KILL_PROCESS 1070*8ac5aef8SEnji Cooper #define FAIL_SYSCALL(name, err) \ 1071*8ac5aef8SEnji Cooper BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ 1072*8ac5aef8SEnji Cooper BPF_RETURN_ERRNO(err) 1073*8ac5aef8SEnji Cooper 1074*8ac5aef8SEnji Cooper TEST(Linux, CapModeWithBPF) { 1075*8ac5aef8SEnji Cooper pid_t child = fork(); 1076*8ac5aef8SEnji Cooper EXPECT_OK(child); 1077*8ac5aef8SEnji Cooper if (child == 0) { 1078*8ac5aef8SEnji Cooper int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644); 1079*8ac5aef8SEnji Cooper cap_rights_t rights; 1080*8ac5aef8SEnji Cooper cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC); 1081*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(fd, &rights)); 1082*8ac5aef8SEnji Cooper 1083*8ac5aef8SEnji Cooper struct sock_filter filter[] = { EXAMINE_SYSCALL, 1084*8ac5aef8SEnji Cooper FAIL_SYSCALL(fchmod, ENOMEM), 1085*8ac5aef8SEnji Cooper FAIL_SYSCALL(fstat, ENOEXEC), 1086*8ac5aef8SEnji Cooper ALLOW_SYSCALL(close), 1087*8ac5aef8SEnji Cooper KILL_SYSCALL(fsync), 1088*8ac5aef8SEnji Cooper BPF_ALLOW }; 1089*8ac5aef8SEnji Cooper struct sock_fprog bpf = {.len = (sizeof(filter) / sizeof(filter[0])), 1090*8ac5aef8SEnji Cooper .filter = filter}; 1091*8ac5aef8SEnji Cooper // Set up seccomp-bpf first. 1092*8ac5aef8SEnji Cooper EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 1093*8ac5aef8SEnji Cooper EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0)); 1094*8ac5aef8SEnji Cooper 1095*8ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); // Enter capability mode. 1096*8ac5aef8SEnji Cooper 1097*8ac5aef8SEnji Cooper // fchmod is allowed by Capsicum, but failed by BPF. 1098*8ac5aef8SEnji Cooper EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644)); 1099*8ac5aef8SEnji Cooper // open is allowed by BPF, but failed by Capsicum 1100*8ac5aef8SEnji Cooper EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY)); 1101*8ac5aef8SEnji Cooper // fstat is failed by both BPF and Capsicum; tie-break is on errno 1102*8ac5aef8SEnji Cooper struct stat buf; 1103*8ac5aef8SEnji Cooper EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf)); 1104*8ac5aef8SEnji Cooper // fsync is allowed by Capsicum, but BPF's SIGSYS generation take precedence 1105*8ac5aef8SEnji Cooper fsync(fd); // terminate with unhandled SIGSYS 1106*8ac5aef8SEnji Cooper exit(0); 1107*8ac5aef8SEnji Cooper } 1108*8ac5aef8SEnji Cooper int status; 1109*8ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, 0)); 1110*8ac5aef8SEnji Cooper EXPECT_TRUE(WIFSIGNALED(status)); 1111*8ac5aef8SEnji Cooper EXPECT_EQ(SIGSYS, WTERMSIG(status)); 1112*8ac5aef8SEnji Cooper unlink(TmpFile("cap_bpf_capmode")); 1113*8ac5aef8SEnji Cooper } 1114*8ac5aef8SEnji Cooper 1115*8ac5aef8SEnji Cooper TEST(Linux, AIO) { 1116*8ac5aef8SEnji Cooper int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644); 1117*8ac5aef8SEnji Cooper EXPECT_OK(fd); 1118*8ac5aef8SEnji Cooper 1119*8ac5aef8SEnji Cooper cap_rights_t r_rs; 1120*8ac5aef8SEnji Cooper cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 1121*8ac5aef8SEnji Cooper cap_rights_t r_ws; 1122*8ac5aef8SEnji Cooper cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 1123*8ac5aef8SEnji Cooper cap_rights_t r_rwssync; 1124*8ac5aef8SEnji Cooper cap_rights_init(&r_rwssync, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC); 1125*8ac5aef8SEnji Cooper 1126*8ac5aef8SEnji Cooper int cap_ro = dup(fd); 1127*8ac5aef8SEnji Cooper EXPECT_OK(cap_ro); 1128*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_ro, &r_rs)); 1129*8ac5aef8SEnji Cooper EXPECT_OK(cap_ro); 1130*8ac5aef8SEnji Cooper int cap_wo = dup(fd); 1131*8ac5aef8SEnji Cooper EXPECT_OK(cap_wo); 1132*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_wo, &r_ws)); 1133*8ac5aef8SEnji Cooper EXPECT_OK(cap_wo); 1134*8ac5aef8SEnji Cooper int cap_all = dup(fd); 1135*8ac5aef8SEnji Cooper EXPECT_OK(cap_all); 1136*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap_all, &r_rwssync)); 1137*8ac5aef8SEnji Cooper EXPECT_OK(cap_all); 1138*8ac5aef8SEnji Cooper 1139*8ac5aef8SEnji Cooper // Linux: io_setup, io_submit, io_getevents, io_cancel, io_destroy 1140*8ac5aef8SEnji Cooper aio_context_t ctx = 0; 1141*8ac5aef8SEnji Cooper EXPECT_OK(syscall(__NR_io_setup, 10, &ctx)); 1142*8ac5aef8SEnji Cooper 1143*8ac5aef8SEnji Cooper unsigned char buffer[32] = {1, 2, 3, 4}; 1144*8ac5aef8SEnji Cooper struct iocb req; 1145*8ac5aef8SEnji Cooper memset(&req, 0, sizeof(req)); 1146*8ac5aef8SEnji Cooper req.aio_reqprio = 0; 1147*8ac5aef8SEnji Cooper req.aio_fildes = fd; 1148*8ac5aef8SEnji Cooper uintptr_t bufaddr = (uintptr_t)buffer; 1149*8ac5aef8SEnji Cooper req.aio_buf = (__u64)bufaddr; 1150*8ac5aef8SEnji Cooper req.aio_nbytes = 4; 1151*8ac5aef8SEnji Cooper req.aio_offset = 0; 1152*8ac5aef8SEnji Cooper struct iocb* reqs[1] = {&req}; 1153*8ac5aef8SEnji Cooper 1154*8ac5aef8SEnji Cooper // Write operation 1155*8ac5aef8SEnji Cooper req.aio_lio_opcode = IOCB_CMD_PWRITE; 1156*8ac5aef8SEnji Cooper req.aio_fildes = cap_ro; 1157*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1158*8ac5aef8SEnji Cooper req.aio_fildes = cap_wo; 1159*8ac5aef8SEnji Cooper EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs)); 1160*8ac5aef8SEnji Cooper 1161*8ac5aef8SEnji Cooper // Sync operation 1162*8ac5aef8SEnji Cooper req.aio_lio_opcode = IOCB_CMD_FSYNC; 1163*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1164*8ac5aef8SEnji Cooper req.aio_lio_opcode = IOCB_CMD_FDSYNC; 1165*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1166*8ac5aef8SEnji Cooper // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented 1167*8ac5aef8SEnji Cooper req.aio_fildes = cap_all; 1168*8ac5aef8SEnji Cooper EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1169*8ac5aef8SEnji Cooper req.aio_lio_opcode = IOCB_CMD_FSYNC; 1170*8ac5aef8SEnji Cooper EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1171*8ac5aef8SEnji Cooper 1172*8ac5aef8SEnji Cooper // Read operation 1173*8ac5aef8SEnji Cooper req.aio_lio_opcode = IOCB_CMD_PREAD; 1174*8ac5aef8SEnji Cooper req.aio_fildes = cap_wo; 1175*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1176*8ac5aef8SEnji Cooper req.aio_fildes = cap_ro; 1177*8ac5aef8SEnji Cooper EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs)); 1178*8ac5aef8SEnji Cooper 1179*8ac5aef8SEnji Cooper EXPECT_OK(syscall(__NR_io_destroy, ctx)); 1180*8ac5aef8SEnji Cooper 1181*8ac5aef8SEnji Cooper close(cap_all); 1182*8ac5aef8SEnji Cooper close(cap_wo); 1183*8ac5aef8SEnji Cooper close(cap_ro); 1184*8ac5aef8SEnji Cooper close(fd); 1185*8ac5aef8SEnji Cooper unlink(TmpFile("cap_aio")); 1186*8ac5aef8SEnji Cooper } 1187*8ac5aef8SEnji Cooper 1188*8ac5aef8SEnji Cooper #ifndef KCMP_FILE 1189*8ac5aef8SEnji Cooper #define KCMP_FILE 0 1190*8ac5aef8SEnji Cooper #endif 1191*8ac5aef8SEnji Cooper TEST(Linux, Kcmp) { 1192*8ac5aef8SEnji Cooper // This requires CONFIG_CHECKPOINT_RESTORE in kernel config. 1193*8ac5aef8SEnji Cooper int fd = open("/etc/passwd", O_RDONLY); 1194*8ac5aef8SEnji Cooper EXPECT_OK(fd); 1195*8ac5aef8SEnji Cooper pid_t parent = getpid_(); 1196*8ac5aef8SEnji Cooper 1197*8ac5aef8SEnji Cooper errno = 0; 1198*8ac5aef8SEnji Cooper int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd); 1199*8ac5aef8SEnji Cooper if (rc == -1 && errno == ENOSYS) { 1200*8ac5aef8SEnji Cooper TEST_SKIPPED("kcmp(2) gives -ENOSYS"); 1201*8ac5aef8SEnji Cooper return; 1202*8ac5aef8SEnji Cooper } 1203*8ac5aef8SEnji Cooper 1204*8ac5aef8SEnji Cooper pid_t child = fork(); 1205*8ac5aef8SEnji Cooper if (child == 0) { 1206*8ac5aef8SEnji Cooper // Child: limit rights on FD. 1207*8ac5aef8SEnji Cooper child = getpid_(); 1208*8ac5aef8SEnji Cooper EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd)); 1209*8ac5aef8SEnji Cooper cap_rights_t rights; 1210*8ac5aef8SEnji Cooper cap_rights_init(&rights, CAP_READ, CAP_WRITE); 1211*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(fd, &rights)); 1212*8ac5aef8SEnji Cooper // A capability wrapping a normal FD is different (from a kcmp(2) perspective) 1213*8ac5aef8SEnji Cooper // than the original file. 1214*8ac5aef8SEnji Cooper EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd)); 1215*8ac5aef8SEnji Cooper exit(HasFailure()); 1216*8ac5aef8SEnji Cooper } 1217*8ac5aef8SEnji Cooper // Wait for the child. 1218*8ac5aef8SEnji Cooper int status; 1219*8ac5aef8SEnji Cooper EXPECT_EQ(child, waitpid(child, &status, 0)); 1220*8ac5aef8SEnji Cooper rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 1221*8ac5aef8SEnji Cooper EXPECT_EQ(0, rc); 1222*8ac5aef8SEnji Cooper 1223*8ac5aef8SEnji Cooper close(fd); 1224*8ac5aef8SEnji Cooper } 1225*8ac5aef8SEnji Cooper 1226*8ac5aef8SEnji Cooper TEST(Linux, ProcFS) { 1227*8ac5aef8SEnji Cooper cap_rights_t rights; 1228*8ac5aef8SEnji Cooper cap_rights_init(&rights, CAP_READ, CAP_SEEK); 1229*8ac5aef8SEnji Cooper int fd = open("/etc/passwd", O_RDONLY); 1230*8ac5aef8SEnji Cooper EXPECT_OK(fd); 1231*8ac5aef8SEnji Cooper lseek(fd, 4, SEEK_SET); 1232*8ac5aef8SEnji Cooper int cap = dup(fd); 1233*8ac5aef8SEnji Cooper EXPECT_OK(cap); 1234*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(cap, &rights)); 1235*8ac5aef8SEnji Cooper pid_t me = getpid_(); 1236*8ac5aef8SEnji Cooper 1237*8ac5aef8SEnji Cooper char buffer[1024]; 1238*8ac5aef8SEnji Cooper sprintf(buffer, "/proc/%d/fdinfo/%d", me, cap); 1239*8ac5aef8SEnji Cooper int procfd = open(buffer, O_RDONLY); 1240*8ac5aef8SEnji Cooper EXPECT_OK(procfd) << " failed to open " << buffer; 1241*8ac5aef8SEnji Cooper if (procfd < 0) return; 1242*8ac5aef8SEnji Cooper int proccap = dup(procfd); 1243*8ac5aef8SEnji Cooper EXPECT_OK(proccap); 1244*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(proccap, &rights)); 1245*8ac5aef8SEnji Cooper 1246*8ac5aef8SEnji Cooper EXPECT_OK(read(proccap, buffer, sizeof(buffer))); 1247*8ac5aef8SEnji Cooper // The fdinfo should include the file pos of the underlying file 1248*8ac5aef8SEnji Cooper EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4")); 1249*8ac5aef8SEnji Cooper // ...and the rights of the Capsicum capability. 1250*8ac5aef8SEnji Cooper EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x")); 1251*8ac5aef8SEnji Cooper 1252*8ac5aef8SEnji Cooper close(procfd); 1253*8ac5aef8SEnji Cooper close(proccap); 1254*8ac5aef8SEnji Cooper close(cap); 1255*8ac5aef8SEnji Cooper close(fd); 1256*8ac5aef8SEnji Cooper } 1257*8ac5aef8SEnji Cooper 1258*8ac5aef8SEnji Cooper FORK_TEST(Linux, ProcessClocks) { 1259*8ac5aef8SEnji Cooper pid_t self = getpid_(); 1260*8ac5aef8SEnji Cooper pid_t child = fork(); 1261*8ac5aef8SEnji Cooper EXPECT_OK(child); 1262*8ac5aef8SEnji Cooper if (child == 0) { 1263*8ac5aef8SEnji Cooper child = getpid_(); 1264*8ac5aef8SEnji Cooper usleep(100000); 1265*8ac5aef8SEnji Cooper exit(0); 1266*8ac5aef8SEnji Cooper } 1267*8ac5aef8SEnji Cooper 1268*8ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); // Enter capability mode. 1269*8ac5aef8SEnji Cooper 1270*8ac5aef8SEnji Cooper // Nefariously build a clock ID for the child's CPU time. 1271*8ac5aef8SEnji Cooper // This relies on knowledge of the internal layout of clock IDs. 1272*8ac5aef8SEnji Cooper clockid_t child_clock; 1273*8ac5aef8SEnji Cooper child_clock = ((~child) << 3) | 0x0; 1274*8ac5aef8SEnji Cooper struct timespec ts; 1275*8ac5aef8SEnji Cooper memset(&ts, 0, sizeof(ts)); 1276*8ac5aef8SEnji Cooper 1277*8ac5aef8SEnji Cooper // TODO(drysdale): Should not be possible to retrieve info about a 1278*8ac5aef8SEnji Cooper // different process, as the PID global namespace should be locked 1279*8ac5aef8SEnji Cooper // down. 1280*8ac5aef8SEnji Cooper EXPECT_OK(clock_gettime(child_clock, &ts)); 1281*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n", 1282*8ac5aef8SEnji Cooper self, child, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec); 1283*8ac5aef8SEnji Cooper 1284*8ac5aef8SEnji Cooper child_clock = ((~1) << 3) | 0x0; 1285*8ac5aef8SEnji Cooper memset(&ts, 0, sizeof(ts)); 1286*8ac5aef8SEnji Cooper EXPECT_OK(clock_gettime(child_clock, &ts)); 1287*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n", 1288*8ac5aef8SEnji Cooper self, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec); 1289*8ac5aef8SEnji Cooper 1290*8ac5aef8SEnji Cooper // Orphan the child. 1291*8ac5aef8SEnji Cooper } 1292*8ac5aef8SEnji Cooper 1293*8ac5aef8SEnji Cooper TEST(Linux, SetLease) { 1294*8ac5aef8SEnji Cooper int fd_all = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644); 1295*8ac5aef8SEnji Cooper EXPECT_OK(fd_all); 1296*8ac5aef8SEnji Cooper int fd_rw = dup(fd_all); 1297*8ac5aef8SEnji Cooper EXPECT_OK(fd_rw); 1298*8ac5aef8SEnji Cooper 1299*8ac5aef8SEnji Cooper cap_rights_t r_all; 1300*8ac5aef8SEnji Cooper cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL); 1301*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(fd_all, &r_all)); 1302*8ac5aef8SEnji Cooper 1303*8ac5aef8SEnji Cooper cap_rights_t r_rw; 1304*8ac5aef8SEnji Cooper cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); 1305*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(fd_rw, &r_rw)); 1306*8ac5aef8SEnji Cooper 1307*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fcntl(fd_rw, F_SETLEASE, F_WRLCK)); 1308*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fcntl(fd_rw, F_GETLEASE)); 1309*8ac5aef8SEnji Cooper 1310*8ac5aef8SEnji Cooper if (!tmpdir_on_tmpfs) { // tmpfs doesn't support leases 1311*8ac5aef8SEnji Cooper EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_WRLCK)); 1312*8ac5aef8SEnji Cooper EXPECT_EQ(F_WRLCK, fcntl(fd_all, F_GETLEASE)); 1313*8ac5aef8SEnji Cooper 1314*8ac5aef8SEnji Cooper EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_UNLCK, 0)); 1315*8ac5aef8SEnji Cooper EXPECT_EQ(F_UNLCK, fcntl(fd_all, F_GETLEASE)); 1316*8ac5aef8SEnji Cooper } 1317*8ac5aef8SEnji Cooper close(fd_all); 1318*8ac5aef8SEnji Cooper close(fd_rw); 1319*8ac5aef8SEnji Cooper unlink(TmpFile("cap_lease")); 1320*8ac5aef8SEnji Cooper } 1321*8ac5aef8SEnji Cooper 1322*8ac5aef8SEnji Cooper TEST(Linux, InvalidRightsSyscall) { 1323*8ac5aef8SEnji Cooper int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644); 1324*8ac5aef8SEnji Cooper EXPECT_OK(fd); 1325*8ac5aef8SEnji Cooper 1326*8ac5aef8SEnji Cooper cap_rights_t rights; 1327*8ac5aef8SEnji Cooper cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT); 1328*8ac5aef8SEnji Cooper 1329*8ac5aef8SEnji Cooper // Use the raw syscall throughout. 1330*8ac5aef8SEnji Cooper EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); 1331*8ac5aef8SEnji Cooper 1332*8ac5aef8SEnji Cooper // Directly access the syscall, and find all unseemly manner of use for it. 1333*8ac5aef8SEnji Cooper // - Invalid flags 1334*8ac5aef8SEnji Cooper EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 1)); 1335*8ac5aef8SEnji Cooper EXPECT_EQ(EINVAL, errno); 1336*8ac5aef8SEnji Cooper // - Specify an fcntl subright, but no CAP_FCNTL set 1337*8ac5aef8SEnji Cooper EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, CAP_FCNTL_GETFL, 0, NULL, 0)); 1338*8ac5aef8SEnji Cooper EXPECT_EQ(EINVAL, errno); 1339*8ac5aef8SEnji Cooper // - Specify an ioctl subright, but no CAP_IOCTL set 1340*8ac5aef8SEnji Cooper unsigned int ioctl1 = 1; 1341*8ac5aef8SEnji Cooper EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, &ioctl1, 0)); 1342*8ac5aef8SEnji Cooper EXPECT_EQ(EINVAL, errno); 1343*8ac5aef8SEnji Cooper // - N ioctls, but null pointer passed 1344*8ac5aef8SEnji Cooper EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, NULL, 0)); 1345*8ac5aef8SEnji Cooper EXPECT_EQ(EINVAL, errno); 1346*8ac5aef8SEnji Cooper // - Invalid nioctls 1347*8ac5aef8SEnji Cooper EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, -2, NULL, 0)); 1348*8ac5aef8SEnji Cooper EXPECT_EQ(EINVAL, errno); 1349*8ac5aef8SEnji Cooper // - Null primary rights 1350*8ac5aef8SEnji Cooper EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0)); 1351*8ac5aef8SEnji Cooper EXPECT_EQ(EFAULT, errno); 1352*8ac5aef8SEnji Cooper // - Invalid index bitmask 1353*8ac5aef8SEnji Cooper rights.cr_rights[0] |= 3ULL << 57; 1354*8ac5aef8SEnji Cooper EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); 1355*8ac5aef8SEnji Cooper EXPECT_EQ(EINVAL, errno); 1356*8ac5aef8SEnji Cooper // - Invalid version 1357*8ac5aef8SEnji Cooper rights.cr_rights[0] |= 2ULL << 62; 1358*8ac5aef8SEnji Cooper EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); 1359*8ac5aef8SEnji Cooper EXPECT_EQ(EINVAL, errno); 1360*8ac5aef8SEnji Cooper 1361*8ac5aef8SEnji Cooper close(fd); 1362*8ac5aef8SEnji Cooper unlink(TmpFile("cap_invalid_rights")); 1363*8ac5aef8SEnji Cooper } 1364*8ac5aef8SEnji Cooper 1365*8ac5aef8SEnji Cooper FORK_TEST_ON(Linux, OpenByHandleAt, TmpFile("cap_openbyhandle_testfile")) { 1366*8ac5aef8SEnji Cooper REQUIRE_ROOT(); 1367*8ac5aef8SEnji Cooper int dir = open(tmpdir.c_str(), O_RDONLY); 1368*8ac5aef8SEnji Cooper EXPECT_OK(dir); 1369*8ac5aef8SEnji Cooper int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644); 1370*8ac5aef8SEnji Cooper EXPECT_OK(fd); 1371*8ac5aef8SEnji Cooper const char* message = "Saved text"; 1372*8ac5aef8SEnji Cooper EXPECT_OK(write(fd, message, strlen(message))); 1373*8ac5aef8SEnji Cooper close(fd); 1374*8ac5aef8SEnji Cooper 1375*8ac5aef8SEnji Cooper struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); 1376*8ac5aef8SEnji Cooper fhandle->handle_bytes = MAX_HANDLE_SZ; 1377*8ac5aef8SEnji Cooper int mount_id; 1378*8ac5aef8SEnji Cooper EXPECT_OK(name_to_handle_at(dir, "cap_openbyhandle_testfile", fhandle, &mount_id, 0)); 1379*8ac5aef8SEnji Cooper 1380*8ac5aef8SEnji Cooper fd = open_by_handle_at(dir, fhandle, O_RDONLY); 1381*8ac5aef8SEnji Cooper EXPECT_OK(fd); 1382*8ac5aef8SEnji Cooper char buffer[200]; 1383*8ac5aef8SEnji Cooper EXPECT_OK(read(fd, buffer, 199)); 1384*8ac5aef8SEnji Cooper EXPECT_EQ(std::string(message), std::string(buffer)); 1385*8ac5aef8SEnji Cooper close(fd); 1386*8ac5aef8SEnji Cooper 1387*8ac5aef8SEnji Cooper // Cannot issue open_by_handle_at after entering capability mode. 1388*8ac5aef8SEnji Cooper cap_enter(); 1389*8ac5aef8SEnji Cooper EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY)); 1390*8ac5aef8SEnji Cooper 1391*8ac5aef8SEnji Cooper close(dir); 1392*8ac5aef8SEnji Cooper } 1393*8ac5aef8SEnji Cooper 1394*8ac5aef8SEnji Cooper int getrandom_(void *buf, size_t buflen, unsigned int flags) { 1395*8ac5aef8SEnji Cooper #ifdef __NR_getrandom 1396*8ac5aef8SEnji Cooper return syscall(__NR_getrandom, buf, buflen, flags); 1397*8ac5aef8SEnji Cooper #else 1398*8ac5aef8SEnji Cooper errno = ENOSYS; 1399*8ac5aef8SEnji Cooper return -1; 1400*8ac5aef8SEnji Cooper #endif 1401*8ac5aef8SEnji Cooper } 1402*8ac5aef8SEnji Cooper 1403*8ac5aef8SEnji Cooper #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) 1404*8ac5aef8SEnji Cooper #include <linux/random.h> // Requires 3.17 kernel 1405*8ac5aef8SEnji Cooper FORK_TEST(Linux, GetRandom) { 1406*8ac5aef8SEnji Cooper EXPECT_OK(cap_enter()); 1407*8ac5aef8SEnji Cooper unsigned char buffer[1024]; 1408*8ac5aef8SEnji Cooper unsigned char buffer2[1024]; 1409*8ac5aef8SEnji Cooper EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK)); 1410*8ac5aef8SEnji Cooper EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK)); 1411*8ac5aef8SEnji Cooper EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer))); 1412*8ac5aef8SEnji Cooper } 1413*8ac5aef8SEnji Cooper #endif 1414*8ac5aef8SEnji Cooper 1415*8ac5aef8SEnji Cooper int memfd_create_(const char *name, unsigned int flags) { 1416*8ac5aef8SEnji Cooper #ifdef __NR_memfd_create 1417*8ac5aef8SEnji Cooper return syscall(__NR_memfd_create, name, flags); 1418*8ac5aef8SEnji Cooper #else 1419*8ac5aef8SEnji Cooper errno = ENOSYS; 1420*8ac5aef8SEnji Cooper return -1; 1421*8ac5aef8SEnji Cooper #endif 1422*8ac5aef8SEnji Cooper } 1423*8ac5aef8SEnji Cooper 1424*8ac5aef8SEnji Cooper #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) 1425*8ac5aef8SEnji Cooper #include <linux/memfd.h> // Requires 3.17 kernel 1426*8ac5aef8SEnji Cooper TEST(Linux, MemFDDeathTest) { 1427*8ac5aef8SEnji Cooper int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING); 1428*8ac5aef8SEnji Cooper if (memfd == -1 && errno == ENOSYS) { 1429*8ac5aef8SEnji Cooper TEST_SKIPPED("memfd_create(2) gives -ENOSYS"); 1430*8ac5aef8SEnji Cooper return; 1431*8ac5aef8SEnji Cooper } 1432*8ac5aef8SEnji Cooper const int LEN = 16; 1433*8ac5aef8SEnji Cooper EXPECT_OK(ftruncate(memfd, LEN)); 1434*8ac5aef8SEnji Cooper int memfd_ro = dup(memfd); 1435*8ac5aef8SEnji Cooper int memfd_rw = dup(memfd); 1436*8ac5aef8SEnji Cooper EXPECT_OK(memfd_ro); 1437*8ac5aef8SEnji Cooper EXPECT_OK(memfd_rw); 1438*8ac5aef8SEnji Cooper cap_rights_t rights; 1439*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT))); 1440*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD))); 1441*8ac5aef8SEnji Cooper 1442*8ac5aef8SEnji Cooper unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0); 1443*8ac5aef8SEnji Cooper EXPECT_NE((unsigned char *)MAP_FAILED, p_ro); 1444*8ac5aef8SEnji Cooper unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0); 1445*8ac5aef8SEnji Cooper EXPECT_NE((unsigned char *)MAP_FAILED, p_rw); 1446*8ac5aef8SEnji Cooper EXPECT_EQ(MAP_FAILED, 1447*8ac5aef8SEnji Cooper mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0)); 1448*8ac5aef8SEnji Cooper 1449*8ac5aef8SEnji Cooper *p_rw = 42; 1450*8ac5aef8SEnji Cooper EXPECT_EQ(42, *p_ro); 1451*8ac5aef8SEnji Cooper EXPECT_DEATH(*p_ro = 42, ""); 1452*8ac5aef8SEnji Cooper 1453*8ac5aef8SEnji Cooper #ifndef F_ADD_SEALS 1454*8ac5aef8SEnji Cooper // Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17 1455*8ac5aef8SEnji Cooper #define _F_LINUX_SPECIFIC_BASE F_SETLEASE 1456*8ac5aef8SEnji Cooper #define F_ADD_SEALS (_F_LINUX_SPECIFIC_BASE + 9) 1457*8ac5aef8SEnji Cooper #define F_GET_SEALS (_F_LINUX_SPECIFIC_BASE + 10) 1458*8ac5aef8SEnji Cooper #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ 1459*8ac5aef8SEnji Cooper #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ 1460*8ac5aef8SEnji Cooper #define F_SEAL_GROW 0x0004 /* prevent file from growing */ 1461*8ac5aef8SEnji Cooper #define F_SEAL_WRITE 0x0008 /* prevent writes */ 1462*8ac5aef8SEnji Cooper #endif 1463*8ac5aef8SEnji Cooper 1464*8ac5aef8SEnji Cooper // Reading the seal information requires CAP_FSTAT. 1465*8ac5aef8SEnji Cooper int seals = fcntl(memfd, F_GET_SEALS); 1466*8ac5aef8SEnji Cooper EXPECT_OK(seals); 1467*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals); 1468*8ac5aef8SEnji Cooper int seals_ro = fcntl(memfd_ro, F_GET_SEALS); 1469*8ac5aef8SEnji Cooper EXPECT_EQ(seals, seals_ro); 1470*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro); 1471*8ac5aef8SEnji Cooper int seals_rw = fcntl(memfd_rw, F_GET_SEALS); 1472*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(seals_rw); 1473*8ac5aef8SEnji Cooper 1474*8ac5aef8SEnji Cooper // Fail to seal as a writable mapping exists. 1475*8ac5aef8SEnji Cooper EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); 1476*8ac5aef8SEnji Cooper EXPECT_EQ(EBUSY, errno); 1477*8ac5aef8SEnji Cooper *p_rw = 42; 1478*8ac5aef8SEnji Cooper 1479*8ac5aef8SEnji Cooper // Seal the rw version; need to unmap first. 1480*8ac5aef8SEnji Cooper munmap(p_rw, LEN); 1481*8ac5aef8SEnji Cooper munmap(p_ro, LEN); 1482*8ac5aef8SEnji Cooper EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); 1483*8ac5aef8SEnji Cooper 1484*8ac5aef8SEnji Cooper seals = fcntl(memfd, F_GET_SEALS); 1485*8ac5aef8SEnji Cooper EXPECT_OK(seals); 1486*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals); 1487*8ac5aef8SEnji Cooper seals_ro = fcntl(memfd_ro, F_GET_SEALS); 1488*8ac5aef8SEnji Cooper EXPECT_EQ(seals, seals_ro); 1489*8ac5aef8SEnji Cooper if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro); 1490*8ac5aef8SEnji Cooper 1491*8ac5aef8SEnji Cooper // Remove the CAP_FCHMOD right, can no longer add seals. 1492*8ac5aef8SEnji Cooper EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW))); 1493*8ac5aef8SEnji Cooper EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); 1494*8ac5aef8SEnji Cooper 1495*8ac5aef8SEnji Cooper close(memfd); 1496*8ac5aef8SEnji Cooper close(memfd_ro); 1497*8ac5aef8SEnji Cooper close(memfd_rw); 1498*8ac5aef8SEnji Cooper } 1499*8ac5aef8SEnji Cooper #endif 1500*8ac5aef8SEnji Cooper 1501*8ac5aef8SEnji Cooper #else 1502*8ac5aef8SEnji Cooper void noop() {} 1503*8ac5aef8SEnji Cooper #endif 1504