1 // Tests of Linux-specific functionality 2 #ifdef __linux__ 3 4 #include <sys/types.h> 5 #include <sys/stat.h> 6 #include <sys/socket.h> 7 #include <sys/timerfd.h> 8 #include <sys/signalfd.h> 9 #include <sys/eventfd.h> 10 #include <sys/epoll.h> 11 #include <sys/inotify.h> 12 #include <sys/fanotify.h> 13 #include <sys/mman.h> 14 #include <sys/capability.h> // Requires e.g. libcap-dev package for POSIX.1e capabilities headers 15 #include <linux/aio_abi.h> 16 #include <linux/filter.h> 17 #include <linux/seccomp.h> 18 #include <linux/version.h> 19 #include <poll.h> 20 #include <sched.h> 21 #include <signal.h> 22 #include <fcntl.h> 23 #include <unistd.h> 24 25 #include <string> 26 27 #include "capsicum.h" 28 #include "syscalls.h" 29 #include "capsicum-test.h" 30 31 TEST(Linux, TimerFD) { 32 int fd = timerfd_create(CLOCK_MONOTONIC, 0); 33 34 cap_rights_t r_ro; 35 cap_rights_init(&r_ro, CAP_READ); 36 cap_rights_t r_wo; 37 cap_rights_init(&r_wo, CAP_WRITE); 38 cap_rights_t r_rw; 39 cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); 40 cap_rights_t r_rwpoll; 41 cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT); 42 43 int cap_fd_ro = dup(fd); 44 EXPECT_OK(cap_fd_ro); 45 EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro)); 46 int cap_fd_wo = dup(fd); 47 EXPECT_OK(cap_fd_wo); 48 EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo)); 49 int cap_fd_rw = dup(fd); 50 EXPECT_OK(cap_fd_rw); 51 EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw)); 52 int cap_fd_all = dup(fd); 53 EXPECT_OK(cap_fd_all); 54 EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll)); 55 56 struct itimerspec old_ispec; 57 struct itimerspec ispec; 58 ispec.it_interval.tv_sec = 0; 59 ispec.it_interval.tv_nsec = 0; 60 ispec.it_value.tv_sec = 0; 61 ispec.it_value.tv_nsec = 100000000; // 100ms 62 EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL)); 63 EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec)); 64 EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL)); 65 EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL)); 66 EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL)); 67 68 EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec)); 69 EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec)); 70 EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec)); 71 EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec)); 72 73 // To be able to poll() for the timer pop, still need CAP_EVENT. 74 struct pollfd poll_fd; 75 for (int ii = 0; ii < 3; ii++) { 76 poll_fd.revents = 0; 77 poll_fd.events = POLLIN; 78 switch (ii) { 79 case 0: poll_fd.fd = cap_fd_ro; break; 80 case 1: poll_fd.fd = cap_fd_wo; break; 81 case 2: poll_fd.fd = cap_fd_rw; break; 82 } 83 // Poll immediately returns with POLLNVAL 84 EXPECT_OK(poll(&poll_fd, 1, 400)); 85 EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 86 EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 87 } 88 89 poll_fd.fd = cap_fd_all; 90 EXPECT_OK(poll(&poll_fd, 1, 400)); 91 EXPECT_NE(0, (poll_fd.revents & POLLIN)); 92 EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); 93 94 EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec)); 95 EXPECT_EQ(0, old_ispec.it_value.tv_sec); 96 EXPECT_EQ(0, old_ispec.it_value.tv_nsec); 97 EXPECT_EQ(0, old_ispec.it_interval.tv_sec); 98 EXPECT_EQ(0, old_ispec.it_interval.tv_nsec); 99 100 close(cap_fd_all); 101 close(cap_fd_rw); 102 close(cap_fd_wo); 103 close(cap_fd_ro); 104 close(fd); 105 } 106 107 FORK_TEST(Linux, SignalFDIfSingleThreaded) { 108 if (force_mt) { 109 GTEST_SKIP() << "multi-threaded run clashes with signals"; 110 } 111 pid_t me = getpid(); 112 sigset_t mask; 113 sigemptyset(&mask); 114 sigaddset(&mask, SIGUSR1); 115 116 // Block signals before registering against a new signal FD. 117 EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL)); 118 int fd = signalfd(-1, &mask, 0); 119 EXPECT_OK(fd); 120 121 cap_rights_t r_rs; 122 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 123 cap_rights_t r_ws; 124 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 125 cap_rights_t r_sig; 126 cap_rights_init(&r_sig, CAP_FSIGNAL); 127 cap_rights_t r_rssig; 128 cap_rights_init(&r_rssig, CAP_FSIGNAL, CAP_READ, CAP_SEEK); 129 cap_rights_t r_rssig_poll; 130 cap_rights_init(&r_rssig_poll, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT); 131 132 // Various capability variants. 133 int cap_fd_none = dup(fd); 134 EXPECT_OK(cap_fd_none); 135 EXPECT_OK(cap_rights_limit(cap_fd_none, &r_ws)); 136 int cap_fd_read = dup(fd); 137 EXPECT_OK(cap_fd_read); 138 EXPECT_OK(cap_rights_limit(cap_fd_read, &r_rs)); 139 int cap_fd_sig = dup(fd); 140 EXPECT_OK(cap_fd_sig); 141 EXPECT_OK(cap_rights_limit(cap_fd_sig, &r_sig)); 142 int cap_fd_sig_read = dup(fd); 143 EXPECT_OK(cap_fd_sig_read); 144 EXPECT_OK(cap_rights_limit(cap_fd_sig_read, &r_rssig)); 145 int cap_fd_all = dup(fd); 146 EXPECT_OK(cap_fd_all); 147 EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rssig_poll)); 148 149 struct signalfd_siginfo fdsi; 150 151 // Need CAP_READ to read the signal information 152 kill(me, SIGUSR1); 153 EXPECT_NOTCAPABLE(read(cap_fd_none, &fdsi, sizeof(struct signalfd_siginfo))); 154 EXPECT_NOTCAPABLE(read(cap_fd_sig, &fdsi, sizeof(struct signalfd_siginfo))); 155 int len = read(cap_fd_read, &fdsi, sizeof(struct signalfd_siginfo)); 156 EXPECT_OK(len); 157 EXPECT_EQ(sizeof(struct signalfd_siginfo), (size_t)len); 158 EXPECT_EQ(SIGUSR1, (int)fdsi.ssi_signo); 159 160 // Need CAP_FSIGNAL to modify the signal mask. 161 sigemptyset(&mask); 162 sigaddset(&mask, SIGUSR1); 163 sigaddset(&mask, SIGUSR2); 164 EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL)); 165 EXPECT_NOTCAPABLE(signalfd(cap_fd_none, &mask, 0)); 166 EXPECT_NOTCAPABLE(signalfd(cap_fd_read, &mask, 0)); 167 EXPECT_EQ(cap_fd_sig, signalfd(cap_fd_sig, &mask, 0)); 168 169 // Need CAP_EVENT to get notification of a signal in poll(2). 170 kill(me, SIGUSR2); 171 172 struct pollfd poll_fd; 173 poll_fd.revents = 0; 174 poll_fd.events = POLLIN; 175 poll_fd.fd = cap_fd_sig_read; 176 EXPECT_OK(poll(&poll_fd, 1, 400)); 177 EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 178 EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 179 180 poll_fd.fd = cap_fd_all; 181 EXPECT_OK(poll(&poll_fd, 1, 400)); 182 EXPECT_NE(0, (poll_fd.revents & POLLIN)); 183 EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); 184 } 185 186 TEST(Linux, EventFD) { 187 int fd = eventfd(0, 0); 188 EXPECT_OK(fd); 189 190 cap_rights_t r_rs; 191 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 192 cap_rights_t r_ws; 193 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 194 cap_rights_t r_rws; 195 cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); 196 cap_rights_t r_rwspoll; 197 cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); 198 199 int cap_ro = dup(fd); 200 EXPECT_OK(cap_ro); 201 EXPECT_OK(cap_rights_limit(cap_ro, &r_rs)); 202 int cap_wo = dup(fd); 203 EXPECT_OK(cap_wo); 204 EXPECT_OK(cap_rights_limit(cap_wo, &r_ws)); 205 int cap_rw = dup(fd); 206 EXPECT_OK(cap_rw); 207 EXPECT_OK(cap_rights_limit(cap_rw, &r_rws)); 208 int cap_all = dup(fd); 209 EXPECT_OK(cap_all); 210 EXPECT_OK(cap_rights_limit(cap_all, &r_rwspoll)); 211 212 pid_t child = fork(); 213 if (child == 0) { 214 // Child: write counter to eventfd 215 uint64_t u = 42; 216 EXPECT_NOTCAPABLE(write(cap_ro, &u, sizeof(u))); 217 EXPECT_OK(write(cap_wo, &u, sizeof(u))); 218 exit(HasFailure()); 219 } 220 221 sleep(1); // Allow child to write 222 223 struct pollfd poll_fd; 224 poll_fd.revents = 0; 225 poll_fd.events = POLLIN; 226 poll_fd.fd = cap_rw; 227 EXPECT_OK(poll(&poll_fd, 1, 400)); 228 EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 229 EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 230 231 poll_fd.fd = cap_all; 232 EXPECT_OK(poll(&poll_fd, 1, 400)); 233 EXPECT_NE(0, (poll_fd.revents & POLLIN)); 234 EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); 235 236 uint64_t u; 237 EXPECT_NOTCAPABLE(read(cap_wo, &u, sizeof(u))); 238 EXPECT_OK(read(cap_ro, &u, sizeof(u))); 239 EXPECT_EQ(42, (int)u); 240 241 // Wait for the child. 242 int status; 243 EXPECT_EQ(child, waitpid(child, &status, 0)); 244 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 245 EXPECT_EQ(0, rc); 246 247 close(cap_all); 248 close(cap_rw); 249 close(cap_wo); 250 close(cap_ro); 251 close(fd); 252 } 253 254 FORK_TEST(Linux, epoll) { 255 int sock_fds[2]; 256 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds)); 257 // Queue some data. 258 char buffer[4] = {1, 2, 3, 4}; 259 EXPECT_OK(write(sock_fds[1], buffer, sizeof(buffer))); 260 261 EXPECT_OK(cap_enter()); // Enter capability mode. 262 263 int epoll_fd = epoll_create(1); 264 EXPECT_OK(epoll_fd); 265 266 cap_rights_t r_rs; 267 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 268 cap_rights_t r_ws; 269 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 270 cap_rights_t r_rws; 271 cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); 272 cap_rights_t r_rwspoll; 273 cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); 274 cap_rights_t r_epoll; 275 cap_rights_init(&r_epoll, CAP_EPOLL_CTL); 276 277 int cap_epoll_wo = dup(epoll_fd); 278 EXPECT_OK(cap_epoll_wo); 279 EXPECT_OK(cap_rights_limit(cap_epoll_wo, &r_ws)); 280 int cap_epoll_ro = dup(epoll_fd); 281 EXPECT_OK(cap_epoll_ro); 282 EXPECT_OK(cap_rights_limit(cap_epoll_ro, &r_rs)); 283 int cap_epoll_rw = dup(epoll_fd); 284 EXPECT_OK(cap_epoll_rw); 285 EXPECT_OK(cap_rights_limit(cap_epoll_rw, &r_rws)); 286 int cap_epoll_poll = dup(epoll_fd); 287 EXPECT_OK(cap_epoll_poll); 288 EXPECT_OK(cap_rights_limit(cap_epoll_poll, &r_rwspoll)); 289 int cap_epoll_ctl = dup(epoll_fd); 290 EXPECT_OK(cap_epoll_ctl); 291 EXPECT_OK(cap_rights_limit(cap_epoll_ctl, &r_epoll)); 292 293 // Can only modify the FDs being monitored if the CAP_EPOLL_CTL right is present. 294 struct epoll_event eev; 295 memset(&eev, 0, sizeof(eev)); 296 eev.events = EPOLLIN|EPOLLOUT|EPOLLPRI; 297 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_ADD, sock_fds[0], &eev)); 298 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_ADD, sock_fds[0], &eev)); 299 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_ADD, sock_fds[0], &eev)); 300 EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_ADD, sock_fds[0], &eev)); 301 eev.events = EPOLLIN|EPOLLOUT; 302 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_MOD, sock_fds[0], &eev)); 303 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_MOD, sock_fds[0], &eev)); 304 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_MOD, sock_fds[0], &eev)); 305 EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_MOD, sock_fds[0], &eev)); 306 307 // Running epoll_pwait(2) requires CAP_EVENT. 308 eev.events = 0; 309 EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_ro, &eev, 1, 100, NULL)); 310 EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_wo, &eev, 1, 100, NULL)); 311 EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_rw, &eev, 1, 100, NULL)); 312 EXPECT_OK(epoll_pwait(cap_epoll_poll, &eev, 1, 100, NULL)); 313 EXPECT_EQ(EPOLLIN, eev.events & EPOLLIN); 314 315 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_DEL, sock_fds[0], &eev)); 316 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_DEL, sock_fds[0], &eev)); 317 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_DEL, sock_fds[0], &eev)); 318 EXPECT_OK(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, sock_fds[0], &eev)); 319 320 close(cap_epoll_ctl); 321 close(cap_epoll_poll); 322 close(cap_epoll_rw); 323 close(cap_epoll_ro); 324 close(cap_epoll_wo); 325 close(epoll_fd); 326 close(sock_fds[1]); 327 close(sock_fds[0]); 328 } 329 330 TEST(Linux, fstatat) { 331 int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644); 332 EXPECT_OK(fd); 333 unsigned char buffer[] = {1, 2, 3, 4}; 334 EXPECT_OK(write(fd, buffer, sizeof(buffer))); 335 cap_rights_t rights; 336 int cap_rf = dup(fd); 337 EXPECT_OK(cap_rf); 338 EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT))); 339 int cap_ro = dup(fd); 340 EXPECT_OK(cap_ro); 341 EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ))); 342 343 struct stat info; 344 EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH)); 345 EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH)); 346 EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH)); 347 348 close(cap_ro); 349 close(cap_rf); 350 close(fd); 351 352 int dir = open(tmpdir.c_str(), O_RDONLY); 353 EXPECT_OK(dir); 354 int dir_rf = dup(dir); 355 EXPECT_OK(dir_rf); 356 EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT))); 357 int dir_ro = dup(fd); 358 EXPECT_OK(dir_ro); 359 EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ))); 360 361 EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH)); 362 EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH)); 363 EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH)); 364 365 close(dir_ro); 366 close(dir_rf); 367 close(dir); 368 369 unlink(TmpFile("cap_fstatat")); 370 } 371 372 // fanotify support may not be available at compile-time 373 #ifdef __NR_fanotify_init 374 TEST(Linux, FanotifyIfRoot) { 375 GTEST_SKIP_IF_NOT_ROOT(); 376 int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR); 377 EXPECT_OK(fa_fd); 378 if (fa_fd < 0) return; // May not be enabled 379 380 cap_rights_t r_rs; 381 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 382 cap_rights_t r_ws; 383 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 384 cap_rights_t r_rws; 385 cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); 386 cap_rights_t r_rwspoll; 387 cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); 388 cap_rights_t r_rwsnotify; 389 cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY); 390 cap_rights_t r_rsl; 391 cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP); 392 cap_rights_t r_rslstat; 393 cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT); 394 cap_rights_t r_rsstat; 395 cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT); 396 397 int cap_fd_ro = dup(fa_fd); 398 EXPECT_OK(cap_fd_ro); 399 EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs)); 400 int cap_fd_wo = dup(fa_fd); 401 EXPECT_OK(cap_fd_wo); 402 EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws)); 403 int cap_fd_rw = dup(fa_fd); 404 EXPECT_OK(cap_fd_rw); 405 EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws)); 406 int cap_fd_poll = dup(fa_fd); 407 EXPECT_OK(cap_fd_poll); 408 EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll)); 409 int cap_fd_not = dup(fa_fd); 410 EXPECT_OK(cap_fd_not); 411 EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify)); 412 413 int rc = mkdir(TmpFile("cap_notify"), 0755); 414 EXPECT_TRUE(rc == 0 || errno == EEXIST); 415 int dfd = open(TmpFile("cap_notify"), O_RDONLY); 416 EXPECT_OK(dfd); 417 int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644); 418 close(fd); 419 int cap_dfd = dup(dfd); 420 EXPECT_OK(cap_dfd); 421 EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat)); 422 EXPECT_OK(cap_dfd); 423 int cap_dfd_rs = dup(dfd); 424 EXPECT_OK(cap_dfd_rs); 425 EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs)); 426 EXPECT_OK(cap_dfd_rs); 427 int cap_dfd_rsstat = dup(dfd); 428 EXPECT_OK(cap_dfd_rsstat); 429 EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat)); 430 EXPECT_OK(cap_dfd_rsstat); 431 int cap_dfd_rsl = dup(dfd); 432 EXPECT_OK(cap_dfd_rsl); 433 EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl)); 434 EXPECT_OK(cap_dfd_rsl); 435 436 // Need CAP_NOTIFY to change what's monitored. 437 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); 438 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); 439 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); 440 EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); 441 442 // Need CAP_FSTAT on the thing monitored. 443 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rs, NULL)); 444 EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rsstat, NULL)); 445 446 // Too add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd. 447 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsstat, "file")); 448 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsl, "file")); 449 EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd, "file")); 450 451 pid_t child = fork(); 452 if (child == 0) { 453 // Child: Perform activity in the directory under notify. 454 sleep(1); 455 unlink(TmpFile("cap_notify/temp")); 456 int fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644); 457 close(fd); 458 exit(0); 459 } 460 461 // Need CAP_EVENT to poll. 462 struct pollfd poll_fd; 463 poll_fd.revents = 0; 464 poll_fd.events = POLLIN; 465 poll_fd.fd = cap_fd_rw; 466 EXPECT_OK(poll(&poll_fd, 1, 1400)); 467 EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 468 EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 469 470 poll_fd.fd = cap_fd_not; 471 EXPECT_OK(poll(&poll_fd, 1, 1400)); 472 EXPECT_EQ(0, (poll_fd.revents & POLLIN)); 473 EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); 474 475 poll_fd.fd = cap_fd_poll; 476 EXPECT_OK(poll(&poll_fd, 1, 1400)); 477 EXPECT_NE(0, (poll_fd.revents & POLLIN)); 478 EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); 479 480 // Need CAP_READ to read. 481 struct fanotify_event_metadata ev; 482 memset(&ev, 0, sizeof(ev)); 483 EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev))); 484 rc = read(fa_fd, &ev, sizeof(ev)); 485 EXPECT_OK(rc); 486 EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc); 487 EXPECT_EQ(child, ev.pid); 488 EXPECT_NE(0, ev.fd); 489 490 // TODO(drysdale): reinstate if/when capsicum-linux propagates rights 491 // to fanotify-generated FDs. 492 #ifdef OMIT 493 // fanotify(7) gives us a FD for the changed file. This should 494 // only have rights that are a subset of those for the original 495 // monitored directory file descriptor. 496 cap_rights_t rights; 497 CAP_SET_ALL(&rights); 498 EXPECT_OK(cap_rights_get(ev.fd, &rights)); 499 EXPECT_RIGHTS_IN(&rights, &r_rslstat); 500 #endif 501 502 // Wait for the child. 503 int status; 504 EXPECT_EQ(child, waitpid(child, &status, 0)); 505 rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 506 EXPECT_EQ(0, rc); 507 508 close(cap_dfd_rsstat); 509 close(cap_dfd_rsl); 510 close(cap_dfd_rs); 511 close(cap_dfd); 512 close(dfd); 513 unlink(TmpFile("cap_notify/file")); 514 unlink(TmpFile("cap_notify/temp")); 515 rmdir(TmpFile("cap_notify")); 516 close(cap_fd_not); 517 close(cap_fd_poll); 518 close(cap_fd_rw); 519 close(cap_fd_wo); 520 close(cap_fd_ro); 521 close(fa_fd); 522 } 523 #endif 524 525 TEST(Linux, inotify) { 526 int i_fd = inotify_init(); 527 EXPECT_OK(i_fd); 528 529 cap_rights_t r_rs; 530 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 531 cap_rights_t r_ws; 532 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 533 cap_rights_t r_rws; 534 cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); 535 cap_rights_t r_rwsnotify; 536 cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY); 537 538 int cap_fd_ro = dup(i_fd); 539 EXPECT_OK(cap_fd_ro); 540 EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs)); 541 int cap_fd_wo = dup(i_fd); 542 EXPECT_OK(cap_fd_wo); 543 EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws)); 544 int cap_fd_rw = dup(i_fd); 545 EXPECT_OK(cap_fd_rw); 546 EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws)); 547 int cap_fd_all = dup(i_fd); 548 EXPECT_OK(cap_fd_all); 549 EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify)); 550 551 int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644); 552 EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY)); 553 int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY); 554 EXPECT_OK(wd); 555 556 unsigned char buffer[] = {1, 2, 3, 4}; 557 EXPECT_OK(write(fd, buffer, sizeof(buffer))); 558 559 struct inotify_event iev; 560 memset(&iev, 0, sizeof(iev)); 561 EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev))); 562 int rc = read(cap_fd_ro, &iev, sizeof(iev)); 563 EXPECT_OK(rc); 564 EXPECT_EQ((int)sizeof(iev), rc); 565 EXPECT_EQ(wd, iev.wd); 566 567 EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd)); 568 EXPECT_OK(inotify_rm_watch(cap_fd_all, wd)); 569 570 close(fd); 571 close(cap_fd_all); 572 close(cap_fd_rw); 573 close(cap_fd_wo); 574 close(cap_fd_ro); 575 close(i_fd); 576 unlink(TmpFile("cap_inotify")); 577 } 578 579 TEST(Linux, ArchChangeIfAvailable) { 580 const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"}; 581 const char* progs[] = {NULL, NULL, NULL}; 582 char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL}; 583 char* null_envp[] = {NULL}; 584 int fds[3]; 585 int count = 0; 586 587 for (int ii = 0; ii < 3; ii++) { 588 fds[count] = open(prog_candidates[ii], O_RDONLY); 589 if (fds[count] >= 0) { 590 progs[count] = prog_candidates[ii]; 591 count++; 592 } 593 } 594 if (count == 0) { 595 GTEST_SKIP() << "no different-architecture programs available"; 596 } 597 598 for (int ii = 0; ii < count; ii++) { 599 // Fork-and-exec a binary of this architecture. 600 pid_t child = fork(); 601 if (child == 0) { 602 EXPECT_OK(cap_enter()); // Enter capability mode 603 if (verbose) fprintf(stderr, "[%d] call fexecve(%s, %s)\n", 604 getpid_(), progs[ii], argv_pass[1]); 605 argv_pass[0] = (char *)progs[ii]; 606 int rc = fexecve_(fds[ii], argv_pass, null_envp); 607 fprintf(stderr, "fexecve(%s) returned %d errno %d\n", progs[ii], rc, errno); 608 exit(99); // Should not reach here. 609 } 610 int status; 611 EXPECT_EQ(child, waitpid(child, &status, 0)); 612 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 613 EXPECT_EQ(0, rc); 614 close(fds[ii]); 615 } 616 } 617 618 FORK_TEST(Linux, NamespaceIfRoot) { 619 GTEST_SKIP_IF_NOT_ROOT(); 620 pid_t me = getpid_(); 621 622 // Create a new UTS namespace. 623 EXPECT_OK(unshare(CLONE_NEWUTS)); 624 // Open an FD to its symlink. 625 char buffer[256]; 626 sprintf(buffer, "/proc/%d/ns/uts", me); 627 int ns_fd = open(buffer, O_RDONLY); 628 629 cap_rights_t r_rwlstat; 630 cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT); 631 cap_rights_t r_rwlstatns; 632 cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS); 633 634 int cap_fd = dup(ns_fd); 635 EXPECT_OK(cap_fd); 636 EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat)); 637 int cap_fd_setns = dup(ns_fd); 638 EXPECT_OK(cap_fd_setns); 639 EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns)); 640 EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS)); 641 EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS)); 642 643 EXPECT_OK(cap_enter()); // Enter capability mode. 644 645 // No setns(2) but unshare(2) is allowed. 646 EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS)); 647 EXPECT_OK(unshare(CLONE_NEWUTS)); 648 } 649 650 static void SendFD(int fd, int over) { 651 struct msghdr mh; 652 mh.msg_name = NULL; // No address needed 653 mh.msg_namelen = 0; 654 char buffer1[1024]; 655 struct iovec iov[1]; 656 iov[0].iov_base = buffer1; 657 iov[0].iov_len = sizeof(buffer1); 658 mh.msg_iov = iov; 659 mh.msg_iovlen = 1; 660 char buffer2[1024]; 661 mh.msg_control = buffer2; 662 mh.msg_controllen = CMSG_LEN(sizeof(int)); 663 struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh); 664 cmptr->cmsg_level = SOL_SOCKET; 665 cmptr->cmsg_type = SCM_RIGHTS; 666 cmptr->cmsg_len = CMSG_LEN(sizeof(int)); 667 *(int *)CMSG_DATA(cmptr) = fd; 668 buffer1[0] = 0; 669 iov[0].iov_len = 1; 670 int rc = sendmsg(over, &mh, 0); 671 EXPECT_OK(rc); 672 } 673 674 static int ReceiveFD(int over) { 675 struct msghdr mh; 676 mh.msg_name = NULL; // No address needed 677 mh.msg_namelen = 0; 678 char buffer1[1024]; 679 struct iovec iov[1]; 680 iov[0].iov_base = buffer1; 681 iov[0].iov_len = sizeof(buffer1); 682 mh.msg_iov = iov; 683 mh.msg_iovlen = 1; 684 char buffer2[1024]; 685 mh.msg_control = buffer2; 686 mh.msg_controllen = sizeof(buffer2); 687 int rc = recvmsg(over, &mh, 0); 688 EXPECT_OK(rc); 689 EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen); 690 struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh); 691 int fd = *(int*)CMSG_DATA(cmptr); 692 EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len); 693 cmptr = CMSG_NXTHDR(&mh, cmptr); 694 EXPECT_TRUE(cmptr == NULL); 695 return fd; 696 } 697 698 static int shared_pd = -1; 699 static int shared_sock_fds[2]; 700 701 static int ChildFunc(void *arg) { 702 // This function is running in a new PID namespace, and so is pid 1. 703 if (verbose) fprintf(stderr, " ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid()); 704 EXPECT_EQ(1, getpid_()); 705 EXPECT_EQ(0, getppid()); 706 707 // The shared process descriptor is outside our namespace, so we cannot 708 // get its pid. 709 if (verbose) fprintf(stderr, " ChildFunc: shared_pd=%d\n", shared_pd); 710 pid_t shared_child = -1; 711 EXPECT_OK(pdgetpid(shared_pd, &shared_child)); 712 if (verbose) fprintf(stderr, " ChildFunc: corresponding pid=%d\n", shared_child); 713 EXPECT_EQ(0, shared_child); 714 715 // But we can pdkill() it even so. 716 if (verbose) fprintf(stderr, " ChildFunc: call pdkill(pd=%d)\n", shared_pd); 717 EXPECT_OK(pdkill(shared_pd, SIGINT)); 718 719 int pd; 720 pid_t child = pdfork(&pd, 0); 721 EXPECT_OK(child); 722 if (child == 0) { 723 // Child: expect pid 2. 724 if (verbose) fprintf(stderr, " child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid()); 725 EXPECT_EQ(2, getpid_()); 726 EXPECT_EQ(1, getppid()); 727 while (true) { 728 if (verbose) fprintf(stderr, " child of ChildFunc: \"I aten't dead\"\n"); 729 sleep(1); 730 } 731 exit(0); 732 } 733 EXPECT_EQ(2, child); 734 EXPECT_PID_ALIVE(child); 735 if (verbose) fprintf(stderr, " ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n", 736 pd, child, ProcessState(child)); 737 738 pid_t pid; 739 EXPECT_OK(pdgetpid(pd, &pid)); 740 EXPECT_EQ(child, pid); 741 742 sleep(2); 743 744 // Send the process descriptor over UNIX domain socket back to parent. 745 SendFD(pd, shared_sock_fds[1]); 746 747 // Wait for death of (grand)child, killed by our parent. 748 if (verbose) fprintf(stderr, " ChildFunc: wait on pid=%d\n", child); 749 int status; 750 EXPECT_EQ(child, wait4(child, &status, __WALL, NULL)); 751 752 if (verbose) fprintf(stderr, " ChildFunc: return 0\n"); 753 return 0; 754 } 755 756 #define STACK_SIZE (1024 * 1024) 757 static char child_stack[STACK_SIZE]; 758 759 // TODO(drysdale): fork into a user namespace first so GTEST_SKIP_IF_NOT_ROOT can be removed. 760 TEST(Linux, PidNamespacePdForkIfRoot) { 761 GTEST_SKIP_IF_NOT_ROOT(); 762 // Pass process descriptors in both directions across a PID namespace boundary. 763 // pdfork() off a child before we start, holding its process descriptor in a global 764 // variable that's accessible to children. 765 pid_t firstborn = pdfork(&shared_pd, 0); 766 EXPECT_OK(firstborn); 767 if (firstborn == 0) { 768 while (true) { 769 if (verbose) fprintf(stderr, " Firstborn: \"I aten't dead\"\n"); 770 sleep(1); 771 } 772 exit(0); 773 } 774 EXPECT_PID_ALIVE(firstborn); 775 if (verbose) fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n", 776 shared_pd, firstborn, ProcessState(firstborn)); 777 sleep(2); 778 779 // Prepare sockets to communicate with child process. 780 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); 781 782 // Clone into a child process with a new pid namespace. 783 pid_t child = clone(ChildFunc, child_stack + STACK_SIZE, 784 CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); 785 EXPECT_OK(child); 786 EXPECT_PID_ALIVE(child); 787 if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); 788 789 // Ensure the child runs. First thing it does is to kill our firstborn, using shared_pd. 790 sleep(1); 791 EXPECT_PID_DEAD(firstborn); 792 793 // But we can still retrieve firstborn's PID, as it's not been reaped yet. 794 pid_t child0; 795 EXPECT_OK(pdgetpid(shared_pd, &child0)); 796 EXPECT_EQ(firstborn, child0); 797 if (verbose) fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n", 798 shared_pd, child0, ProcessState(child0)); 799 800 // Now reap it. 801 int status; 802 EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL)); 803 804 // Get the process descriptor of the child-of-child via socket transfer. 805 int grandchild_pd = ReceiveFD(shared_sock_fds[0]); 806 807 // Our notion of the pid associated with the grandchild is in the main PID namespace. 808 pid_t grandchild; 809 EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); 810 EXPECT_NE(2, grandchild); 811 if (verbose) fprintf(stderr, "Parent: pre-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n", 812 grandchild_pd, grandchild, ProcessState(grandchild)); 813 EXPECT_PID_ALIVE(grandchild); 814 815 // Kill the grandchild via the process descriptor. 816 EXPECT_OK(pdkill(grandchild_pd, SIGINT)); 817 usleep(10000); 818 if (verbose) fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n", 819 grandchild_pd, grandchild, ProcessState(grandchild)); 820 EXPECT_PID_DEAD(grandchild); 821 822 sleep(2); 823 824 // Wait for the child. 825 EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); 826 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 827 EXPECT_EQ(0, rc); 828 829 close(shared_sock_fds[0]); 830 close(shared_sock_fds[1]); 831 close(shared_pd); 832 close(grandchild_pd); 833 } 834 835 int NSInit(void *data) { 836 // This function is running in a new PID namespace, and so is pid 1. 837 if (verbose) fprintf(stderr, " NSInit: pid=%d, ppid=%d\n", getpid_(), getppid()); 838 EXPECT_EQ(1, getpid_()); 839 EXPECT_EQ(0, getppid()); 840 841 int pd; 842 pid_t child = pdfork(&pd, 0); 843 EXPECT_OK(child); 844 if (child == 0) { 845 // Child: loop forever until terminated. 846 if (verbose) fprintf(stderr, " child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid()); 847 while (true) { 848 if (verbose) fprintf(stderr, " child of NSInit: \"I aten't dead\"\n"); 849 usleep(100000); 850 } 851 exit(0); 852 } 853 EXPECT_EQ(2, child); 854 EXPECT_PID_ALIVE(child); 855 if (verbose) fprintf(stderr, " NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n", 856 pd, child, ProcessState(child)); 857 sleep(1); 858 859 // Send the process descriptor over UNIX domain socket back to parent. 860 SendFD(pd, shared_sock_fds[1]); 861 close(pd); 862 863 // Wait for a byte back in the other direction. 864 int value; 865 if (verbose) fprintf(stderr, " NSInit: block waiting for value\n"); 866 read(shared_sock_fds[1], &value, sizeof(value)); 867 868 if (verbose) fprintf(stderr, " NSInit: return 0\n"); 869 return 0; 870 } 871 872 TEST(Linux, DeadNSInitIfRoot) { 873 GTEST_SKIP_IF_NOT_ROOT(); 874 875 // Prepare sockets to communicate with child process. 876 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); 877 878 // Clone into a child process with a new pid namespace. 879 pid_t child = clone(NSInit, child_stack + STACK_SIZE, 880 CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); 881 usleep(10000); 882 EXPECT_OK(child); 883 EXPECT_PID_ALIVE(child); 884 if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); 885 886 // Get the process descriptor of the child-of-child via socket transfer. 887 int grandchild_pd = ReceiveFD(shared_sock_fds[0]); 888 pid_t grandchild; 889 EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); 890 if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild)); 891 892 // Send an int to the child to trigger its termination. Grandchild should also 893 // go, as its init process is gone. 894 int zero = 0; 895 if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n"); 896 write(shared_sock_fds[0], &zero, sizeof(zero)); 897 EXPECT_PID_ZOMBIE(child); 898 EXPECT_PID_GONE(grandchild); 899 900 // Wait for the child. 901 int status; 902 EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); 903 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 904 EXPECT_EQ(0, rc); 905 EXPECT_PID_GONE(child); 906 907 close(shared_sock_fds[0]); 908 close(shared_sock_fds[1]); 909 close(grandchild_pd); 910 911 if (verbose) { 912 fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child)); 913 fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild)); 914 } 915 } 916 917 TEST(Linux, DeadNSInit2IfRoot) { 918 GTEST_SKIP_IF_NOT_ROOT(); 919 920 // Prepare sockets to communicate with child process. 921 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); 922 923 // Clone into a child process with a new pid namespace. 924 pid_t child = clone(NSInit, child_stack + STACK_SIZE, 925 CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); 926 usleep(10000); 927 EXPECT_OK(child); 928 EXPECT_PID_ALIVE(child); 929 if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); 930 931 // Get the process descriptor of the child-of-child via socket transfer. 932 int grandchild_pd = ReceiveFD(shared_sock_fds[0]); 933 pid_t grandchild; 934 EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); 935 if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild)); 936 937 // Kill the grandchild 938 EXPECT_OK(pdkill(grandchild_pd, SIGINT)); 939 usleep(10000); 940 EXPECT_PID_ZOMBIE(grandchild); 941 // Close the process descriptor, so there are now no procdesc references to grandchild. 942 close(grandchild_pd); 943 944 // Send an int to the child to trigger its termination. Grandchild should also 945 // go, as its init process is gone. 946 int zero = 0; 947 if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n"); 948 write(shared_sock_fds[0], &zero, sizeof(zero)); 949 EXPECT_PID_ZOMBIE(child); 950 EXPECT_PID_GONE(grandchild); 951 952 // Wait for the child. 953 int status; 954 EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); 955 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 956 EXPECT_EQ(0, rc); 957 958 close(shared_sock_fds[0]); 959 close(shared_sock_fds[1]); 960 961 if (verbose) { 962 fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child)); 963 fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild)); 964 } 965 } 966 967 #ifdef __x86_64__ 968 FORK_TEST(Linux, CheckHighWord) { 969 EXPECT_OK(cap_enter()); // Enter capability mode. 970 971 int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 972 EXPECT_OK(rc); 973 EXPECT_EQ(1, rc); // no_new_privs = 1 974 975 // Set some of the high 32-bits of argument zero. 976 uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL; 977 EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0)); 978 } 979 #endif 980 981 FORK_TEST(Linux, PrctlOpenatBeneath) { 982 // Set no_new_privs = 1 983 EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 984 int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 985 EXPECT_OK(rc); 986 EXPECT_EQ(1, rc); // no_new_privs = 1 987 988 // Set openat-beneath mode 989 EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0)); 990 rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); 991 EXPECT_OK(rc); 992 EXPECT_EQ(1, rc); // openat_beneath = 1 993 994 // Clear openat-beneath mode 995 EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0)); 996 rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); 997 EXPECT_OK(rc); 998 EXPECT_EQ(0, rc); // openat_beneath = 0 999 1000 EXPECT_OK(cap_enter()); // Enter capability mode 1001 1002 // Expect to be in openat_beneath mode 1003 rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); 1004 EXPECT_OK(rc); 1005 EXPECT_EQ(1, rc); // openat_beneath = 1 1006 1007 // Expect this to be immutable. 1008 EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0)); 1009 rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); 1010 EXPECT_OK(rc); 1011 EXPECT_EQ(1, rc); // openat_beneath = 1 1012 1013 } 1014 1015 FORK_TEST(Linux, NoNewPrivs) { 1016 if (getuid() == 0) { 1017 // If root, drop CAP_SYS_ADMIN POSIX.1e capability. 1018 struct __user_cap_header_struct hdr; 1019 hdr.version = _LINUX_CAPABILITY_VERSION_3; 1020 hdr.pid = getpid_(); 1021 struct __user_cap_data_struct data[3]; 1022 EXPECT_OK(capget(&hdr, &data[0])); 1023 data[0].effective &= ~(1 << CAP_SYS_ADMIN); 1024 data[0].permitted &= ~(1 << CAP_SYS_ADMIN); 1025 data[0].inheritable &= ~(1 << CAP_SYS_ADMIN); 1026 EXPECT_OK(capset(&hdr, &data[0])); 1027 } 1028 int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 1029 EXPECT_OK(rc); 1030 EXPECT_EQ(0, rc); // no_new_privs == 0 1031 1032 // Can't enter seccomp-bpf mode with no_new_privs == 0 1033 struct sock_filter filter[] = { 1034 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 1035 }; 1036 struct sock_fprog bpf; 1037 bpf.len = (sizeof(filter) / sizeof(filter[0])); 1038 bpf.filter = filter; 1039 rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0); 1040 EXPECT_EQ(-1, rc); 1041 EXPECT_EQ(EACCES, errno); 1042 1043 // Set no_new_privs = 1 1044 EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 1045 rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 1046 EXPECT_OK(rc); 1047 EXPECT_EQ(1, rc); // no_new_privs = 1 1048 1049 // Can now turn on seccomp mode 1050 EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0)); 1051 } 1052 1053 /* Macros for BPF generation */ 1054 #define BPF_RETURN_ERRNO(err) \ 1055 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | (err & 0xFFFF)) 1056 #define BPF_KILL_PROCESS \ 1057 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) 1058 #define BPF_ALLOW \ 1059 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 1060 #define EXAMINE_SYSCALL \ 1061 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)) 1062 #define ALLOW_SYSCALL(name) \ 1063 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ 1064 BPF_ALLOW 1065 #define KILL_SYSCALL(name) \ 1066 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ 1067 BPF_KILL_PROCESS 1068 #define FAIL_SYSCALL(name, err) \ 1069 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ 1070 BPF_RETURN_ERRNO(err) 1071 1072 TEST(Linux, CapModeWithBPF) { 1073 pid_t child = fork(); 1074 EXPECT_OK(child); 1075 if (child == 0) { 1076 int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644); 1077 cap_rights_t rights; 1078 cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC); 1079 EXPECT_OK(cap_rights_limit(fd, &rights)); 1080 1081 struct sock_filter filter[] = { EXAMINE_SYSCALL, 1082 FAIL_SYSCALL(fchmod, ENOMEM), 1083 FAIL_SYSCALL(fstat, ENOEXEC), 1084 ALLOW_SYSCALL(close), 1085 KILL_SYSCALL(fsync), 1086 BPF_ALLOW }; 1087 struct sock_fprog bpf = {.len = (sizeof(filter) / sizeof(filter[0])), 1088 .filter = filter}; 1089 // Set up seccomp-bpf first. 1090 EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 1091 EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0)); 1092 1093 EXPECT_OK(cap_enter()); // Enter capability mode. 1094 1095 // fchmod is allowed by Capsicum, but failed by BPF. 1096 EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644)); 1097 // open is allowed by BPF, but failed by Capsicum 1098 EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY)); 1099 // fstat is failed by both BPF and Capsicum; tie-break is on errno 1100 struct stat buf; 1101 EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf)); 1102 // fsync is allowed by Capsicum, but BPF's SIGSYS generation take precedence 1103 fsync(fd); // terminate with unhandled SIGSYS 1104 exit(0); 1105 } 1106 int status; 1107 EXPECT_EQ(child, waitpid(child, &status, 0)); 1108 EXPECT_TRUE(WIFSIGNALED(status)); 1109 EXPECT_EQ(SIGSYS, WTERMSIG(status)); 1110 unlink(TmpFile("cap_bpf_capmode")); 1111 } 1112 1113 TEST(Linux, AIO) { 1114 int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644); 1115 EXPECT_OK(fd); 1116 1117 cap_rights_t r_rs; 1118 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); 1119 cap_rights_t r_ws; 1120 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); 1121 cap_rights_t r_rwssync; 1122 cap_rights_init(&r_rwssync, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC); 1123 1124 int cap_ro = dup(fd); 1125 EXPECT_OK(cap_ro); 1126 EXPECT_OK(cap_rights_limit(cap_ro, &r_rs)); 1127 EXPECT_OK(cap_ro); 1128 int cap_wo = dup(fd); 1129 EXPECT_OK(cap_wo); 1130 EXPECT_OK(cap_rights_limit(cap_wo, &r_ws)); 1131 EXPECT_OK(cap_wo); 1132 int cap_all = dup(fd); 1133 EXPECT_OK(cap_all); 1134 EXPECT_OK(cap_rights_limit(cap_all, &r_rwssync)); 1135 EXPECT_OK(cap_all); 1136 1137 // Linux: io_setup, io_submit, io_getevents, io_cancel, io_destroy 1138 aio_context_t ctx = 0; 1139 EXPECT_OK(syscall(__NR_io_setup, 10, &ctx)); 1140 1141 unsigned char buffer[32] = {1, 2, 3, 4}; 1142 struct iocb req; 1143 memset(&req, 0, sizeof(req)); 1144 req.aio_reqprio = 0; 1145 req.aio_fildes = fd; 1146 uintptr_t bufaddr = (uintptr_t)buffer; 1147 req.aio_buf = (__u64)bufaddr; 1148 req.aio_nbytes = 4; 1149 req.aio_offset = 0; 1150 struct iocb* reqs[1] = {&req}; 1151 1152 // Write operation 1153 req.aio_lio_opcode = IOCB_CMD_PWRITE; 1154 req.aio_fildes = cap_ro; 1155 EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1156 req.aio_fildes = cap_wo; 1157 EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs)); 1158 1159 // Sync operation 1160 req.aio_lio_opcode = IOCB_CMD_FSYNC; 1161 EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1162 req.aio_lio_opcode = IOCB_CMD_FDSYNC; 1163 EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1164 // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented 1165 req.aio_fildes = cap_all; 1166 EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1167 req.aio_lio_opcode = IOCB_CMD_FSYNC; 1168 EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1169 1170 // Read operation 1171 req.aio_lio_opcode = IOCB_CMD_PREAD; 1172 req.aio_fildes = cap_wo; 1173 EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); 1174 req.aio_fildes = cap_ro; 1175 EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs)); 1176 1177 EXPECT_OK(syscall(__NR_io_destroy, ctx)); 1178 1179 close(cap_all); 1180 close(cap_wo); 1181 close(cap_ro); 1182 close(fd); 1183 unlink(TmpFile("cap_aio")); 1184 } 1185 1186 #ifndef KCMP_FILE 1187 #define KCMP_FILE 0 1188 #endif 1189 TEST(Linux, KcmpIfAvailable) { 1190 // This requires CONFIG_CHECKPOINT_RESTORE in kernel config. 1191 int fd = open("/etc/passwd", O_RDONLY); 1192 EXPECT_OK(fd); 1193 pid_t parent = getpid_(); 1194 1195 errno = 0; 1196 int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd); 1197 if (rc == -1 && errno == ENOSYS) { 1198 GTEST_SKIP() << "kcmp(2) gives -ENOSYS"; 1199 } 1200 1201 pid_t child = fork(); 1202 if (child == 0) { 1203 // Child: limit rights on FD. 1204 child = getpid_(); 1205 EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd)); 1206 cap_rights_t rights; 1207 cap_rights_init(&rights, CAP_READ, CAP_WRITE); 1208 EXPECT_OK(cap_rights_limit(fd, &rights)); 1209 // A capability wrapping a normal FD is different (from a kcmp(2) perspective) 1210 // than the original file. 1211 EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd)); 1212 exit(HasFailure()); 1213 } 1214 // Wait for the child. 1215 int status; 1216 EXPECT_EQ(child, waitpid(child, &status, 0)); 1217 rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; 1218 EXPECT_EQ(0, rc); 1219 1220 close(fd); 1221 } 1222 1223 TEST(Linux, ProcFS) { 1224 cap_rights_t rights; 1225 cap_rights_init(&rights, CAP_READ, CAP_SEEK); 1226 int fd = open("/etc/passwd", O_RDONLY); 1227 EXPECT_OK(fd); 1228 lseek(fd, 4, SEEK_SET); 1229 int cap = dup(fd); 1230 EXPECT_OK(cap); 1231 EXPECT_OK(cap_rights_limit(cap, &rights)); 1232 pid_t me = getpid_(); 1233 1234 char buffer[1024]; 1235 sprintf(buffer, "/proc/%d/fdinfo/%d", me, cap); 1236 int procfd = open(buffer, O_RDONLY); 1237 EXPECT_OK(procfd) << " failed to open " << buffer; 1238 if (procfd < 0) return; 1239 int proccap = dup(procfd); 1240 EXPECT_OK(proccap); 1241 EXPECT_OK(cap_rights_limit(proccap, &rights)); 1242 1243 EXPECT_OK(read(proccap, buffer, sizeof(buffer))); 1244 // The fdinfo should include the file pos of the underlying file 1245 EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4")); 1246 // ...and the rights of the Capsicum capability. 1247 EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x")); 1248 1249 close(procfd); 1250 close(proccap); 1251 close(cap); 1252 close(fd); 1253 } 1254 1255 FORK_TEST(Linux, ProcessClocks) { 1256 pid_t self = getpid_(); 1257 pid_t child = fork(); 1258 EXPECT_OK(child); 1259 if (child == 0) { 1260 child = getpid_(); 1261 usleep(100000); 1262 exit(0); 1263 } 1264 1265 EXPECT_OK(cap_enter()); // Enter capability mode. 1266 1267 // Nefariously build a clock ID for the child's CPU time. 1268 // This relies on knowledge of the internal layout of clock IDs. 1269 clockid_t child_clock; 1270 child_clock = ((~child) << 3) | 0x0; 1271 struct timespec ts; 1272 memset(&ts, 0, sizeof(ts)); 1273 1274 // TODO(drysdale): Should not be possible to retrieve info about a 1275 // different process, as the PID global namespace should be locked 1276 // down. 1277 EXPECT_OK(clock_gettime(child_clock, &ts)); 1278 if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n", 1279 self, child, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec); 1280 1281 child_clock = ((~1) << 3) | 0x0; 1282 memset(&ts, 0, sizeof(ts)); 1283 EXPECT_OK(clock_gettime(child_clock, &ts)); 1284 if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n", 1285 self, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec); 1286 1287 // Orphan the child. 1288 } 1289 1290 TEST(Linux, SetLease) { 1291 int fd_all = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644); 1292 EXPECT_OK(fd_all); 1293 int fd_rw = dup(fd_all); 1294 EXPECT_OK(fd_rw); 1295 1296 cap_rights_t r_all; 1297 cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL); 1298 EXPECT_OK(cap_rights_limit(fd_all, &r_all)); 1299 1300 cap_rights_t r_rw; 1301 cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); 1302 EXPECT_OK(cap_rights_limit(fd_rw, &r_rw)); 1303 1304 EXPECT_NOTCAPABLE(fcntl(fd_rw, F_SETLEASE, F_WRLCK)); 1305 EXPECT_NOTCAPABLE(fcntl(fd_rw, F_GETLEASE)); 1306 1307 if (!tmpdir_on_tmpfs) { // tmpfs doesn't support leases 1308 EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_WRLCK)); 1309 EXPECT_EQ(F_WRLCK, fcntl(fd_all, F_GETLEASE)); 1310 1311 EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_UNLCK, 0)); 1312 EXPECT_EQ(F_UNLCK, fcntl(fd_all, F_GETLEASE)); 1313 } 1314 close(fd_all); 1315 close(fd_rw); 1316 unlink(TmpFile("cap_lease")); 1317 } 1318 1319 TEST(Linux, InvalidRightsSyscall) { 1320 int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644); 1321 EXPECT_OK(fd); 1322 1323 cap_rights_t rights; 1324 cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT); 1325 1326 // Use the raw syscall throughout. 1327 EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); 1328 1329 // Directly access the syscall, and find all unseemly manner of use for it. 1330 // - Invalid flags 1331 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 1)); 1332 EXPECT_EQ(EINVAL, errno); 1333 // - Specify an fcntl subright, but no CAP_FCNTL set 1334 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, CAP_FCNTL_GETFL, 0, NULL, 0)); 1335 EXPECT_EQ(EINVAL, errno); 1336 // - Specify an ioctl subright, but no CAP_IOCTL set 1337 unsigned int ioctl1 = 1; 1338 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, &ioctl1, 0)); 1339 EXPECT_EQ(EINVAL, errno); 1340 // - N ioctls, but null pointer passed 1341 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, NULL, 0)); 1342 EXPECT_EQ(EINVAL, errno); 1343 // - Invalid nioctls 1344 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, -2, NULL, 0)); 1345 EXPECT_EQ(EINVAL, errno); 1346 // - Null primary rights 1347 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0)); 1348 EXPECT_EQ(EFAULT, errno); 1349 // - Invalid index bitmask 1350 rights.cr_rights[0] |= 3ULL << 57; 1351 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); 1352 EXPECT_EQ(EINVAL, errno); 1353 // - Invalid version 1354 rights.cr_rights[0] |= 2ULL << 62; 1355 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); 1356 EXPECT_EQ(EINVAL, errno); 1357 1358 close(fd); 1359 unlink(TmpFile("cap_invalid_rights")); 1360 } 1361 1362 FORK_TEST_ON(Linux, OpenByHandleAtIfRoot, TmpFile("cap_openbyhandle_testfile")) { 1363 GTEST_SKIP_IF_NOT_ROOT(); 1364 int dir = open(tmpdir.c_str(), O_RDONLY); 1365 EXPECT_OK(dir); 1366 int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644); 1367 EXPECT_OK(fd); 1368 const char* message = "Saved text"; 1369 EXPECT_OK(write(fd, message, strlen(message))); 1370 close(fd); 1371 1372 struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); 1373 fhandle->handle_bytes = MAX_HANDLE_SZ; 1374 int mount_id; 1375 EXPECT_OK(name_to_handle_at(dir, "cap_openbyhandle_testfile", fhandle, &mount_id, 0)); 1376 1377 fd = open_by_handle_at(dir, fhandle, O_RDONLY); 1378 EXPECT_OK(fd); 1379 char buffer[200]; 1380 ssize_t len = read(fd, buffer, 199); 1381 EXPECT_OK(len); 1382 EXPECT_EQ(std::string(message), std::string(buffer, len)); 1383 close(fd); 1384 1385 // Cannot issue open_by_handle_at after entering capability mode. 1386 cap_enter(); 1387 EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY)); 1388 1389 close(dir); 1390 } 1391 1392 int getrandom_(void *buf, size_t buflen, unsigned int flags) { 1393 #ifdef __NR_getrandom 1394 return syscall(__NR_getrandom, buf, buflen, flags); 1395 #else 1396 errno = ENOSYS; 1397 return -1; 1398 #endif 1399 } 1400 1401 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) 1402 #include <linux/random.h> // Requires 3.17 kernel 1403 FORK_TEST(Linux, GetRandom) { 1404 EXPECT_OK(cap_enter()); 1405 unsigned char buffer[1024]; 1406 unsigned char buffer2[1024]; 1407 EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK)); 1408 EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK)); 1409 EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer))); 1410 } 1411 #endif 1412 1413 int memfd_create_(const char *name, unsigned int flags) { 1414 #ifdef __NR_memfd_create 1415 return syscall(__NR_memfd_create, name, flags); 1416 #else 1417 errno = ENOSYS; 1418 return -1; 1419 #endif 1420 } 1421 1422 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) 1423 #include <linux/memfd.h> // Requires 3.17 kernel 1424 TEST(Linux, MemFDDeathTestIfAvailable) { 1425 int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING); 1426 if (memfd == -1 && errno == ENOSYS) { 1427 GTEST_SKIP() << "memfd_create(2) gives -ENOSYS"; 1428 } 1429 const int LEN = 16; 1430 EXPECT_OK(ftruncate(memfd, LEN)); 1431 int memfd_ro = dup(memfd); 1432 int memfd_rw = dup(memfd); 1433 EXPECT_OK(memfd_ro); 1434 EXPECT_OK(memfd_rw); 1435 cap_rights_t rights; 1436 EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT))); 1437 EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD))); 1438 1439 unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0); 1440 EXPECT_NE((unsigned char *)MAP_FAILED, p_ro); 1441 unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0); 1442 EXPECT_NE((unsigned char *)MAP_FAILED, p_rw); 1443 EXPECT_EQ(MAP_FAILED, 1444 mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0)); 1445 1446 *p_rw = 42; 1447 EXPECT_EQ(42, *p_ro); 1448 EXPECT_DEATH(*p_ro = 42, ""); 1449 1450 #ifndef F_ADD_SEALS 1451 // Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17 1452 #define _F_LINUX_SPECIFIC_BASE F_SETLEASE 1453 #define F_ADD_SEALS (_F_LINUX_SPECIFIC_BASE + 9) 1454 #define F_GET_SEALS (_F_LINUX_SPECIFIC_BASE + 10) 1455 #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ 1456 #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ 1457 #define F_SEAL_GROW 0x0004 /* prevent file from growing */ 1458 #define F_SEAL_WRITE 0x0008 /* prevent writes */ 1459 #endif 1460 1461 // Reading the seal information requires CAP_FSTAT. 1462 int seals = fcntl(memfd, F_GET_SEALS); 1463 EXPECT_OK(seals); 1464 if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals); 1465 int seals_ro = fcntl(memfd_ro, F_GET_SEALS); 1466 EXPECT_EQ(seals, seals_ro); 1467 if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro); 1468 int seals_rw = fcntl(memfd_rw, F_GET_SEALS); 1469 EXPECT_NOTCAPABLE(seals_rw); 1470 1471 // Fail to seal as a writable mapping exists. 1472 EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); 1473 EXPECT_EQ(EBUSY, errno); 1474 *p_rw = 42; 1475 1476 // Seal the rw version; need to unmap first. 1477 munmap(p_rw, LEN); 1478 munmap(p_ro, LEN); 1479 EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); 1480 1481 seals = fcntl(memfd, F_GET_SEALS); 1482 EXPECT_OK(seals); 1483 if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals); 1484 seals_ro = fcntl(memfd_ro, F_GET_SEALS); 1485 EXPECT_EQ(seals, seals_ro); 1486 if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro); 1487 1488 // Remove the CAP_FCHMOD right, can no longer add seals. 1489 EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW))); 1490 EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); 1491 1492 close(memfd); 1493 close(memfd_ro); 1494 close(memfd_rw); 1495 } 1496 #endif 1497 1498 #else 1499 void noop() {} 1500 #endif 1501