1 // Tests of Linux-specific functionality
2 #ifdef __linux__
3
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <sys/socket.h>
7 #include <sys/timerfd.h>
8 #include <sys/signalfd.h>
9 #include <sys/eventfd.h>
10 #include <sys/epoll.h>
11 #include <sys/inotify.h>
12 #include <sys/fanotify.h>
13 #include <sys/mman.h>
14 #include <sys/capability.h> // Requires e.g. libcap-dev package for POSIX.1e capabilities headers
15 #include <linux/aio_abi.h>
16 #include <linux/filter.h>
17 #include <linux/seccomp.h>
18 #include <linux/version.h>
19 #include <poll.h>
20 #include <sched.h>
21 #include <signal.h>
22 #include <fcntl.h>
23 #include <unistd.h>
24
25 #include <string>
26
27 #include "capsicum.h"
28 #include "syscalls.h"
29 #include "capsicum-test.h"
30
// Exercises a timerfd through capability-restricted duplicates.  The
// expectations encode: timerfd_settime() succeeds with CAP_WRITE alone unless
// the previous setting is also requested, timerfd_gettime() is refused
// without CAP_READ, and poll() only reports the timer pop on the descriptor
// that also holds CAP_EVENT.
TEST(Linux, TimerFD) {
  int fd = timerfd_create(CLOCK_MONOTONIC, 0);
  EXPECT_OK(fd);  // All the duplicates below derive from this descriptor.

  cap_rights_t r_ro;
  cap_rights_init(&r_ro, CAP_READ);
  cap_rights_t r_wo;
  cap_rights_init(&r_wo, CAP_WRITE);
  cap_rights_t r_rw;
  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
  cap_rights_t r_rwpoll;
  cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT);

  int cap_fd_ro = dup(fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro));
  int cap_fd_wo = dup(fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo));
  int cap_fd_rw = dup(fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw));
  int cap_fd_all = dup(fd);
  EXPECT_OK(cap_fd_all);
  EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll));

  // One-shot timer (no interval) that pops after 100ms.
  struct itimerspec old_ispec;
  struct itimerspec ispec;
  ispec.it_interval.tv_sec = 0;
  ispec.it_interval.tv_nsec = 0;
  ispec.it_value.tv_sec = 0;
  ispec.it_value.tv_nsec = 100000000;  // 100ms

  // Arming is refused on the read-only descriptor, and on the write-only
  // descriptor when the old setting is requested as well.
  EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL));
  EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec));
  EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL));
  EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL));
  EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL));

  // Querying the timer is refused on the write-only descriptor.
  EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));

  // To be able to poll() for the timer pop, still need CAP_EVENT.
  const int fds_without_event[] = {cap_fd_ro, cap_fd_wo, cap_fd_rw};
  const size_t num_without_event =
      sizeof(fds_without_event) / sizeof(fds_without_event[0]);
  struct pollfd poll_fd;
  for (size_t ii = 0; ii < num_without_event; ii++) {
    poll_fd.fd = fds_without_event[ii];
    poll_fd.events = POLLIN;
    poll_fd.revents = 0;
    // Poll immediately returns with POLLNVAL
    EXPECT_OK(poll(&poll_fd, 1, 400));
    EXPECT_EQ(0, (poll_fd.revents & POLLIN));
    EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
  }

  // The descriptor with CAP_EVENT sees the pop within the timeout.
  poll_fd.fd = cap_fd_all;
  poll_fd.events = POLLIN;
  poll_fd.revents = 0;
  EXPECT_OK(poll(&poll_fd, 1, 400));
  EXPECT_NE(0, (poll_fd.revents & POLLIN));
  EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));

  // After the pop the timer is expected to report nothing pending.
  EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
  EXPECT_EQ(0, old_ispec.it_value.tv_sec);
  EXPECT_EQ(0, old_ispec.it_value.tv_nsec);
  EXPECT_EQ(0, old_ispec.it_interval.tv_sec);
  EXPECT_EQ(0, old_ispec.it_interval.tv_nsec);

  close(cap_fd_all);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(fd);
}
106
// Exercises a signalfd through capability-restricted duplicates: reading the
// signal information is refused without CAP_READ, changing the mask is
// refused without CAP_FSIGNAL, and poll() reports POLLNVAL without CAP_EVENT.
// Skipped when force_mt is set.
FORK_TEST(Linux, SignalFDIfSingleThreaded) {
  if (force_mt) {
    GTEST_SKIP() << "multi-threaded run clashes with signals";
  }
  pid_t self_pid = getpid();

  // Block signals before registering against a new signal FD.
  sigset_t sigs;
  sigemptyset(&sigs);
  sigaddset(&sigs, SIGUSR1);
  EXPECT_OK(sigprocmask(SIG_BLOCK, &sigs, NULL));
  int sig_fd = signalfd(-1, &sigs, 0);
  EXPECT_OK(sig_fd);

  // Duplicates of the signal FD, each limited to a different set of rights.
  cap_rights_t rights;
  int fd_write_seek = dup(sig_fd);
  EXPECT_OK(fd_write_seek);
  EXPECT_OK(cap_rights_limit(fd_write_seek,
                             cap_rights_init(&rights, CAP_WRITE, CAP_SEEK)));
  int fd_read_seek = dup(sig_fd);
  EXPECT_OK(fd_read_seek);
  EXPECT_OK(cap_rights_limit(fd_read_seek,
                             cap_rights_init(&rights, CAP_READ, CAP_SEEK)));
  int fd_fsignal = dup(sig_fd);
  EXPECT_OK(fd_fsignal);
  EXPECT_OK(cap_rights_limit(fd_fsignal,
                             cap_rights_init(&rights, CAP_FSIGNAL)));
  int fd_fsignal_read = dup(sig_fd);
  EXPECT_OK(fd_fsignal_read);
  EXPECT_OK(cap_rights_limit(fd_fsignal_read,
                             cap_rights_init(&rights, CAP_FSIGNAL, CAP_READ, CAP_SEEK)));
  int fd_everything = dup(sig_fd);
  EXPECT_OK(fd_everything);
  EXPECT_OK(cap_rights_limit(fd_everything,
                             cap_rights_init(&rights, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT)));

  // Raise SIGUSR1 against ourselves; only a descriptor with CAP_READ can
  // retrieve the resulting signal information.
  struct signalfd_siginfo info;
  kill(self_pid, SIGUSR1);
  EXPECT_NOTCAPABLE(read(fd_write_seek, &info, sizeof(info)));
  EXPECT_NOTCAPABLE(read(fd_fsignal, &info, sizeof(info)));
  int nread = read(fd_read_seek, &info, sizeof(info));
  EXPECT_OK(nread);
  EXPECT_EQ(sizeof(info), (size_t)nread);
  EXPECT_EQ(SIGUSR1, (int)info.ssi_signo);

  // Widen the mask to SIGUSR1 + SIGUSR2; updating the signal FD's mask is
  // only possible through a descriptor with CAP_FSIGNAL.
  sigemptyset(&sigs);
  sigaddset(&sigs, SIGUSR1);
  sigaddset(&sigs, SIGUSR2);
  EXPECT_OK(sigprocmask(SIG_BLOCK, &sigs, NULL));
  EXPECT_NOTCAPABLE(signalfd(fd_write_seek, &sigs, 0));
  EXPECT_NOTCAPABLE(signalfd(fd_read_seek, &sigs, 0));
  EXPECT_EQ(fd_fsignal, signalfd(fd_fsignal, &sigs, 0));

  // Raise SIGUSR2; poll(2) only reports it on a descriptor with CAP_EVENT.
  kill(self_pid, SIGUSR2);

  struct pollfd pfd;
  pfd.fd = fd_fsignal_read;
  pfd.events = POLLIN;
  pfd.revents = 0;
  EXPECT_OK(poll(&pfd, 1, 400));
  EXPECT_EQ(0, (pfd.revents & POLLIN));
  EXPECT_NE(0, (pfd.revents & POLLNVAL));

  pfd.fd = fd_everything;
  EXPECT_OK(poll(&pfd, 1, 400));
  EXPECT_NE(0, (pfd.revents & POLLIN));
  EXPECT_EQ(0, (pfd.revents & POLLNVAL));
}
185
// Exercises an eventfd through capability-restricted duplicates: a forked
// child writes a counter value (refused without CAP_WRITE), and the parent
// polls for it (POLLNVAL without CAP_EVENT) and reads it back (refused
// without CAP_READ).
TEST(Linux, EventFD) {
  int fd = eventfd(0, 0);
  EXPECT_OK(fd);

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rws;
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwspoll;
  cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);

  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
  int cap_wo = dup(fd);
  EXPECT_OK(cap_wo);
  EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
  int cap_rw = dup(fd);
  EXPECT_OK(cap_rw);
  EXPECT_OK(cap_rights_limit(cap_rw, &r_rws));
  int cap_all = dup(fd);
  EXPECT_OK(cap_all);
  EXPECT_OK(cap_rights_limit(cap_all, &r_rwspoll));

  pid_t child = fork();
  EXPECT_OK(child);  // A failed fork() would otherwise only show up as odd failures below.
  if (child == 0) {
    // Child: write counter to eventfd
    uint64_t u = 42;
    EXPECT_NOTCAPABLE(write(cap_ro, &u, sizeof(u)));
    EXPECT_OK(write(cap_wo, &u, sizeof(u)));
    // Report any expectation failure to the parent via the exit status.
    exit(HasFailure());
  }

  sleep(1);  // Allow child to write

  // Without CAP_EVENT, poll() reports POLLNVAL rather than the pending value.
  struct pollfd poll_fd;
  poll_fd.revents = 0;
  poll_fd.events = POLLIN;
  poll_fd.fd = cap_rw;
  EXPECT_OK(poll(&poll_fd, 1, 400));
  EXPECT_EQ(0, (poll_fd.revents & POLLIN));
  EXPECT_NE(0, (poll_fd.revents & POLLNVAL));

  poll_fd.fd = cap_all;
  EXPECT_OK(poll(&poll_fd, 1, 400));
  EXPECT_NE(0, (poll_fd.revents & POLLIN));
  EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));

  // Zero-initialised so a failed read() yields a clean mismatch below instead
  // of comparing an indeterminate value.
  uint64_t u = 0;
  EXPECT_NOTCAPABLE(read(cap_wo, &u, sizeof(u)));
  EXPECT_OK(read(cap_ro, &u, sizeof(u)));
  EXPECT_EQ(42, (int)u);

  // Wait for the child.
  int status;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);

  close(cap_all);
  close(cap_rw);
  close(cap_wo);
  close(cap_ro);
  close(fd);
}
253
// Exercises an epoll instance, created in capability mode, through
// capability-restricted duplicates: changing the monitored set is refused
// without CAP_EPOLL_CTL and waiting for events is refused without CAP_EVENT.
FORK_TEST(Linux, epoll) {
  // A socket pair with queued data gives epoll something to report.
  int socks[2];
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, socks));
  char payload[4] = {1, 2, 3, 4};
  EXPECT_OK(write(socks[1], payload, sizeof(payload)));

  EXPECT_OK(cap_enter());  // Enter capability mode.

  int epoll_fd = epoll_create(1);
  EXPECT_OK(epoll_fd);

  // Duplicates of the epoll FD, each limited to a different set of rights.
  cap_rights_t rights;
  int ep_wo = dup(epoll_fd);
  EXPECT_OK(ep_wo);
  EXPECT_OK(cap_rights_limit(ep_wo,
                             cap_rights_init(&rights, CAP_WRITE, CAP_SEEK)));
  int ep_ro = dup(epoll_fd);
  EXPECT_OK(ep_ro);
  EXPECT_OK(cap_rights_limit(ep_ro,
                             cap_rights_init(&rights, CAP_READ, CAP_SEEK)));
  int ep_rw = dup(epoll_fd);
  EXPECT_OK(ep_rw);
  EXPECT_OK(cap_rights_limit(ep_rw,
                             cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK)));
  int ep_poll = dup(epoll_fd);
  EXPECT_OK(ep_poll);
  EXPECT_OK(cap_rights_limit(ep_poll,
                             cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT)));
  int ep_ctl = dup(epoll_fd);
  EXPECT_OK(ep_ctl);
  EXPECT_OK(cap_rights_limit(ep_ctl,
                             cap_rights_init(&rights, CAP_EPOLL_CTL)));

  // Adding a monitored FD is only possible with CAP_EPOLL_CTL...
  struct epoll_event ev;
  memset(&ev, 0, sizeof(ev));
  ev.events = EPOLLIN|EPOLLOUT|EPOLLPRI;
  EXPECT_NOTCAPABLE(epoll_ctl(ep_ro, EPOLL_CTL_ADD, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_wo, EPOLL_CTL_ADD, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_rw, EPOLL_CTL_ADD, socks[0], &ev));
  EXPECT_OK(epoll_ctl(ep_ctl, EPOLL_CTL_ADD, socks[0], &ev));
  // ...and so is modifying what is monitored for it.
  ev.events = EPOLLIN|EPOLLOUT;
  EXPECT_NOTCAPABLE(epoll_ctl(ep_ro, EPOLL_CTL_MOD, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_wo, EPOLL_CTL_MOD, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_rw, EPOLL_CTL_MOD, socks[0], &ev));
  EXPECT_OK(epoll_ctl(ep_ctl, EPOLL_CTL_MOD, socks[0], &ev));

  // Waiting with epoll_pwait(2) needs CAP_EVENT; the queued data should then
  // show up as EPOLLIN.
  ev.events = 0;
  EXPECT_NOTCAPABLE(epoll_pwait(ep_ro, &ev, 1, 100, NULL));
  EXPECT_NOTCAPABLE(epoll_pwait(ep_wo, &ev, 1, 100, NULL));
  EXPECT_NOTCAPABLE(epoll_pwait(ep_rw, &ev, 1, 100, NULL));
  EXPECT_OK(epoll_pwait(ep_poll, &ev, 1, 100, NULL));
  EXPECT_EQ(EPOLLIN, ev.events & EPOLLIN);

  // Removal is refused on the restricted duplicates lacking CAP_EPOLL_CTL,
  // and is finally performed via the unrestricted descriptor.
  EXPECT_NOTCAPABLE(epoll_ctl(ep_ro, EPOLL_CTL_DEL, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_wo, EPOLL_CTL_DEL, socks[0], &ev));
  EXPECT_NOTCAPABLE(epoll_ctl(ep_rw, EPOLL_CTL_DEL, socks[0], &ev));
  EXPECT_OK(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, socks[0], &ev));

  close(ep_ctl);
  close(ep_poll);
  close(ep_rw);
  close(ep_ro);
  close(ep_wo);
  close(epoll_fd);
  close(socks[1]);
  close(socks[0]);
}
329
// Checks that fstatat() is refused on descriptors lacking CAP_FSTAT, both
// for a file descriptor used with an empty path and for a directory
// descriptor used with a relative path.
TEST(Linux, fstatat) {
  int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);
  unsigned char buffer[] = {1, 2, 3, 4};
  EXPECT_OK(write(fd, buffer, sizeof(buffer)));
  cap_rights_t rights;
  int cap_rf = dup(fd);
  EXPECT_OK(cap_rf);
  EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ)));

  // File descriptor + empty path: stat the descriptor itself.
  struct stat info;
  EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH));
  EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH));
  EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH));

  close(cap_ro);
  close(cap_rf);
  close(fd);

  // Directory descriptor + relative path.
  int dir = open(tmpdir.c_str(), O_RDONLY);
  EXPECT_OK(dir);
  int dir_rf = dup(dir);
  EXPECT_OK(dir_rf);
  EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
  // Duplicate the directory, not the already-closed file descriptor |fd|
  // (which previously only worked when open() happened to reuse its number).
  int dir_ro = dup(dir);
  EXPECT_OK(dir_ro);
  EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ)));

  EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH));
  EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH));
  EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH));

  close(dir_ro);
  close(dir_rf);
  close(dir);

  unlink(TmpFile("cap_fstatat"));
}
371
372 // fanotify support may not be available at compile-time
373 #ifdef __NR_fanotify_init
// Exercises fanotify through capability-restricted descriptors (root only).
// The expectations encode: fanotify_mark() is refused on notification
// descriptors lacking CAP_NOTIFY and on target descriptors lacking CAP_FSTAT
// (plus CAP_LOOKUP when a relative path is given), poll() reports POLLNVAL
// without CAP_EVENT, and read() is refused without CAP_READ.
TEST(Linux, FanotifyIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();
  int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR);
  EXPECT_OK(fa_fd);
  if (fa_fd < 0) return;  // May not be enabled

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rws;
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwspoll;
  cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
  cap_rights_t r_rwsnotify;
  cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
  cap_rights_t r_rsl;
  cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP);
  cap_rights_t r_rslstat;
  cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT);
  cap_rights_t r_rsstat;
  cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT);

  // Restricted duplicates of the fanotify descriptor.
  int cap_fd_ro = dup(fa_fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
  int cap_fd_wo = dup(fa_fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
  int cap_fd_rw = dup(fa_fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
  int cap_fd_poll = dup(fa_fd);
  EXPECT_OK(cap_fd_poll);
  EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll));
  int cap_fd_not = dup(fa_fd);
  EXPECT_OK(cap_fd_not);
  EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify));

  // Directory (plus one file inside it) to be monitored, and restricted
  // duplicates of the directory descriptor.
  int rc = mkdir(TmpFile("cap_notify"), 0755);
  EXPECT_TRUE(rc == 0 || errno == EEXIST);
  int dfd = open(TmpFile("cap_notify"), O_RDONLY);
  EXPECT_OK(dfd);
  int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);  // The "file" marks below rely on this file existing.
  close(fd);
  int cap_dfd = dup(dfd);
  EXPECT_OK(cap_dfd);
  EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat));
  int cap_dfd_rs = dup(dfd);
  EXPECT_OK(cap_dfd_rs);
  EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs));
  int cap_dfd_rsstat = dup(dfd);
  EXPECT_OK(cap_dfd_rsstat);
  EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat));
  int cap_dfd_rsl = dup(dfd);
  EXPECT_OK(cap_dfd_rsl);
  EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl));

  const uint64_t dir_mask = FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD;
  const uint64_t file_mask = FAN_OPEN|FAN_MODIFY;

  // Need CAP_NOTIFY to change what's monitored.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, dir_mask, cap_dfd, NULL));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, dir_mask, cap_dfd, NULL));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, dir_mask, cap_dfd, NULL));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, dir_mask, cap_dfd, NULL));

  // Need CAP_FSTAT on the thing monitored.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, dir_mask, cap_dfd_rs, NULL));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, dir_mask, cap_dfd_rsstat, NULL));

  // To add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, file_mask, cap_dfd_rsstat, "file"));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, file_mask, cap_dfd_rsl, "file"));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, file_mask, cap_dfd, "file"));

  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    // Child: Perform activity in the directory under notify.
    sleep(1);
    unlink(TmpFile("cap_notify/temp"));
    int fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644);
    close(fd);
    exit(0);
  }

  // Need CAP_EVENT to poll.
  struct pollfd poll_fd;
  poll_fd.revents = 0;
  poll_fd.events = POLLIN;
  poll_fd.fd = cap_fd_rw;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_EQ(0, (poll_fd.revents & POLLIN));
  EXPECT_NE(0, (poll_fd.revents & POLLNVAL));

  poll_fd.fd = cap_fd_not;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_EQ(0, (poll_fd.revents & POLLIN));
  EXPECT_NE(0, (poll_fd.revents & POLLNVAL));

  poll_fd.fd = cap_fd_poll;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_NE(0, (poll_fd.revents & POLLIN));
  EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));

  // Need CAP_READ to read.
  struct fanotify_event_metadata ev;
  memset(&ev, 0, sizeof(ev));
  EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev)));
  rc = read(fa_fd, &ev, sizeof(ev));
  EXPECT_OK(rc);
  EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc);
  EXPECT_EQ(child, ev.pid);
  EXPECT_NE(0, ev.fd);
  // Only treat ev.fd as a descriptor we own if a whole event was read;
  // otherwise it still holds the zero from the memset() above.
  const bool got_event = (rc == (int)sizeof(ev));

  // TODO(drysdale): reinstate if/when capsicum-linux propagates rights
  // to fanotify-generated FDs.
#ifdef OMIT
  // fanotify(7) gives us a FD for the changed file.  This should
  // only have rights that are a subset of those for the original
  // monitored directory file descriptor.
  cap_rights_t rights;
  CAP_SET_ALL(&rights);
  EXPECT_OK(cap_rights_get(ev.fd, &rights));
  EXPECT_RIGHTS_IN(&rights, &r_rslstat);
#endif

  // Wait for the child.
  int status;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);

  // Release the descriptor that came with the event so it does not leak
  // into later tests.
  if (got_event && ev.fd > 0) close(ev.fd);
  close(cap_dfd_rsstat);
  close(cap_dfd_rsl);
  close(cap_dfd_rs);
  close(cap_dfd);
  close(dfd);
  unlink(TmpFile("cap_notify/file"));
  unlink(TmpFile("cap_notify/temp"));
  rmdir(TmpFile("cap_notify"));
  close(cap_fd_not);
  close(cap_fd_poll);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(fa_fd);
}
523 #endif
524
// Exercises inotify through capability-restricted duplicates: adding a watch
// is refused on a descriptor without CAP_NOTIFY, reading events is refused
// without CAP_READ, and removing a watch is refused on the write-only
// descriptor but allowed on the one that holds CAP_NOTIFY.
TEST(Linux, inotify) {
  int i_fd = inotify_init();
  EXPECT_OK(i_fd);

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rws;
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwsnotify;
  cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);

  int cap_fd_ro = dup(i_fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
  int cap_fd_wo = dup(i_fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
  int cap_fd_rw = dup(i_fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
  int cap_fd_all = dup(i_fd);
  EXPECT_OK(cap_fd_all);
  EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify));

  // File to be watched; the write further down generates the event.
  int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);
  EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY));
  int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY);
  EXPECT_OK(wd);

  unsigned char buffer[] = {1, 2, 3, 4};
  EXPECT_OK(write(fd, buffer, sizeof(buffer)));

  // Reading the event is refused on the write-only descriptor.
  struct inotify_event iev;
  memset(&iev, 0, sizeof(iev));
  EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev)));
  int rc = read(cap_fd_ro, &iev, sizeof(iev));
  EXPECT_OK(rc);
  EXPECT_EQ((int)sizeof(iev), rc);
  EXPECT_EQ(wd, iev.wd);

  EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd));
  EXPECT_OK(inotify_rm_watch(cap_fd_all, wd));

  close(fd);
  close(cap_fd_all);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(i_fd);
  unlink(TmpFile("cap_inotify"));
}
578
// For each of the candidate mini-me binaries that can be opened, forks a
// child that enters capability mode and fexecve()s the binary with
// "--capmode", then expects the child to exit with status 0.  Skipped when
// none of the candidates can be opened.
TEST(Linux, ArchChangeIfAvailable) {
  const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"};
  const char* available[] = {NULL, NULL, NULL};
  int available_fds[3];
  int num_available = 0;
  char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL};
  char* null_envp[] = {NULL};

  // Collect the candidates that exist, packed at the front of the arrays.
  for (int cand = 0; cand < 3; cand++) {
    int opened = open(prog_candidates[cand], O_RDONLY);
    if (opened < 0) {
      continue;
    }
    available_fds[num_available] = opened;
    available[num_available] = prog_candidates[cand];
    num_available++;
  }
  if (num_available == 0) {
    GTEST_SKIP() << "no different-architecture programs available";
  }

  for (int idx = 0; idx < num_available; idx++) {
    // Fork-and-exec a binary of this architecture.
    pid_t child = fork();
    if (child == 0) {
      EXPECT_OK(cap_enter());  // Enter capability mode
      if (verbose) {
        fprintf(stderr, "[%d] call fexecve(%s, %s)\n",
                getpid_(), available[idx], argv_pass[1]);
      }
      argv_pass[0] = (char *)available[idx];
      int exec_rc = fexecve_(available_fds[idx], argv_pass, null_envp);
      // Only reached if the exec failed.
      fprintf(stderr, "fexecve(%s) returned %d errno %d\n", available[idx], exec_rc, errno);
      exit(99);  // Should not reach here.
    }
    int wstatus;
    EXPECT_EQ(child, waitpid(child, &wstatus, 0));
    int exit_code = -1;
    if (WIFEXITED(wstatus)) {
      exit_code = WEXITSTATUS(wstatus);
    }
    EXPECT_EQ(0, exit_code);
    close(available_fds[idx]);
  }
}
617
// Checks (root only) that setns() on a namespace descriptor is refused
// without CAP_SETNS, and that in capability mode setns() is refused while
// unshare() is still allowed.
FORK_TEST(Linux, NamespaceIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();
  pid_t me = getpid_();

  // Create a new UTS namespace.
  EXPECT_OK(unshare(CLONE_NEWUTS));
  // Open an FD to its symlink.
  char buffer[256];
  snprintf(buffer, sizeof(buffer), "/proc/%d/ns/uts", me);
  int ns_fd = open(buffer, O_RDONLY);
  EXPECT_OK(ns_fd);  // Everything below operates on duplicates of this descriptor.

  cap_rights_t r_rwlstat;
  cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT);
  cap_rights_t r_rwlstatns;
  cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS);

  int cap_fd = dup(ns_fd);
  EXPECT_OK(cap_fd);
  EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat));
  int cap_fd_setns = dup(ns_fd);
  EXPECT_OK(cap_fd_setns);
  EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns));
  // Only the duplicate that includes CAP_SETNS may be used to join the namespace.
  EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS));
  EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS));

  EXPECT_OK(cap_enter());  // Enter capability mode.

  // No setns(2) but unshare(2) is allowed.
  EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS));
  EXPECT_OK(unshare(CLONE_NEWUTS));
}
649
SendFD(int fd,int over)650 static void SendFD(int fd, int over) {
651 struct msghdr mh;
652 mh.msg_name = NULL; // No address needed
653 mh.msg_namelen = 0;
654 char buffer1[1024];
655 struct iovec iov[1];
656 iov[0].iov_base = buffer1;
657 iov[0].iov_len = sizeof(buffer1);
658 mh.msg_iov = iov;
659 mh.msg_iovlen = 1;
660 char buffer2[1024];
661 mh.msg_control = buffer2;
662 mh.msg_controllen = CMSG_LEN(sizeof(int));
663 struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
664 cmptr->cmsg_level = SOL_SOCKET;
665 cmptr->cmsg_type = SCM_RIGHTS;
666 cmptr->cmsg_len = CMSG_LEN(sizeof(int));
667 *(int *)CMSG_DATA(cmptr) = fd;
668 buffer1[0] = 0;
669 iov[0].iov_len = 1;
670 int rc = sendmsg(over, &mh, 0);
671 EXPECT_OK(rc);
672 }
673
ReceiveFD(int over)674 static int ReceiveFD(int over) {
675 struct msghdr mh;
676 mh.msg_name = NULL; // No address needed
677 mh.msg_namelen = 0;
678 char buffer1[1024];
679 struct iovec iov[1];
680 iov[0].iov_base = buffer1;
681 iov[0].iov_len = sizeof(buffer1);
682 mh.msg_iov = iov;
683 mh.msg_iovlen = 1;
684 char buffer2[1024];
685 mh.msg_control = buffer2;
686 mh.msg_controllen = sizeof(buffer2);
687 int rc = recvmsg(over, &mh, 0);
688 EXPECT_OK(rc);
689 EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
690 struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
691 int fd = *(int*)CMSG_DATA(cmptr);
692 EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
693 cmptr = CMSG_NXTHDR(&mh, cmptr);
694 EXPECT_TRUE(cmptr == NULL);
695 return fd;
696 }
697
698 static int shared_pd = -1;
699 static int shared_sock_fds[2];
700
ChildFunc(void * arg)701 static int ChildFunc(void *arg) {
702 // This function is running in a new PID namespace, and so is pid 1.
703 if (verbose) fprintf(stderr, " ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
704 EXPECT_EQ(1, getpid_());
705 EXPECT_EQ(0, getppid());
706
707 // The shared process descriptor is outside our namespace, so we cannot
708 // get its pid.
709 if (verbose) fprintf(stderr, " ChildFunc: shared_pd=%d\n", shared_pd);
710 pid_t shared_child = -1;
711 EXPECT_OK(pdgetpid(shared_pd, &shared_child));
712 if (verbose) fprintf(stderr, " ChildFunc: corresponding pid=%d\n", shared_child);
713 EXPECT_EQ(0, shared_child);
714
715 // But we can pdkill() it even so.
716 if (verbose) fprintf(stderr, " ChildFunc: call pdkill(pd=%d)\n", shared_pd);
717 EXPECT_OK(pdkill(shared_pd, SIGINT));
718
719 int pd;
720 pid_t child = pdfork(&pd, 0);
721 EXPECT_OK(child);
722 if (child == 0) {
723 // Child: expect pid 2.
724 if (verbose) fprintf(stderr, " child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
725 EXPECT_EQ(2, getpid_());
726 EXPECT_EQ(1, getppid());
727 while (true) {
728 if (verbose) fprintf(stderr, " child of ChildFunc: \"I aten't dead\"\n");
729 sleep(1);
730 }
731 exit(0);
732 }
733 EXPECT_EQ(2, child);
734 EXPECT_PID_ALIVE(child);
735 if (verbose) fprintf(stderr, " ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
736 pd, child, ProcessState(child));
737
738 pid_t pid;
739 EXPECT_OK(pdgetpid(pd, &pid));
740 EXPECT_EQ(child, pid);
741
742 sleep(2);
743
744 // Send the process descriptor over UNIX domain socket back to parent.
745 SendFD(pd, shared_sock_fds[1]);
746
747 // Wait for death of (grand)child, killed by our parent.
748 if (verbose) fprintf(stderr, " ChildFunc: wait on pid=%d\n", child);
749 int status;
750 EXPECT_EQ(child, wait4(child, &status, __WALL, NULL));
751
752 if (verbose) fprintf(stderr, " ChildFunc: return 0\n");
753 return 0;
754 }
755
756 #define STACK_SIZE (1024 * 1024)
757 static char child_stack[STACK_SIZE];
758
759 // TODO(drysdale): fork into a user namespace first so GTEST_SKIP_IF_NOT_ROOT can be removed.
TEST(Linux,PidNamespacePdForkIfRoot)760 TEST(Linux, PidNamespacePdForkIfRoot) {
761 GTEST_SKIP_IF_NOT_ROOT();
762 // Pass process descriptors in both directions across a PID namespace boundary.
763 // pdfork() off a child before we start, holding its process descriptor in a global
764 // variable that's accessible to children.
765 pid_t firstborn = pdfork(&shared_pd, 0);
766 EXPECT_OK(firstborn);
767 if (firstborn == 0) {
768 while (true) {
769 if (verbose) fprintf(stderr, " Firstborn: \"I aten't dead\"\n");
770 sleep(1);
771 }
772 exit(0);
773 }
774 EXPECT_PID_ALIVE(firstborn);
775 if (verbose) fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n",
776 shared_pd, firstborn, ProcessState(firstborn));
777 sleep(2);
778
779 // Prepare sockets to communicate with child process.
780 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
781
782 // Clone into a child process with a new pid namespace.
783 pid_t child = clone(ChildFunc, child_stack + STACK_SIZE,
784 CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
785 EXPECT_OK(child);
786 EXPECT_PID_ALIVE(child);
787 if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
788
789 // Ensure the child runs. First thing it does is to kill our firstborn, using shared_pd.
790 sleep(1);
791 EXPECT_PID_DEAD(firstborn);
792
793 // But we can still retrieve firstborn's PID, as it's not been reaped yet.
794 pid_t child0;
795 EXPECT_OK(pdgetpid(shared_pd, &child0));
796 EXPECT_EQ(firstborn, child0);
797 if (verbose) fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n",
798 shared_pd, child0, ProcessState(child0));
799
800 // Now reap it.
801 int status;
802 EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL));
803
804 // Get the process descriptor of the child-of-child via socket transfer.
805 int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
806
807 // Our notion of the pid associated with the grandchild is in the main PID namespace.
808 pid_t grandchild;
809 EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
810 EXPECT_NE(2, grandchild);
811 if (verbose) fprintf(stderr, "Parent: pre-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
812 grandchild_pd, grandchild, ProcessState(grandchild));
813 EXPECT_PID_ALIVE(grandchild);
814
815 // Kill the grandchild via the process descriptor.
816 EXPECT_OK(pdkill(grandchild_pd, SIGINT));
817 usleep(10000);
818 if (verbose) fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
819 grandchild_pd, grandchild, ProcessState(grandchild));
820 EXPECT_PID_DEAD(grandchild);
821
822 sleep(2);
823
824 // Wait for the child.
825 EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
826 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
827 EXPECT_EQ(0, rc);
828
829 close(shared_sock_fds[0]);
830 close(shared_sock_fds[1]);
831 close(shared_pd);
832 close(grandchild_pd);
833 }
834
NSInit(void * data)835 int NSInit(void *data) {
836 // This function is running in a new PID namespace, and so is pid 1.
837 if (verbose) fprintf(stderr, " NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
838 EXPECT_EQ(1, getpid_());
839 EXPECT_EQ(0, getppid());
840
841 int pd;
842 pid_t child = pdfork(&pd, 0);
843 EXPECT_OK(child);
844 if (child == 0) {
845 // Child: loop forever until terminated.
846 if (verbose) fprintf(stderr, " child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
847 while (true) {
848 if (verbose) fprintf(stderr, " child of NSInit: \"I aten't dead\"\n");
849 usleep(100000);
850 }
851 exit(0);
852 }
853 EXPECT_EQ(2, child);
854 EXPECT_PID_ALIVE(child);
855 if (verbose) fprintf(stderr, " NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
856 pd, child, ProcessState(child));
857 sleep(1);
858
859 // Send the process descriptor over UNIX domain socket back to parent.
860 SendFD(pd, shared_sock_fds[1]);
861 close(pd);
862
863 // Wait for a byte back in the other direction.
864 int value;
865 if (verbose) fprintf(stderr, " NSInit: block waiting for value\n");
866 read(shared_sock_fds[1], &value, sizeof(value));
867
868 if (verbose) fprintf(stderr, " NSInit: return 0\n");
869 return 0;
870 }
871
// Checks (root only) that when the init process of a PID namespace exits,
// a process it pdfork()ed inside that namespace goes away too, even though
// the parent still holds a process descriptor for it.
TEST(Linux, DeadNSInitIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();

  // Prepare sockets to communicate with child process.
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));

  // Clone into a child process with a new pid namespace.
  pid_t child = clone(NSInit, child_stack + STACK_SIZE,
                      CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
  usleep(10000);
  EXPECT_OK(child);
  EXPECT_PID_ALIVE(child);
  if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));

  // Get the process descriptor of the child-of-child via socket transfer.
  int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
  pid_t grandchild;
  EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
  if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));

  // Send an int to the child to trigger its termination.  Grandchild should also
  // go, as its init process is gone.  The write is checked because the rest
  // of the test depends on the child receiving it.
  int zero = 0;
  if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
  EXPECT_OK(write(shared_sock_fds[0], &zero, sizeof(zero)));
  EXPECT_PID_ZOMBIE(child);
  EXPECT_PID_GONE(grandchild);

  // Wait for the child.
  int status;
  EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);
  EXPECT_PID_GONE(child);

  close(shared_sock_fds[0]);
  close(shared_sock_fds[1]);
  close(grandchild_pd);

  if (verbose) {
    fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
    fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
  }
}
916
// Variant of the PID-namespace teardown test in which the grandchild is killed
// through its process descriptor, and that descriptor is closed, *before* the
// namespace's init process (the direct child running NSInit) is told to exit.
TEST(Linux, DeadNSInit2IfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();

  // Prepare sockets to communicate with child process.
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));

  // Clone into a child process with a new pid namespace.
  pid_t child = clone(NSInit, child_stack + STACK_SIZE,
                      CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
  usleep(10000);
  EXPECT_OK(child);
  if (child < 0) {
    // Without a child nothing will be sent over the socket pair, so the
    // ReceiveFD() call below would presumably never complete.  Give up now.
    close(shared_sock_fds[0]);
    close(shared_sock_fds[1]);
    return;
  }
  EXPECT_PID_ALIVE(child);
  if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));

  // Get the process descriptor of the child-of-child via socket transfer.
  int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
  pid_t grandchild;
  EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
  if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));

  // Kill the grandchild
  EXPECT_OK(pdkill(grandchild_pd, SIGINT));
  usleep(10000);
  EXPECT_PID_ZOMBIE(grandchild);
  // Close the process descriptor, so there are now no procdesc references to grandchild.
  close(grandchild_pd);

  // Send an int to the child to trigger its termination.  Grandchild should also
  // go, as its init process is gone.
  int zero = 0;
  if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
  EXPECT_OK(write(shared_sock_fds[0], &zero, sizeof(zero)));
  EXPECT_PID_ZOMBIE(child);
  EXPECT_PID_GONE(grandchild);

  // Wait for the child.  WNOHANG may legitimately return 0 or -1, in which
  // case status is not filled in, so only decode it when the child was reaped.
  int status = 0;
  pid_t waited = waitpid(child, &status, WNOHANG);
  EXPECT_EQ(child, waited);
  int rc = (waited == child && WIFEXITED(status)) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);

  close(shared_sock_fds[0]);
  close(shared_sock_fds[1]);

  if (verbose) {
    fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
    fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
  }
}
966
967 #ifdef __x86_64__
FORK_TEST(Linux,CheckHighWord)968 FORK_TEST(Linux, CheckHighWord) {
969 EXPECT_OK(cap_enter()); // Enter capability mode.
970
971 int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
972 EXPECT_OK(rc);
973 EXPECT_EQ(1, rc); // no_new_privs = 1
974
975 // Set some of the high 32-bits of argument zero.
976 uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL;
977 EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0));
978 }
979 #endif
980
// Walks the openat-beneath prctl flag through set / clear while outside
// capability mode, then checks that entering capability mode turns it on and
// that it can no longer be cleared.
FORK_TEST(Linux, PrctlOpenatBeneath) {
  // Turn on no_new_privs and read it back.
  EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  int flag = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(flag);
  EXPECT_EQ(1, flag);  // no_new_privs = 1

  // Turn on openat-beneath mode and read it back.
  EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0));
  flag = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(flag);
  EXPECT_EQ(1, flag);  // openat_beneath = 1

  // Turn openat-beneath mode off again; still permitted at this point.
  EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
  flag = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(flag);
  EXPECT_EQ(0, flag);  // openat_beneath = 0

  EXPECT_OK(cap_enter());  // Enter capability mode

  // Capability mode is expected to have switched openat_beneath back on.
  flag = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(flag);
  EXPECT_EQ(1, flag);  // openat_beneath = 1

  // An attempt to clear it must now be refused, leaving the flag set.
  EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
  flag = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(flag);
  EXPECT_EQ(1, flag);  // openat_beneath = 1
}
1014
// Checks the interaction between no_new_privs and seccomp-bpf: installing a
// filter is refused with EACCES while no_new_privs is 0 (and CAP_SYS_ADMIN is
// not held), and succeeds once no_new_privs has been set to 1.
FORK_TEST(Linux, NoNewPrivs) {
  if (getuid() == 0) {
    // Running as root: drop the CAP_SYS_ADMIN POSIX.1e capability from the
    // effective, permitted and inheritable sets.
    struct __user_cap_header_struct header;
    header.version = _LINUX_CAPABILITY_VERSION_3;
    header.pid = getpid_();
    struct __user_cap_data_struct caps[3];
    EXPECT_OK(capget(&header, caps));
    const unsigned int keep_mask = ~(1u << CAP_SYS_ADMIN);
    caps[0].effective &= keep_mask;
    caps[0].permitted &= keep_mask;
    caps[0].inheritable &= keep_mask;
    EXPECT_OK(capset(&header, caps));
  }
  int result = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(result);
  EXPECT_EQ(0, result);  // no_new_privs == 0

  // A one-instruction filter that allows everything.
  struct sock_filter allow_all[] = {
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
  };
  struct sock_fprog prog;
  prog.len = (sizeof(allow_all) / sizeof(allow_all[0]));
  prog.filter = allow_all;

  // Can't enter seccomp-bpf mode with no_new_privs == 0
  result = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  EXPECT_EQ(-1, result);
  EXPECT_EQ(EACCES, errno);

  // Set no_new_privs = 1
  EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  result = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(result);
  EXPECT_EQ(1, result);  // no_new_privs = 1

  // Can now turn on seccomp mode
  EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0));
}
1052
/*
 * Macros for BPF generation.
 *
 * Each macro expands to one or two `struct sock_filter` initializers, so they
 * are meant to be listed inside a `struct sock_filter[]` initializer.
 */

/* Return instruction: fail the syscall with errno `err` (low 16 bits only). */
#define BPF_RETURN_ERRNO(err) \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | ((err) & 0xFFFF))
/* Return instruction with the SECCOMP_RET_KILL action. */
#define BPF_KILL_PROCESS \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
/* Return instruction: let the syscall proceed. */
#define BPF_ALLOW \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
/* Load the syscall number from seccomp_data into the accumulator. */
#define EXAMINE_SYSCALL \
  BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr))
/*
 * The three macros below compare the accumulator with __NR_<name>: on a match
 * (jt == 0) execution falls through to the action that follows; otherwise
 * (jf == 1) that single action instruction is skipped.
 */
#define ALLOW_SYSCALL(name) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_ALLOW
#define KILL_SYSCALL(name) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_KILL_PROCESS
#define FAIL_SYSCALL(name, err) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_RETURN_ERRNO(err)
1071
// Combines a seccomp-bpf filter with Capsicum capability mode in a forked
// child and checks which mechanism decides the outcome of each syscall.  The
// child is expected to be terminated by SIGSYS when it finally calls fsync().
TEST(Linux, CapModeWithBPF) {
  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644);
    EXPECT_OK(fd);
    cap_rights_t rights;
    cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
    EXPECT_OK(cap_rights_limit(fd, &rights));

    // Filter: fchmod -> ENOMEM, fstat -> ENOEXEC, close -> allowed,
    // fsync -> kill, everything else -> allowed.
    struct sock_filter filter[] = { EXAMINE_SYSCALL,
                                    FAIL_SYSCALL(fchmod, ENOMEM),
                                    FAIL_SYSCALL(fstat, ENOEXEC),
                                    ALLOW_SYSCALL(close),
                                    KILL_SYSCALL(fsync),
                                    BPF_ALLOW };
    struct sock_fprog bpf = {.len = (sizeof(filter) / sizeof(filter[0])),
                             .filter = filter};
    // Set up seccomp-bpf first.
    EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
    EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));

    EXPECT_OK(cap_enter());  // Enter capability mode.

    // fchmod is allowed by Capsicum, but failed by BPF.
    EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644));
    // open is allowed by BPF, but failed by Capsicum
    EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY));
    // fstat is failed by both BPF and Capsicum; tie-break is on errno
    struct stat buf;
    EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf));
    // fsync is allowed by Capsicum, but BPF's SIGSYS generation take precedence
    fsync(fd);  // terminate with unhandled SIGSYS
    exit(0);
  }
  // Initialised so that a failed waitpid() cannot leave us decoding garbage.
  int status = 0;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  EXPECT_TRUE(WIFSIGNALED(status));
  EXPECT_EQ(SIGSYS, WTERMSIG(status));
  unlink(TmpFile("cap_bpf_capmode"));
}
1112
// Submits Linux native AIO requests (via the raw io_* syscalls) against
// capabilities with differing rights and checks which submissions are
// rejected for lack of rights.
TEST(Linux, AIO) {
  int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);

  // Read+seek capability.
  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
  EXPECT_OK(cap_ro);

  // Write+seek capability.
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  int cap_wo = dup(fd);
  EXPECT_OK(cap_wo);
  EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
  EXPECT_OK(cap_wo);

  // Read+write+seek+fsync capability.
  cap_rights_t r_rwssync;
  cap_rights_init(&r_rwssync, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
  int cap_all = dup(fd);
  EXPECT_OK(cap_all);
  EXPECT_OK(cap_rights_limit(cap_all, &r_rwssync));
  EXPECT_OK(cap_all);

  // Linux: io_setup, io_submit, io_getevents, io_cancel, io_destroy
  aio_context_t ctx = 0;
  EXPECT_OK(syscall(__NR_io_setup, 10, &ctx));

  // One control block, re-targeted and re-submitted for every case below.
  // The memset leaves the request priority and file offset at zero.
  unsigned char payload[32] = {1, 2, 3, 4};
  struct iocb cb;
  memset(&cb, 0, sizeof(cb));
  cb.aio_buf = (__u64)(uintptr_t)payload;
  cb.aio_nbytes = 4;
  struct iocb* batch[1] = {&cb};

  // Write operation
  cb.aio_lio_opcode = IOCB_CMD_PWRITE;
  cb.aio_fildes = cap_ro;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, batch));
  cb.aio_fildes = cap_wo;
  EXPECT_OK(syscall(__NR_io_submit, ctx, 1, batch));

  // Sync operation (still targeting the write-only capability)
  cb.aio_lio_opcode = IOCB_CMD_FSYNC;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, batch));
  cb.aio_lio_opcode = IOCB_CMD_FDSYNC;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, batch));
  // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented
  cb.aio_fildes = cap_all;
  EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, batch));
  cb.aio_lio_opcode = IOCB_CMD_FSYNC;
  EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, batch));

  // Read operation
  cb.aio_lio_opcode = IOCB_CMD_PREAD;
  cb.aio_fildes = cap_wo;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, batch));
  cb.aio_fildes = cap_ro;
  EXPECT_OK(syscall(__NR_io_submit, ctx, 1, batch));

  EXPECT_OK(syscall(__NR_io_destroy, ctx));

  close(cap_all);
  close(cap_wo);
  close(cap_ro);
  close(fd);
  unlink(TmpFile("cap_aio"));
}
1185
1186 #ifndef KCMP_FILE
1187 #define KCMP_FILE 0
1188 #endif
TEST(Linux,KcmpIfAvailable)1189 TEST(Linux, KcmpIfAvailable) {
1190 // This requires CONFIG_CHECKPOINT_RESTORE in kernel config.
1191 int fd = open("/etc/passwd", O_RDONLY);
1192 EXPECT_OK(fd);
1193 pid_t parent = getpid_();
1194
1195 errno = 0;
1196 int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd);
1197 if (rc == -1 && errno == ENOSYS) {
1198 GTEST_SKIP() << "kcmp(2) gives -ENOSYS";
1199 }
1200
1201 pid_t child = fork();
1202 if (child == 0) {
1203 // Child: limit rights on FD.
1204 child = getpid_();
1205 EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
1206 cap_rights_t rights;
1207 cap_rights_init(&rights, CAP_READ, CAP_WRITE);
1208 EXPECT_OK(cap_rights_limit(fd, &rights));
1209 // A capability wrapping a normal FD is different (from a kcmp(2) perspective)
1210 // than the original file.
1211 EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
1212 exit(HasFailure());
1213 }
1214 // Wait for the child.
1215 int status;
1216 EXPECT_EQ(child, waitpid(child, &status, 0));
1217 rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
1218 EXPECT_EQ(0, rc);
1219
1220 close(fd);
1221 }
1222
// Reads /proc/<pid>/fdinfo/<fd> for a rights-limited descriptor (through a
// rights-limited descriptor on the fdinfo file itself) and checks that the
// text reports both the file position and the Capsicum rights.
TEST(Linux, ProcFS) {
  cap_rights_t rights;
  cap_rights_init(&rights, CAP_READ, CAP_SEEK);
  int fd = open("/etc/passwd", O_RDONLY);
  EXPECT_OK(fd);
  lseek(fd, 4, SEEK_SET);  // Gives the "pos:" line a recognisable value.
  int cap = dup(fd);
  EXPECT_OK(cap);
  EXPECT_OK(cap_rights_limit(cap, &rights));
  pid_t me = getpid_();

  char path[64];
  snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", me, cap);
  int procfd = open(path, O_RDONLY);
  EXPECT_OK(procfd) << " failed to open " << path;
  if (procfd < 0) {
    close(cap);
    close(fd);
    return;
  }
  int proccap = dup(procfd);
  EXPECT_OK(proccap);
  EXPECT_OK(cap_rights_limit(proccap, &rights));

  // read() does not NUL-terminate, so leave room for a terminator and add it
  // before handing the buffer to strstr().
  char buffer[1024];
  ssize_t len = read(proccap, buffer, sizeof(buffer) - 1);
  EXPECT_OK(len);
  buffer[len > 0 ? len : 0] = '\0';
  // The fdinfo should include the file pos of the underlying file
  EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4"));
  // ...and the rights of the Capsicum capability.
  EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x"));

  close(procfd);
  close(proccap);
  close(cap);
  close(fd);
}
1254
// From inside capability mode, queries the CPU-time clocks of another process
// (a forked child, and pid 1) by hand-building their clock IDs.
FORK_TEST(Linux, ProcessClocks) {
  pid_t self = getpid_();
  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    // Child just needs to exist for a little while.
    usleep(100000);
    exit(0);
  }

  EXPECT_OK(cap_enter());  // Enter capability mode.

  // Nefariously build a clock ID for the child's CPU time.
  // This relies on knowledge of the internal layout of clock IDs: the
  // complemented pid shifted up by three, with the low bits left as zero.
  // The arithmetic is done on unsigned values because left-shifting a
  // negative signed value is undefined before C++20.
  clockid_t child_clock =
      static_cast<clockid_t>(((~static_cast<unsigned int>(child)) << 3) | 0x0u);
  struct timespec ts;
  memset(&ts, 0, sizeof(ts));

  // TODO(drysdale): Should not be possible to retrieve info about a
  // different process, as the PID global namespace should be locked
  // down.
  EXPECT_OK(clock_gettime(child_clock, &ts));
  if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n",
                       self, child, (unsigned int)child_clock, (long)ts.tv_sec, (long)ts.tv_nsec);

  // Same again for pid 1.
  child_clock = static_cast<clockid_t>(((~1u) << 3) | 0x0u);
  memset(&ts, 0, sizeof(ts));
  EXPECT_OK(clock_gettime(child_clock, &ts));
  if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n",
                       self, (unsigned int)child_clock, (long)ts.tv_sec, (long)ts.tv_nsec);

  // Orphan the child.
}
1289
// File leases via fcntl(F_SETLEASE / F_GETLEASE): a descriptor limited to
// read/write only is refused, while one that also carries CAP_FLOCK and
// CAP_FSIGNAL can take and release a lease.
TEST(Linux, SetLease) {
  int fd_all = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd_all);
  int fd_rw = dup(fd_all);
  EXPECT_OK(fd_rw);

  cap_rights_t lease_rights;
  cap_rights_init(&lease_rights, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL);
  EXPECT_OK(cap_rights_limit(fd_all, &lease_rights));

  cap_rights_t plain_rights;
  cap_rights_init(&plain_rights, CAP_READ, CAP_WRITE);
  EXPECT_OK(cap_rights_limit(fd_rw, &plain_rights));

  // Neither lease operation is available on the read/write-only descriptor.
  EXPECT_NOTCAPABLE(fcntl(fd_rw, F_SETLEASE, F_WRLCK));
  EXPECT_NOTCAPABLE(fcntl(fd_rw, F_GETLEASE));

  // tmpfs doesn't support leases
  const bool leases_supported = !tmpdir_on_tmpfs;
  if (leases_supported) {
    EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_WRLCK));
    EXPECT_EQ(F_WRLCK, fcntl(fd_all, F_GETLEASE));

    EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_UNLCK, 0));
    EXPECT_EQ(F_UNLCK, fcntl(fd_all, F_GETLEASE));
  }

  close(fd_all);
  close(fd_rw);
  unlink(TmpFile("cap_lease"));
}
1318
// Calls the raw cap_rights_limit syscall with a series of malformed argument
// combinations and checks the errno each one produces.
TEST(Linux, InvalidRightsSyscall) {
  int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644);
  EXPECT_OK(fd);

  cap_rights_t rights;
  cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT);

  // Use the raw syscall throughout.  Baseline: a well-formed call succeeds.
  EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));

  // Directly access the syscall, and find all unseemly manner of use for it.

  // - Invalid flags
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 1));
  EXPECT_EQ(EINVAL, errno);

  // - Specify an fcntl subright, but no CAP_FCNTL set
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, CAP_FCNTL_GETFL, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // - Specify an ioctl subright, but no CAP_IOCTL set
  unsigned int single_ioctl = 1;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, &single_ioctl, 0));
  EXPECT_EQ(EINVAL, errno);

  // - N ioctls, but null pointer passed
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // - Invalid nioctls
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, -2, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // - Null primary rights
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0));
  EXPECT_EQ(EFAULT, errno);

  // - Invalid index bitmask
  rights.cr_rights[0] |= 3ULL << 57;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // - Invalid version (the index bits set just above remain set)
  rights.cr_rights[0] |= 2ULL << 62;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  close(fd);
  unlink(TmpFile("cap_invalid_rights"));
}
1361
1362 FORK_TEST_ON(Linux, OpenByHandleAtIfRoot, TmpFile("cap_openbyhandle_testfile")) {
1363 GTEST_SKIP_IF_NOT_ROOT();
1364 int dir = open(tmpdir.c_str(), O_RDONLY);
1365 EXPECT_OK(dir);
1366 int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644);
1367 EXPECT_OK(fd);
1368 const char* message = "Saved text";
1369 EXPECT_OK(write(fd, message, strlen(message)));
1370 close(fd);
1371
1372 struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
1373 fhandle->handle_bytes = MAX_HANDLE_SZ;
1374 int mount_id;
1375 EXPECT_OK(name_to_handle_at(dir, "cap_openbyhandle_testfile", fhandle, &mount_id, 0));
1376
1377 fd = open_by_handle_at(dir, fhandle, O_RDONLY);
1378 EXPECT_OK(fd);
1379 char buffer[200];
1380 ssize_t len = read(fd, buffer, 199);
1381 EXPECT_OK(len);
1382 EXPECT_EQ(std::string(message), std::string(buffer, len));
1383 close(fd);
1384
1385 // Cannot issue open_by_handle_at after entering capability mode.
1386 cap_enter();
1387 EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY));
1388
1389 close(dir);
1390 }
1391
// Invokes getrandom(2) through the raw syscall interface.  When the build
// headers provide no syscall number for it, fails with errno set to ENOSYS.
int getrandom_(void *buf, size_t buflen, unsigned int flags) {
#ifndef __NR_getrandom
  errno = ENOSYS;
  return -1;
#else
  return syscall(__NR_getrandom, buf, buflen, flags);
#endif
}
1400
1401 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
1402 #include <linux/random.h> // Requires 3.17 kernel
FORK_TEST(Linux,GetRandom)1403 FORK_TEST(Linux, GetRandom) {
1404 EXPECT_OK(cap_enter());
1405 unsigned char buffer[1024];
1406 unsigned char buffer2[1024];
1407 EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK));
1408 EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK));
1409 EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer)));
1410 }
1411 #endif
1412
// Invokes memfd_create(2) through the raw syscall interface.  When the build
// headers provide no syscall number for it, fails with errno set to ENOSYS.
int memfd_create_(const char *name, unsigned int flags) {
#ifndef __NR_memfd_create
  errno = ENOSYS;
  return -1;
#else
  return syscall(__NR_memfd_create, name, flags);
#endif
}
1421
1422 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
1423 #include <linux/memfd.h> // Requires 3.17 kernel
TEST(Linux,MemFDDeathTestIfAvailable)1424 TEST(Linux, MemFDDeathTestIfAvailable) {
1425 int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING);
1426 if (memfd == -1 && errno == ENOSYS) {
1427 GTEST_SKIP() << "memfd_create(2) gives -ENOSYS";
1428 }
1429 const int LEN = 16;
1430 EXPECT_OK(ftruncate(memfd, LEN));
1431 int memfd_ro = dup(memfd);
1432 int memfd_rw = dup(memfd);
1433 EXPECT_OK(memfd_ro);
1434 EXPECT_OK(memfd_rw);
1435 cap_rights_t rights;
1436 EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT)));
1437 EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD)));
1438
1439 unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0);
1440 EXPECT_NE((unsigned char *)MAP_FAILED, p_ro);
1441 unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0);
1442 EXPECT_NE((unsigned char *)MAP_FAILED, p_rw);
1443 EXPECT_EQ(MAP_FAILED,
1444 mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0));
1445
1446 *p_rw = 42;
1447 EXPECT_EQ(42, *p_ro);
1448 EXPECT_DEATH(*p_ro = 42, "");
1449
1450 #ifndef F_ADD_SEALS
1451 // Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17
1452 #define _F_LINUX_SPECIFIC_BASE F_SETLEASE
1453 #define F_ADD_SEALS (_F_LINUX_SPECIFIC_BASE + 9)
1454 #define F_GET_SEALS (_F_LINUX_SPECIFIC_BASE + 10)
1455 #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
1456 #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
1457 #define F_SEAL_GROW 0x0004 /* prevent file from growing */
1458 #define F_SEAL_WRITE 0x0008 /* prevent writes */
1459 #endif
1460
1461 // Reading the seal information requires CAP_FSTAT.
1462 int seals = fcntl(memfd, F_GET_SEALS);
1463 EXPECT_OK(seals);
1464 if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
1465 int seals_ro = fcntl(memfd_ro, F_GET_SEALS);
1466 EXPECT_EQ(seals, seals_ro);
1467 if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
1468 int seals_rw = fcntl(memfd_rw, F_GET_SEALS);
1469 EXPECT_NOTCAPABLE(seals_rw);
1470
1471 // Fail to seal as a writable mapping exists.
1472 EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1473 EXPECT_EQ(EBUSY, errno);
1474 *p_rw = 42;
1475
1476 // Seal the rw version; need to unmap first.
1477 munmap(p_rw, LEN);
1478 munmap(p_ro, LEN);
1479 EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1480
1481 seals = fcntl(memfd, F_GET_SEALS);
1482 EXPECT_OK(seals);
1483 if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
1484 seals_ro = fcntl(memfd_ro, F_GET_SEALS);
1485 EXPECT_EQ(seals, seals_ro);
1486 if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
1487
1488 // Remove the CAP_FCHMOD right, can no longer add seals.
1489 EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW)));
1490 EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1491
1492 close(memfd);
1493 close(memfd_ro);
1494 close(memfd_rw);
1495 }
1496 #endif
1497
1498 #else
// Empty function compiled only when __linux__ is not defined (presumably so
// that the translation unit is not left empty on other platforms).
void noop(void) {
}
1500 #endif
1501