1 // SPDX-License-Identifier: GPL-2.0-or-later
2 // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
3
4 #define _GNU_SOURCE
5
// Needed for linux/fanotify.h: declare __kernel_fsid_t ourselves before any
// header is included.  The macro of the same name makes #ifdef/#ifndef checks
// on __kernel_fsid_t see it as defined.
// NOTE(review): presumably this avoids a conflicting definition coming from
// the libc or kernel headers -- confirm.
typedef struct { int val[2]; } __kernel_fsid_t;
#define __kernel_fsid_t __kernel_fsid_t
11
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/fanotify.h>
#include <sys/wait.h>

#include "../../kselftest_harness.h"
#include "../statmount/statmount.h"
#include "../utils.h"
25
// mkdtemp() template for the directory the test's root tmpfs is mounted on
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";

// Mark type tried on "/" with the temporary FAN_REPORT_FID group; entry i is
// used in iteration i of the setup loop
static const int mark_types[] = {
	FAN_MARK_FILESYSTEM,
	FAN_MARK_MOUNT,
	FAN_MARK_INODE,
};

// Command applied to the mntns mark of fan_fd[i] once it has been added
static const int mark_cmds[] = {
	FAN_MARK_ADD,
	FAN_MARK_REMOVE,
	FAN_MARK_FLUSH,
};

// One fanotify group is created per mark command
#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
41
FIXTURE(fanotify)42 FIXTURE(fanotify) {
43 int fan_fd[NUM_FAN_FDS];
44 char buf[256];
45 unsigned int rem;
46 void *next;
47 char root_mntpoint[sizeof(root_mntpoint_templ)];
48 int orig_root;
49 int orig_ns_fd;
50 int ns_fd;
51 uint64_t root_id;
52 };
53
FIXTURE_SETUP(fanotify)54 FIXTURE_SETUP(fanotify)
55 {
56 int i, ret;
57
58 self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
59 ASSERT_GE(self->orig_ns_fd, 0);
60
61 ret = setup_userns();
62 ASSERT_EQ(ret, 0);
63
64 self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
65 ASSERT_GE(self->ns_fd, 0);
66
67 strcpy(self->root_mntpoint, root_mntpoint_templ);
68 ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
69
70 self->orig_root = open("/", O_PATH | O_CLOEXEC);
71 ASSERT_GE(self->orig_root, 0);
72
73 ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
74
75 ASSERT_EQ(chroot(self->root_mntpoint), 0);
76
77 ASSERT_EQ(chdir("/"), 0);
78
79 ASSERT_EQ(mkdir("a", 0700), 0);
80
81 ASSERT_EQ(mkdir("b", 0700), 0);
82
83 self->root_id = get_unique_mnt_id("/");
84 ASSERT_NE(self->root_id, 0);
85
86 for (i = 0; i < NUM_FAN_FDS; i++) {
87 int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
88 // Verify that watching tmpfs mounted inside userns is allowed
89 ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
90 FAN_OPEN, AT_FDCWD, "/");
91 ASSERT_EQ(ret, 0);
92 // ...but watching entire orig root filesystem is not allowed
93 ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
94 FAN_OPEN, self->orig_root, ".");
95 ASSERT_NE(ret, 0);
96 close(fan_fd);
97
98 self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
99 0);
100 ASSERT_GE(self->fan_fd[i], 0);
101 // Verify that watching mntns where group was created is allowed
102 ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
103 FAN_MARK_MNTNS,
104 FAN_MNT_ATTACH | FAN_MNT_DETACH,
105 self->ns_fd, NULL);
106 ASSERT_EQ(ret, 0);
107 // ...but watching orig mntns is not allowed
108 ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
109 FAN_MARK_MNTNS,
110 FAN_MNT_ATTACH | FAN_MNT_DETACH,
111 self->orig_ns_fd, NULL);
112 ASSERT_NE(ret, 0);
113 // On fd[0] we do an extra ADD that changes nothing.
114 // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
115 ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
116 FAN_MARK_MNTNS,
117 FAN_MNT_ATTACH | FAN_MNT_DETACH,
118 self->ns_fd, NULL);
119 ASSERT_EQ(ret, 0);
120 }
121
122 self->rem = 0;
123 }
124
FIXTURE_TEARDOWN(fanotify)125 FIXTURE_TEARDOWN(fanotify)
126 {
127 int i;
128
129 ASSERT_EQ(self->rem, 0);
130 for (i = 0; i < NUM_FAN_FDS; i++)
131 close(self->fan_fd[i]);
132
133 ASSERT_EQ(fchdir(self->orig_root), 0);
134
135 ASSERT_EQ(chroot("."), 0);
136
137 EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
138 EXPECT_EQ(chdir(self->root_mntpoint), 0);
139 EXPECT_EQ(chdir("/"), 0);
140 EXPECT_EQ(rmdir(self->root_mntpoint), 0);
141 }
142
// Consume one mount event and return the mount ID it carries; the event mask
// is stored in *mask.  When the buffer is empty all groups are read first:
// only group 0 may have events, the others must report EAGAIN.
static uint64_t expect_notify(struct __test_metadata *const _metadata,
			      FIXTURE_DATA(fanotify) *self,
			      uint64_t *mask)
{
	struct fanotify_event_metadata *meta;
	struct fanotify_event_info_mnt *mnt;
	unsigned int thislen;

	if (self->rem == 0) {
		ssize_t len;
		int i;

		// Groups 1,2 should get EAGAIN
		for (i = NUM_FAN_FDS - 1; i > 0; i--) {
			len = read(self->fan_fd[i], self->buf,
				   sizeof(self->buf));
			ASSERT_EQ(len, -1);
			ASSERT_EQ(errno, EAGAIN);
		}

		// Group 0 should get events; it is read last so that its data
		// is what ends up in the buffer
		len = read(self->fan_fd[0], self->buf, sizeof(self->buf));
		ASSERT_GT(len, 0);

		self->rem = len;
		self->next = (void *) self->buf;
	}

	meta = self->next;
	ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));

	// Advance the cursor past this event
	thislen = meta->event_len;
	self->next += thislen;
	self->rem -= thislen;

	*mask = meta->mask;

	// Whatever follows the fixed metadata must have exactly the size of
	// one mount info record
	mnt = (void *) (meta + 1);
	thislen -= sizeof(*meta);
	ASSERT_EQ(thislen, sizeof(*mnt));

	return mnt->mnt_id;
}
188
// Consume n events, storing the mask of event k in mask[k] and its mount ID
// in mnts[k].
static void expect_notify_n(struct __test_metadata *const _metadata,
			    FIXTURE_DATA(fanotify) *self,
			    unsigned int n, uint64_t mask[], uint64_t mnts[])
{
	unsigned int idx = 0;

	while (idx < n) {
		mnts[idx] = expect_notify(_metadata, self, &mask[idx]);
		idx++;
	}
}
198
// Consume one event, assert that its mask equals expect_mask and return the
// mount ID it carries.
static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
				   FIXTURE_DATA(fanotify) *self,
				   uint64_t expect_mask)
{
	uint64_t mask;
	const uint64_t mntid = expect_notify(_metadata, self, &mask);

	ASSERT_EQ(expect_mask, mask);

	return mntid;
}
210
211
// Consume n events that must all carry the given mask; the mount IDs are
// stored in mnts[0..n-1] in the order the events were read.
static void expect_notify_mask_n(struct __test_metadata *const _metadata,
				 FIXTURE_DATA(fanotify) *self,
				 uint64_t mask, unsigned int n, uint64_t mnts[])
{
	uint64_t *const end = mnts + n;
	uint64_t *out;

	for (out = mnts; out != end; out++)
		*out = expect_notify_mask(_metadata, self, mask);
}
221
// Assert that list1 and list2, both holding num mount IDs, describe the same
// set of mounts regardless of order.
static void verify_mount_ids(struct __test_metadata *const _metadata,
			     const uint64_t list1[], const uint64_t list2[],
			     size_t num)
{
	unsigned int i, j;

	// Check that neither list has any duplicates
	for (i = 0; i < num; i++) {
		for (j = 0; j < num; j++) {
			if (i == j)
				continue;
			ASSERT_NE(list1[i], list1[j]);
			ASSERT_NE(list2[i], list2[j]);
		}
	}

	// Check that all list1 members can be found in list2.  Together with
	// the above it means that list1 and list2 represent the same sets.
	for (i = 0; i < num; i++) {
		j = 0;
		while (j < num && list1[i] != list2[j])
			j++;
		// j == num means that list1[i] has no match in list2
		ASSERT_NE(j, num);
	}
}
247
check_mounted(struct __test_metadata * const _metadata,const uint64_t mnts[],size_t num)248 static void check_mounted(struct __test_metadata *const _metadata,
249 const uint64_t mnts[], size_t num)
250 {
251 ssize_t ret;
252 uint64_t *list;
253
254 list = malloc((num + 1) * sizeof(list[0]));
255 ASSERT_NE(list, NULL);
256
257 ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
258 ASSERT_EQ(ret, num);
259
260 verify_mount_ids(_metadata, mnts, list, num);
261
262 free(list);
263 }
264
setup_mount_tree(struct __test_metadata * const _metadata,int log2_num)265 static void setup_mount_tree(struct __test_metadata *const _metadata,
266 int log2_num)
267 {
268 int ret, i;
269
270 ret = mount("", "/", NULL, MS_SHARED, NULL);
271 ASSERT_EQ(ret, 0);
272
273 for (i = 0; i < log2_num; i++) {
274 ret = mount("/", "/", NULL, MS_BIND, NULL);
275 ASSERT_EQ(ret, 0);
276 }
277 }
278
TEST_F(fanotify,bind)279 TEST_F(fanotify, bind)
280 {
281 int ret;
282 uint64_t mnts[2] = { self->root_id };
283
284 ret = mount("/", "/", NULL, MS_BIND, NULL);
285 ASSERT_EQ(ret, 0);
286
287 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
288 ASSERT_NE(mnts[0], mnts[1]);
289
290 check_mounted(_metadata, mnts, 2);
291
292 // Cleanup
293 uint64_t detach_id;
294 ret = umount("/");
295 ASSERT_EQ(ret, 0);
296
297 detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
298 ASSERT_EQ(detach_id, mnts[1]);
299
300 check_mounted(_metadata, mnts, 1);
301 }
302
TEST_F(fanotify,move)303 TEST_F(fanotify, move)
304 {
305 int ret;
306 uint64_t mnts[2] = { self->root_id };
307 uint64_t move_id;
308
309 ret = mount("/", "/a", NULL, MS_BIND, NULL);
310 ASSERT_EQ(ret, 0);
311
312 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
313 ASSERT_NE(mnts[0], mnts[1]);
314
315 check_mounted(_metadata, mnts, 2);
316
317 ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
318 ASSERT_EQ(ret, 0);
319
320 move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
321 ASSERT_EQ(move_id, mnts[1]);
322
323 // Cleanup
324 ret = umount("/b");
325 ASSERT_EQ(ret, 0);
326
327 check_mounted(_metadata, mnts, 1);
328 }
329
TEST_F(fanotify,propagate)330 TEST_F(fanotify, propagate)
331 {
332 const unsigned int log2_num = 4;
333 const unsigned int num = (1 << log2_num);
334 uint64_t mnts[num];
335
336 setup_mount_tree(_metadata, log2_num);
337
338 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
339
340 mnts[0] = self->root_id;
341 check_mounted(_metadata, mnts, num);
342
343 // Cleanup
344 int ret;
345 uint64_t mnts2[num];
346 ret = umount2("/", MNT_DETACH);
347 ASSERT_EQ(ret, 0);
348
349 ret = mount("", "/", NULL, MS_PRIVATE, NULL);
350 ASSERT_EQ(ret, 0);
351
352 mnts2[0] = self->root_id;
353 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
354 verify_mount_ids(_metadata, mnts, mnts2, num);
355
356 check_mounted(_metadata, mnts, 1);
357 }
358
TEST_F(fanotify,fsmount)359 TEST_F(fanotify, fsmount)
360 {
361 int ret, fs, mnt;
362 uint64_t mnts[2] = { self->root_id };
363
364 fs = fsopen("tmpfs", 0);
365 ASSERT_GE(fs, 0);
366
367 ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
368 ASSERT_EQ(ret, 0);
369
370 mnt = fsmount(fs, 0, 0);
371 ASSERT_GE(mnt, 0);
372
373 close(fs);
374
375 ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
376 ASSERT_EQ(ret, 0);
377
378 close(mnt);
379
380 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
381 ASSERT_NE(mnts[0], mnts[1]);
382
383 check_mounted(_metadata, mnts, 2);
384
385 // Cleanup
386 uint64_t detach_id;
387 ret = umount("/a");
388 ASSERT_EQ(ret, 0);
389
390 detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
391 ASSERT_EQ(detach_id, mnts[1]);
392
393 check_mounted(_metadata, mnts, 1);
394 }
395
TEST_F(fanotify,reparent)396 TEST_F(fanotify, reparent)
397 {
398 uint64_t mnts[6] = { self->root_id };
399 uint64_t dmnts[3];
400 uint64_t masks[3];
401 unsigned int i;
402 int ret;
403
404 // Create setup with a[1] -> b[2] propagation
405 ret = mount("/", "/a", NULL, MS_BIND, NULL);
406 ASSERT_EQ(ret, 0);
407
408 ret = mount("", "/a", NULL, MS_SHARED, NULL);
409 ASSERT_EQ(ret, 0);
410
411 ret = mount("/a", "/b", NULL, MS_BIND, NULL);
412 ASSERT_EQ(ret, 0);
413
414 ret = mount("", "/b", NULL, MS_SLAVE, NULL);
415 ASSERT_EQ(ret, 0);
416
417 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
418
419 check_mounted(_metadata, mnts, 3);
420
421 // Mount on a[3], which is propagated to b[4]
422 ret = mount("/", "/a", NULL, MS_BIND, NULL);
423 ASSERT_EQ(ret, 0);
424
425 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
426
427 check_mounted(_metadata, mnts, 5);
428
429 // Mount on b[5], not propagated
430 ret = mount("/", "/b", NULL, MS_BIND, NULL);
431 ASSERT_EQ(ret, 0);
432
433 mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
434
435 check_mounted(_metadata, mnts, 6);
436
437 // Umount a[3], which is propagated to b[4], but not b[5]
438 // This will result in b[5] "falling" on b[2]
439 ret = umount("/a");
440 ASSERT_EQ(ret, 0);
441
442 expect_notify_n(_metadata, self, 3, masks, dmnts);
443 verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
444
445 for (i = 0; i < 3; i++) {
446 if (dmnts[i] == mnts[5]) {
447 ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
448 } else {
449 ASSERT_EQ(masks[i], FAN_MNT_DETACH);
450 }
451 }
452
453 mnts[3] = mnts[5];
454 check_mounted(_metadata, mnts, 4);
455
456 // Cleanup
457 ret = umount("/b");
458 ASSERT_EQ(ret, 0);
459
460 ret = umount("/a");
461 ASSERT_EQ(ret, 0);
462
463 ret = umount("/b");
464 ASSERT_EQ(ret, 0);
465
466 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
467 verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
468
469 check_mounted(_metadata, mnts, 1);
470 }
471
TEST_F(fanotify,rmdir)472 TEST_F(fanotify, rmdir)
473 {
474 uint64_t mnts[3] = { self->root_id };
475 int ret;
476
477 ret = mount("/", "/a", NULL, MS_BIND, NULL);
478 ASSERT_EQ(ret, 0);
479
480 ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
481 ASSERT_EQ(ret, 0);
482
483 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
484
485 check_mounted(_metadata, mnts, 3);
486
487 ret = chdir("/a");
488 ASSERT_EQ(ret, 0);
489
490 ret = fork();
491 ASSERT_GE(ret, 0);
492
493 if (ret == 0) {
494 chdir("/");
495 unshare(CLONE_NEWNS);
496 mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
497 umount2("/a", MNT_DETACH);
498 // This triggers a detach in the other namespace
499 rmdir("/a");
500 exit(0);
501 }
502 wait(NULL);
503
504 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
505 check_mounted(_metadata, mnts, 1);
506
507 // Cleanup
508 ret = chdir("/");
509 ASSERT_EQ(ret, 0);
510 }
511
TEST_F(fanotify,pivot_root)512 TEST_F(fanotify, pivot_root)
513 {
514 uint64_t mnts[3] = { self->root_id };
515 uint64_t mnts2[3];
516 int ret;
517
518 ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
519 ASSERT_EQ(ret, 0);
520
521 mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
522
523 ret = mkdir("/a/new", 0700);
524 ASSERT_EQ(ret, 0);
525
526 ret = mkdir("/a/old", 0700);
527 ASSERT_EQ(ret, 0);
528
529 ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
530 ASSERT_EQ(ret, 0);
531
532 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
533 check_mounted(_metadata, mnts, 3);
534
535 ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
536 ASSERT_EQ(ret, 0);
537
538 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
539 verify_mount_ids(_metadata, mnts, mnts2, 2);
540 check_mounted(_metadata, mnts, 3);
541
542 // Cleanup
543 ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
544 ASSERT_EQ(ret, 0);
545
546 ret = umount("/a/new");
547 ASSERT_EQ(ret, 0);
548
549 ret = umount("/a");
550 ASSERT_EQ(ret, 0);
551
552 check_mounted(_metadata, mnts, 1);
553 }
554
555 TEST_HARNESS_MAIN
556