1 // SPDX-License-Identifier: GPL-2.0-or-later 2 // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> 3 4 #define _GNU_SOURCE 5 6 // Needed for linux/fanotify.h 7 typedef struct { 8 int val[2]; 9 } __kernel_fsid_t; 10 #define __kernel_fsid_t __kernel_fsid_t 11 12 #include <fcntl.h> 13 #include <sched.h> 14 #include <stdio.h> 15 #include <string.h> 16 #include <sys/stat.h> 17 #include <sys/mount.h> 18 #include <unistd.h> 19 #include <sys/syscall.h> 20 #include <sys/fanotify.h> 21 22 #include "../../kselftest_harness.h" 23 #include "../statmount/statmount.h" 24 #include "../utils.h" 25 26 static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; 27 28 static const int mark_types[] = { 29 FAN_MARK_FILESYSTEM, 30 FAN_MARK_MOUNT, 31 FAN_MARK_INODE 32 }; 33 34 static const int mark_cmds[] = { 35 FAN_MARK_ADD, 36 FAN_MARK_REMOVE, 37 FAN_MARK_FLUSH 38 }; 39 40 #define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) 41 42 FIXTURE(fanotify) { 43 int fan_fd[NUM_FAN_FDS]; 44 char buf[256]; 45 unsigned int rem; 46 void *next; 47 char root_mntpoint[sizeof(root_mntpoint_templ)]; 48 int orig_root; 49 int orig_ns_fd; 50 int ns_fd; 51 uint64_t root_id; 52 }; 53 54 FIXTURE_SETUP(fanotify) 55 { 56 int i, ret; 57 58 self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY); 59 ASSERT_GE(self->orig_ns_fd, 0); 60 61 ret = setup_userns(); 62 ASSERT_EQ(ret, 0); 63 64 self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY); 65 ASSERT_GE(self->ns_fd, 0); 66 67 strcpy(self->root_mntpoint, root_mntpoint_templ); 68 ASSERT_NE(mkdtemp(self->root_mntpoint), NULL); 69 70 self->orig_root = open("/", O_PATH | O_CLOEXEC); 71 ASSERT_GE(self->orig_root, 0); 72 73 ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0); 74 75 ASSERT_EQ(chroot(self->root_mntpoint), 0); 76 77 ASSERT_EQ(chdir("/"), 0); 78 79 ASSERT_EQ(mkdir("a", 0700), 0); 80 81 ASSERT_EQ(mkdir("b", 0700), 0); 82 83 self->root_id = get_unique_mnt_id("/"); 84 ASSERT_NE(self->root_id, 0); 85 86 for (i = 0; i < NUM_FAN_FDS; i++) { 87 int fan_fd = fanotify_init(FAN_REPORT_FID, 0); 88 // Verify that watching tmpfs mounted inside userns is allowed 89 ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i], 90 FAN_OPEN, AT_FDCWD, "/"); 91 ASSERT_EQ(ret, 0); 92 // ...but watching entire orig root filesystem is not allowed 93 ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM, 94 FAN_OPEN, self->orig_root, "."); 95 ASSERT_NE(ret, 0); 96 close(fan_fd); 97 98 self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, 99 0); 100 ASSERT_GE(self->fan_fd[i], 0); 101 // Verify that watching mntns where group was created is allowed 102 ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | 103 FAN_MARK_MNTNS, 104 FAN_MNT_ATTACH | FAN_MNT_DETACH, 105 self->ns_fd, NULL); 106 ASSERT_EQ(ret, 0); 107 // ...but watching orig mntns is not allowed 108 ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | 109 FAN_MARK_MNTNS, 110 FAN_MNT_ATTACH | FAN_MNT_DETACH, 111 self->orig_ns_fd, NULL); 112 ASSERT_NE(ret, 0); 113 // On fd[0] we do an extra ADD that changes nothing. 114 // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. 115 ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | 116 FAN_MARK_MNTNS, 117 FAN_MNT_ATTACH | FAN_MNT_DETACH, 118 self->ns_fd, NULL); 119 ASSERT_EQ(ret, 0); 120 } 121 122 self->rem = 0; 123 } 124 125 FIXTURE_TEARDOWN(fanotify) 126 { 127 int i; 128 129 ASSERT_EQ(self->rem, 0); 130 for (i = 0; i < NUM_FAN_FDS; i++) 131 close(self->fan_fd[i]); 132 133 ASSERT_EQ(fchdir(self->orig_root), 0); 134 135 ASSERT_EQ(chroot("."), 0); 136 137 EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0); 138 EXPECT_EQ(chdir(self->root_mntpoint), 0); 139 EXPECT_EQ(chdir("/"), 0); 140 EXPECT_EQ(rmdir(self->root_mntpoint), 0); 141 } 142 143 static uint64_t expect_notify(struct __test_metadata *const _metadata, 144 FIXTURE_DATA(fanotify) *self, 145 uint64_t *mask) 146 { 147 struct fanotify_event_metadata *meta; 148 struct fanotify_event_info_mnt *mnt; 149 unsigned int thislen; 150 151 if (!self->rem) { 152 ssize_t len; 153 int i; 154 155 for (i = NUM_FAN_FDS - 1; i >= 0; i--) { 156 len = read(self->fan_fd[i], self->buf, 157 sizeof(self->buf)); 158 if (i > 0) { 159 // Groups 1,2 should get EAGAIN 160 ASSERT_EQ(len, -1); 161 ASSERT_EQ(errno, EAGAIN); 162 } else { 163 // Group 0 should get events 164 ASSERT_GT(len, 0); 165 } 166 } 167 168 self->rem = len; 169 self->next = (void *) self->buf; 170 } 171 172 meta = self->next; 173 ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem)); 174 175 thislen = meta->event_len; 176 self->rem -= thislen; 177 self->next += thislen; 178 179 *mask = meta->mask; 180 thislen -= sizeof(*meta); 181 182 mnt = ((void *) meta) + meta->event_len - thislen; 183 184 ASSERT_EQ(thislen, sizeof(*mnt)); 185 186 return mnt->mnt_id; 187 } 188 189 static void expect_notify_n(struct __test_metadata *const _metadata, 190 FIXTURE_DATA(fanotify) *self, 191 unsigned int n, uint64_t mask[], uint64_t mnts[]) 192 { 193 unsigned int i; 194 195 for (i = 0; i < n; i++) 196 mnts[i] = expect_notify(_metadata, self, &mask[i]); 197 } 198 199 static uint64_t expect_notify_mask(struct __test_metadata *const _metadata, 200 FIXTURE_DATA(fanotify) *self, 201 uint64_t expect_mask) 202 { 203 uint64_t mntid, mask; 204 205 mntid = expect_notify(_metadata, self, &mask); 206 ASSERT_EQ(expect_mask, mask); 207 208 return mntid; 209 } 210 211 212 static void expect_notify_mask_n(struct __test_metadata *const _metadata, 213 FIXTURE_DATA(fanotify) *self, 214 uint64_t mask, unsigned int n, uint64_t mnts[]) 215 { 216 unsigned int i; 217 218 for (i = 0; i < n; i++) 219 mnts[i] = expect_notify_mask(_metadata, self, mask); 220 } 221 222 static void verify_mount_ids(struct __test_metadata *const _metadata, 223 const uint64_t list1[], const uint64_t list2[], 224 size_t num) 225 { 226 unsigned int i, j; 227 228 // Check that neither list has any duplicates 229 for (i = 0; i < num; i++) { 230 for (j = 0; j < num; j++) { 231 if (i != j) { 232 ASSERT_NE(list1[i], list1[j]); 233 ASSERT_NE(list2[i], list2[j]); 234 } 235 } 236 } 237 // Check that all list1 memebers can be found in list2. Together with 238 // the above it means that the list1 and list2 represent the same sets. 239 for (i = 0; i < num; i++) { 240 for (j = 0; j < num; j++) { 241 if (list1[i] == list2[j]) 242 break; 243 } 244 ASSERT_NE(j, num); 245 } 246 } 247 248 static void check_mounted(struct __test_metadata *const _metadata, 249 const uint64_t mnts[], size_t num) 250 { 251 ssize_t ret; 252 uint64_t *list; 253 254 list = malloc((num + 1) * sizeof(list[0])); 255 ASSERT_NE(list, NULL); 256 257 ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0); 258 ASSERT_EQ(ret, num); 259 260 verify_mount_ids(_metadata, mnts, list, num); 261 262 free(list); 263 } 264 265 static void setup_mount_tree(struct __test_metadata *const _metadata, 266 int log2_num) 267 { 268 int ret, i; 269 270 ret = mount("", "/", NULL, MS_SHARED, NULL); 271 ASSERT_EQ(ret, 0); 272 273 for (i = 0; i < log2_num; i++) { 274 ret = mount("/", "/", NULL, MS_BIND, NULL); 275 ASSERT_EQ(ret, 0); 276 } 277 } 278 279 TEST_F(fanotify, bind) 280 { 281 int ret; 282 uint64_t mnts[2] = { self->root_id }; 283 284 ret = mount("/", "/", NULL, MS_BIND, NULL); 285 ASSERT_EQ(ret, 0); 286 287 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 288 ASSERT_NE(mnts[0], mnts[1]); 289 290 check_mounted(_metadata, mnts, 2); 291 292 // Cleanup 293 uint64_t detach_id; 294 ret = umount("/"); 295 ASSERT_EQ(ret, 0); 296 297 detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); 298 ASSERT_EQ(detach_id, mnts[1]); 299 300 check_mounted(_metadata, mnts, 1); 301 } 302 303 TEST_F(fanotify, move) 304 { 305 int ret; 306 uint64_t mnts[2] = { self->root_id }; 307 uint64_t move_id; 308 309 ret = mount("/", "/a", NULL, MS_BIND, NULL); 310 ASSERT_EQ(ret, 0); 311 312 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 313 ASSERT_NE(mnts[0], mnts[1]); 314 315 check_mounted(_metadata, mnts, 2); 316 317 ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0); 318 ASSERT_EQ(ret, 0); 319 320 move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH); 321 ASSERT_EQ(move_id, mnts[1]); 322 323 // Cleanup 324 ret = umount("/b"); 325 ASSERT_EQ(ret, 0); 326 327 check_mounted(_metadata, mnts, 1); 328 } 329 330 TEST_F(fanotify, propagate) 331 { 332 const unsigned int log2_num = 4; 333 const unsigned int num = (1 << log2_num); 334 uint64_t mnts[num]; 335 336 setup_mount_tree(_metadata, log2_num); 337 338 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1); 339 340 mnts[0] = self->root_id; 341 check_mounted(_metadata, mnts, num); 342 343 // Cleanup 344 int ret; 345 uint64_t mnts2[num]; 346 ret = umount2("/", MNT_DETACH); 347 ASSERT_EQ(ret, 0); 348 349 ret = mount("", "/", NULL, MS_PRIVATE, NULL); 350 ASSERT_EQ(ret, 0); 351 352 mnts2[0] = self->root_id; 353 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1); 354 verify_mount_ids(_metadata, mnts, mnts2, num); 355 356 check_mounted(_metadata, mnts, 1); 357 } 358 359 TEST_F(fanotify, fsmount) 360 { 361 int ret, fs, mnt; 362 uint64_t mnts[2] = { self->root_id }; 363 364 fs = fsopen("tmpfs", 0); 365 ASSERT_GE(fs, 0); 366 367 ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0); 368 ASSERT_EQ(ret, 0); 369 370 mnt = fsmount(fs, 0, 0); 371 ASSERT_GE(mnt, 0); 372 373 close(fs); 374 375 ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH); 376 ASSERT_EQ(ret, 0); 377 378 close(mnt); 379 380 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 381 ASSERT_NE(mnts[0], mnts[1]); 382 383 check_mounted(_metadata, mnts, 2); 384 385 // Cleanup 386 uint64_t detach_id; 387 ret = umount("/a"); 388 ASSERT_EQ(ret, 0); 389 390 detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); 391 ASSERT_EQ(detach_id, mnts[1]); 392 393 check_mounted(_metadata, mnts, 1); 394 } 395 396 TEST_F(fanotify, reparent) 397 { 398 uint64_t mnts[6] = { self->root_id }; 399 uint64_t dmnts[3]; 400 uint64_t masks[3]; 401 unsigned int i; 402 int ret; 403 404 // Create setup with a[1] -> b[2] propagation 405 ret = mount("/", "/a", NULL, MS_BIND, NULL); 406 ASSERT_EQ(ret, 0); 407 408 ret = mount("", "/a", NULL, MS_SHARED, NULL); 409 ASSERT_EQ(ret, 0); 410 411 ret = mount("/a", "/b", NULL, MS_BIND, NULL); 412 ASSERT_EQ(ret, 0); 413 414 ret = mount("", "/b", NULL, MS_SLAVE, NULL); 415 ASSERT_EQ(ret, 0); 416 417 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); 418 419 check_mounted(_metadata, mnts, 3); 420 421 // Mount on a[3], which is propagated to b[4] 422 ret = mount("/", "/a", NULL, MS_BIND, NULL); 423 ASSERT_EQ(ret, 0); 424 425 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3); 426 427 check_mounted(_metadata, mnts, 5); 428 429 // Mount on b[5], not propagated 430 ret = mount("/", "/b", NULL, MS_BIND, NULL); 431 ASSERT_EQ(ret, 0); 432 433 mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 434 435 check_mounted(_metadata, mnts, 6); 436 437 // Umount a[3], which is propagated to b[4], but not b[5] 438 // This will result in b[5] "falling" on b[2] 439 ret = umount("/a"); 440 ASSERT_EQ(ret, 0); 441 442 expect_notify_n(_metadata, self, 3, masks, dmnts); 443 verify_mount_ids(_metadata, mnts + 3, dmnts, 3); 444 445 for (i = 0; i < 3; i++) { 446 if (dmnts[i] == mnts[5]) { 447 ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH); 448 } else { 449 ASSERT_EQ(masks[i], FAN_MNT_DETACH); 450 } 451 } 452 453 mnts[3] = mnts[5]; 454 check_mounted(_metadata, mnts, 4); 455 456 // Cleanup 457 ret = umount("/b"); 458 ASSERT_EQ(ret, 0); 459 460 ret = umount("/a"); 461 ASSERT_EQ(ret, 0); 462 463 ret = umount("/b"); 464 ASSERT_EQ(ret, 0); 465 466 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts); 467 verify_mount_ids(_metadata, mnts + 1, dmnts, 3); 468 469 check_mounted(_metadata, mnts, 1); 470 } 471 472 TEST_F(fanotify, rmdir) 473 { 474 uint64_t mnts[3] = { self->root_id }; 475 int ret; 476 477 ret = mount("/", "/a", NULL, MS_BIND, NULL); 478 ASSERT_EQ(ret, 0); 479 480 ret = mount("/", "/a/b", NULL, MS_BIND, NULL); 481 ASSERT_EQ(ret, 0); 482 483 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); 484 485 check_mounted(_metadata, mnts, 3); 486 487 ret = chdir("/a"); 488 ASSERT_EQ(ret, 0); 489 490 ret = fork(); 491 ASSERT_GE(ret, 0); 492 493 if (ret == 0) { 494 chdir("/"); 495 unshare(CLONE_NEWNS); 496 mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); 497 umount2("/a", MNT_DETACH); 498 // This triggers a detach in the other namespace 499 rmdir("/a"); 500 exit(0); 501 } 502 wait(NULL); 503 504 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1); 505 check_mounted(_metadata, mnts, 1); 506 507 // Cleanup 508 ret = chdir("/"); 509 ASSERT_EQ(ret, 0); 510 } 511 512 TEST_F(fanotify, pivot_root) 513 { 514 uint64_t mnts[3] = { self->root_id }; 515 uint64_t mnts2[3]; 516 int ret; 517 518 ret = mount("tmpfs", "/a", "tmpfs", 0, NULL); 519 ASSERT_EQ(ret, 0); 520 521 mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 522 523 ret = mkdir("/a/new", 0700); 524 ASSERT_EQ(ret, 0); 525 526 ret = mkdir("/a/old", 0700); 527 ASSERT_EQ(ret, 0); 528 529 ret = mount("/a", "/a/new", NULL, MS_BIND, NULL); 530 ASSERT_EQ(ret, 0); 531 532 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 533 check_mounted(_metadata, mnts, 3); 534 535 ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old"); 536 ASSERT_EQ(ret, 0); 537 538 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2); 539 verify_mount_ids(_metadata, mnts, mnts2, 2); 540 check_mounted(_metadata, mnts, 3); 541 542 // Cleanup 543 ret = syscall(SYS_pivot_root, "/old", "/old/a/new"); 544 ASSERT_EQ(ret, 0); 545 546 ret = umount("/a/new"); 547 ASSERT_EQ(ret, 0); 548 549 ret = umount("/a"); 550 ASSERT_EQ(ret, 0); 551 552 check_mounted(_metadata, mnts, 1); 553 } 554 555 TEST_HARNESS_MAIN 556