1 // SPDX-License-Identifier: GPL-2.0-or-later 2 // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> 3 4 #define _GNU_SOURCE 5 #include <fcntl.h> 6 #include <sched.h> 7 #include <stdio.h> 8 #include <string.h> 9 #include <sys/stat.h> 10 #include <sys/mount.h> 11 #include <unistd.h> 12 #include <sys/syscall.h> 13 14 #include "../../kselftest_harness.h" 15 #include "../../pidfd/pidfd.h" 16 #include "../statmount/statmount.h" 17 #include "../utils.h" 18 19 // Needed for linux/fanotify.h 20 #ifndef __kernel_fsid_t 21 typedef struct { 22 int val[2]; 23 } __kernel_fsid_t; 24 #endif 25 26 #include <sys/fanotify.h> 27 28 static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; 29 30 static const int mark_types[] = { 31 FAN_MARK_FILESYSTEM, 32 FAN_MARK_MOUNT, 33 FAN_MARK_INODE 34 }; 35 36 static const int mark_cmds[] = { 37 FAN_MARK_ADD, 38 FAN_MARK_REMOVE, 39 FAN_MARK_FLUSH 40 }; 41 42 #define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) 43 44 FIXTURE(fanotify) { 45 int fan_fd[NUM_FAN_FDS]; 46 char buf[256]; 47 unsigned int rem; 48 void *next; 49 char root_mntpoint[sizeof(root_mntpoint_templ)]; 50 int orig_root; 51 int orig_ns_fd; 52 int ns_fd; 53 uint64_t root_id; 54 }; 55 56 FIXTURE_SETUP(fanotify) 57 { 58 int i, ret; 59 60 self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY); 61 ASSERT_GE(self->orig_ns_fd, 0); 62 63 ret = setup_userns(); 64 ASSERT_EQ(ret, 0); 65 66 self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY); 67 ASSERT_GE(self->ns_fd, 0); 68 69 strcpy(self->root_mntpoint, root_mntpoint_templ); 70 ASSERT_NE(mkdtemp(self->root_mntpoint), NULL); 71 72 self->orig_root = open("/", O_PATH | O_CLOEXEC); 73 ASSERT_GE(self->orig_root, 0); 74 75 ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0); 76 77 ASSERT_EQ(chroot(self->root_mntpoint), 0); 78 79 ASSERT_EQ(chdir("/"), 0); 80 81 ASSERT_EQ(mkdir("a", 0700), 0); 82 83 ASSERT_EQ(mkdir("b", 0700), 0); 84 85 self->root_id = get_unique_mnt_id("/"); 86 ASSERT_NE(self->root_id, 0); 87 88 for (i = 0; i < NUM_FAN_FDS; i++) { 89 int fan_fd = fanotify_init(FAN_REPORT_FID, 0); 90 // Verify that watching tmpfs mounted inside userns is allowed 91 ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i], 92 FAN_OPEN, AT_FDCWD, "/"); 93 ASSERT_EQ(ret, 0); 94 // ...but watching entire orig root filesystem is not allowed 95 ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM, 96 FAN_OPEN, self->orig_root, "."); 97 ASSERT_NE(ret, 0); 98 close(fan_fd); 99 100 self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, 101 0); 102 ASSERT_GE(self->fan_fd[i], 0); 103 // Verify that watching mntns where group was created is allowed 104 ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | 105 FAN_MARK_MNTNS, 106 FAN_MNT_ATTACH | FAN_MNT_DETACH, 107 self->ns_fd, NULL); 108 ASSERT_EQ(ret, 0); 109 // ...but watching orig mntns is not allowed 110 ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | 111 FAN_MARK_MNTNS, 112 FAN_MNT_ATTACH | FAN_MNT_DETACH, 113 self->orig_ns_fd, NULL); 114 ASSERT_NE(ret, 0); 115 // On fd[0] we do an extra ADD that changes nothing. 116 // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. 117 ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | 118 FAN_MARK_MNTNS, 119 FAN_MNT_ATTACH | FAN_MNT_DETACH, 120 self->ns_fd, NULL); 121 ASSERT_EQ(ret, 0); 122 } 123 124 self->rem = 0; 125 } 126 127 FIXTURE_TEARDOWN(fanotify) 128 { 129 int i; 130 131 ASSERT_EQ(self->rem, 0); 132 for (i = 0; i < NUM_FAN_FDS; i++) 133 close(self->fan_fd[i]); 134 135 ASSERT_EQ(fchdir(self->orig_root), 0); 136 137 ASSERT_EQ(chroot("."), 0); 138 139 EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0); 140 EXPECT_EQ(chdir(self->root_mntpoint), 0); 141 EXPECT_EQ(chdir("/"), 0); 142 EXPECT_EQ(rmdir(self->root_mntpoint), 0); 143 } 144 145 static uint64_t expect_notify(struct __test_metadata *const _metadata, 146 FIXTURE_DATA(fanotify) *self, 147 uint64_t *mask) 148 { 149 struct fanotify_event_metadata *meta; 150 struct fanotify_event_info_mnt *mnt; 151 unsigned int thislen; 152 153 if (!self->rem) { 154 ssize_t len; 155 int i; 156 157 for (i = NUM_FAN_FDS - 1; i >= 0; i--) { 158 len = read(self->fan_fd[i], self->buf, 159 sizeof(self->buf)); 160 if (i > 0) { 161 // Groups 1,2 should get EAGAIN 162 ASSERT_EQ(len, -1); 163 ASSERT_EQ(errno, EAGAIN); 164 } else { 165 // Group 0 should get events 166 ASSERT_GT(len, 0); 167 } 168 } 169 170 self->rem = len; 171 self->next = (void *) self->buf; 172 } 173 174 meta = self->next; 175 ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem)); 176 177 thislen = meta->event_len; 178 self->rem -= thislen; 179 self->next += thislen; 180 181 *mask = meta->mask; 182 thislen -= sizeof(*meta); 183 184 mnt = ((void *) meta) + meta->event_len - thislen; 185 186 ASSERT_EQ(thislen, sizeof(*mnt)); 187 188 return mnt->mnt_id; 189 } 190 191 static void expect_notify_n(struct __test_metadata *const _metadata, 192 FIXTURE_DATA(fanotify) *self, 193 unsigned int n, uint64_t mask[], uint64_t mnts[]) 194 { 195 unsigned int i; 196 197 for (i = 0; i < n; i++) 198 mnts[i] = expect_notify(_metadata, self, &mask[i]); 199 } 200 201 static uint64_t expect_notify_mask(struct __test_metadata *const _metadata, 202 FIXTURE_DATA(fanotify) *self, 203 uint64_t expect_mask) 204 { 205 uint64_t mntid, mask; 206 207 mntid = expect_notify(_metadata, self, &mask); 208 ASSERT_EQ(expect_mask, mask); 209 210 return mntid; 211 } 212 213 214 static void expect_notify_mask_n(struct __test_metadata *const _metadata, 215 FIXTURE_DATA(fanotify) *self, 216 uint64_t mask, unsigned int n, uint64_t mnts[]) 217 { 218 unsigned int i; 219 220 for (i = 0; i < n; i++) 221 mnts[i] = expect_notify_mask(_metadata, self, mask); 222 } 223 224 static void verify_mount_ids(struct __test_metadata *const _metadata, 225 const uint64_t list1[], const uint64_t list2[], 226 size_t num) 227 { 228 unsigned int i, j; 229 230 // Check that neither list has any duplicates 231 for (i = 0; i < num; i++) { 232 for (j = 0; j < num; j++) { 233 if (i != j) { 234 ASSERT_NE(list1[i], list1[j]); 235 ASSERT_NE(list2[i], list2[j]); 236 } 237 } 238 } 239 // Check that all list1 memebers can be found in list2. Together with 240 // the above it means that the list1 and list2 represent the same sets. 241 for (i = 0; i < num; i++) { 242 for (j = 0; j < num; j++) { 243 if (list1[i] == list2[j]) 244 break; 245 } 246 ASSERT_NE(j, num); 247 } 248 } 249 250 static void check_mounted(struct __test_metadata *const _metadata, 251 const uint64_t mnts[], size_t num) 252 { 253 ssize_t ret; 254 uint64_t *list; 255 256 list = malloc((num + 1) * sizeof(list[0])); 257 ASSERT_NE(list, NULL); 258 259 ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0); 260 ASSERT_EQ(ret, num); 261 262 verify_mount_ids(_metadata, mnts, list, num); 263 264 free(list); 265 } 266 267 static void setup_mount_tree(struct __test_metadata *const _metadata, 268 int log2_num) 269 { 270 int ret, i; 271 272 ret = mount("", "/", NULL, MS_SHARED, NULL); 273 ASSERT_EQ(ret, 0); 274 275 for (i = 0; i < log2_num; i++) { 276 ret = mount("/", "/", NULL, MS_BIND, NULL); 277 ASSERT_EQ(ret, 0); 278 } 279 } 280 281 TEST_F(fanotify, bind) 282 { 283 int ret; 284 uint64_t mnts[2] = { self->root_id }; 285 286 ret = mount("/", "/", NULL, MS_BIND, NULL); 287 ASSERT_EQ(ret, 0); 288 289 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 290 ASSERT_NE(mnts[0], mnts[1]); 291 292 check_mounted(_metadata, mnts, 2); 293 294 // Cleanup 295 uint64_t detach_id; 296 ret = umount("/"); 297 ASSERT_EQ(ret, 0); 298 299 detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); 300 ASSERT_EQ(detach_id, mnts[1]); 301 302 check_mounted(_metadata, mnts, 1); 303 } 304 305 TEST_F(fanotify, move) 306 { 307 int ret; 308 uint64_t mnts[2] = { self->root_id }; 309 uint64_t move_id; 310 311 ret = mount("/", "/a", NULL, MS_BIND, NULL); 312 ASSERT_EQ(ret, 0); 313 314 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 315 ASSERT_NE(mnts[0], mnts[1]); 316 317 check_mounted(_metadata, mnts, 2); 318 319 ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0); 320 ASSERT_EQ(ret, 0); 321 322 move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH); 323 ASSERT_EQ(move_id, mnts[1]); 324 325 // Cleanup 326 ret = umount("/b"); 327 ASSERT_EQ(ret, 0); 328 329 check_mounted(_metadata, mnts, 1); 330 } 331 332 TEST_F(fanotify, propagate) 333 { 334 const unsigned int log2_num = 4; 335 const unsigned int num = (1 << log2_num); 336 uint64_t mnts[num]; 337 338 setup_mount_tree(_metadata, log2_num); 339 340 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1); 341 342 mnts[0] = self->root_id; 343 check_mounted(_metadata, mnts, num); 344 345 // Cleanup 346 int ret; 347 uint64_t mnts2[num]; 348 ret = umount2("/", MNT_DETACH); 349 ASSERT_EQ(ret, 0); 350 351 ret = mount("", "/", NULL, MS_PRIVATE, NULL); 352 ASSERT_EQ(ret, 0); 353 354 mnts2[0] = self->root_id; 355 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1); 356 verify_mount_ids(_metadata, mnts, mnts2, num); 357 358 check_mounted(_metadata, mnts, 1); 359 } 360 361 TEST_F(fanotify, fsmount) 362 { 363 int ret, fs, mnt; 364 uint64_t mnts[2] = { self->root_id }; 365 366 fs = fsopen("tmpfs", 0); 367 ASSERT_GE(fs, 0); 368 369 ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0); 370 ASSERT_EQ(ret, 0); 371 372 mnt = fsmount(fs, 0, 0); 373 ASSERT_GE(mnt, 0); 374 375 close(fs); 376 377 ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH); 378 ASSERT_EQ(ret, 0); 379 380 close(mnt); 381 382 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 383 ASSERT_NE(mnts[0], mnts[1]); 384 385 check_mounted(_metadata, mnts, 2); 386 387 // Cleanup 388 uint64_t detach_id; 389 ret = umount("/a"); 390 ASSERT_EQ(ret, 0); 391 392 detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); 393 ASSERT_EQ(detach_id, mnts[1]); 394 395 check_mounted(_metadata, mnts, 1); 396 } 397 398 TEST_F(fanotify, reparent) 399 { 400 uint64_t mnts[6] = { self->root_id }; 401 uint64_t dmnts[3]; 402 uint64_t masks[3]; 403 unsigned int i; 404 int ret; 405 406 // Create setup with a[1] -> b[2] propagation 407 ret = mount("/", "/a", NULL, MS_BIND, NULL); 408 ASSERT_EQ(ret, 0); 409 410 ret = mount("", "/a", NULL, MS_SHARED, NULL); 411 ASSERT_EQ(ret, 0); 412 413 ret = mount("/a", "/b", NULL, MS_BIND, NULL); 414 ASSERT_EQ(ret, 0); 415 416 ret = mount("", "/b", NULL, MS_SLAVE, NULL); 417 ASSERT_EQ(ret, 0); 418 419 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); 420 421 check_mounted(_metadata, mnts, 3); 422 423 // Mount on a[3], which is propagated to b[4] 424 ret = mount("/", "/a", NULL, MS_BIND, NULL); 425 ASSERT_EQ(ret, 0); 426 427 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3); 428 429 check_mounted(_metadata, mnts, 5); 430 431 // Mount on b[5], not propagated 432 ret = mount("/", "/b", NULL, MS_BIND, NULL); 433 ASSERT_EQ(ret, 0); 434 435 mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 436 437 check_mounted(_metadata, mnts, 6); 438 439 // Umount a[3], which is propagated to b[4], but not b[5] 440 // This will result in b[5] "falling" on b[2] 441 ret = umount("/a"); 442 ASSERT_EQ(ret, 0); 443 444 expect_notify_n(_metadata, self, 3, masks, dmnts); 445 verify_mount_ids(_metadata, mnts + 3, dmnts, 3); 446 447 for (i = 0; i < 3; i++) { 448 if (dmnts[i] == mnts[5]) { 449 ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH); 450 } else { 451 ASSERT_EQ(masks[i], FAN_MNT_DETACH); 452 } 453 } 454 455 mnts[3] = mnts[5]; 456 check_mounted(_metadata, mnts, 4); 457 458 // Cleanup 459 ret = umount("/b"); 460 ASSERT_EQ(ret, 0); 461 462 ret = umount("/a"); 463 ASSERT_EQ(ret, 0); 464 465 ret = umount("/b"); 466 ASSERT_EQ(ret, 0); 467 468 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts); 469 verify_mount_ids(_metadata, mnts + 1, dmnts, 3); 470 471 check_mounted(_metadata, mnts, 1); 472 } 473 474 TEST_F(fanotify, rmdir) 475 { 476 uint64_t mnts[3] = { self->root_id }; 477 int ret; 478 479 ret = mount("/", "/a", NULL, MS_BIND, NULL); 480 ASSERT_EQ(ret, 0); 481 482 ret = mount("/", "/a/b", NULL, MS_BIND, NULL); 483 ASSERT_EQ(ret, 0); 484 485 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); 486 487 check_mounted(_metadata, mnts, 3); 488 489 ret = chdir("/a"); 490 ASSERT_EQ(ret, 0); 491 492 ret = fork(); 493 ASSERT_GE(ret, 0); 494 495 if (ret == 0) { 496 chdir("/"); 497 unshare(CLONE_NEWNS); 498 mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); 499 umount2("/a", MNT_DETACH); 500 // This triggers a detach in the other namespace 501 rmdir("/a"); 502 exit(0); 503 } 504 wait(NULL); 505 506 expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1); 507 check_mounted(_metadata, mnts, 1); 508 509 // Cleanup 510 ret = chdir("/"); 511 ASSERT_EQ(ret, 0); 512 } 513 514 TEST_F(fanotify, pivot_root) 515 { 516 uint64_t mnts[3] = { self->root_id }; 517 uint64_t mnts2[3]; 518 int ret; 519 520 ret = mount("tmpfs", "/a", "tmpfs", 0, NULL); 521 ASSERT_EQ(ret, 0); 522 523 mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 524 525 ret = mkdir("/a/new", 0700); 526 ASSERT_EQ(ret, 0); 527 528 ret = mkdir("/a/old", 0700); 529 ASSERT_EQ(ret, 0); 530 531 ret = mount("/a", "/a/new", NULL, MS_BIND, NULL); 532 ASSERT_EQ(ret, 0); 533 534 mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 535 check_mounted(_metadata, mnts, 3); 536 537 ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old"); 538 ASSERT_EQ(ret, 0); 539 540 expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2); 541 verify_mount_ids(_metadata, mnts, mnts2, 2); 542 check_mounted(_metadata, mnts, 3); 543 544 // Cleanup 545 ret = syscall(SYS_pivot_root, "/old", "/old/a/new"); 546 ASSERT_EQ(ret, 0); 547 548 ret = umount("/a/new"); 549 ASSERT_EQ(ret, 0); 550 551 ret = umount("/a"); 552 ASSERT_EQ(ret, 0); 553 554 check_mounted(_metadata, mnts, 1); 555 } 556 557 TEST_HARNESS_MAIN 558