xref: /linux/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c (revision 3191df0a4882c827cac29925e80ecb1775b904bd)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
3 
4 #define _GNU_SOURCE
5 
// Needed for linux/fanotify.h: provide a local __kernel_fsid_t definition
// (two ints) before the fanotify headers are included.
typedef struct {
	int	val[2];
} __kernel_fsid_t;
// Same-named macro; presumably lets other headers detect via #ifdef that the
// type is already defined and skip their own definition -- TODO confirm.
#define __kernel_fsid_t __kernel_fsid_t
11 
12 #include <fcntl.h>
13 #include <sched.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/stat.h>
17 #include <sys/mount.h>
18 #include <unistd.h>
19 #include <sys/syscall.h>
20 #include <sys/fanotify.h>
21 
22 #include "../../kselftest_harness.h"
23 #include "../statmount/statmount.h"
24 #include "../utils.h"
25 
// mkdtemp() template for the directory on which the test's private tmpfs
// root is mounted.
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";

// Mark types tried on the new tmpfs root during fixture setup; entry i is
// used while setting up group i.
static const int mark_types[] = {
	FAN_MARK_FILESYSTEM,
	FAN_MARK_MOUNT,
	FAN_MARK_INODE
};

// Final fanotify_mark() command applied to the mntns mark of group i:
// group 0 re-adds the mark (no change), groups 1 and 2 remove/flush it and
// are therefore expected to receive no events.
static const int mark_cmds[] = {
	FAN_MARK_ADD,
	FAN_MARK_REMOVE,
	FAN_MARK_FLUSH
};

// Number of fanotify groups created per test: one per mark command.
#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
41 
42 FIXTURE(fanotify) {
43 	int fan_fd[NUM_FAN_FDS];
44 	char buf[256];
45 	unsigned int rem;
46 	void *next;
47 	char root_mntpoint[sizeof(root_mntpoint_templ)];
48 	int orig_root;
49 	int orig_ns_fd;
50 	int ns_fd;
51 	uint64_t root_id;
52 };
53 
54 FIXTURE_SETUP(fanotify)
55 {
56 	int i, ret;
57 
58 	self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
59 	ASSERT_GE(self->orig_ns_fd, 0);
60 
61 	ret = setup_userns();
62 	ASSERT_EQ(ret, 0);
63 
64 	self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
65 	ASSERT_GE(self->ns_fd, 0);
66 
67 	strcpy(self->root_mntpoint, root_mntpoint_templ);
68 	ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
69 
70 	self->orig_root = open("/", O_PATH | O_CLOEXEC);
71 	ASSERT_GE(self->orig_root, 0);
72 
73 	ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
74 
75 	ASSERT_EQ(chroot(self->root_mntpoint), 0);
76 
77 	ASSERT_EQ(chdir("/"), 0);
78 
79 	ASSERT_EQ(mkdir("a", 0700), 0);
80 
81 	ASSERT_EQ(mkdir("b", 0700), 0);
82 
83 	self->root_id = get_unique_mnt_id("/");
84 	ASSERT_NE(self->root_id, 0);
85 
86 	for (i = 0; i < NUM_FAN_FDS; i++) {
87 		int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
88 		// Verify that watching tmpfs mounted inside userns is allowed
89 		ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
90 				    FAN_OPEN, AT_FDCWD, "/");
91 		ASSERT_EQ(ret, 0);
92 		// ...but watching entire orig root filesystem is not allowed
93 		ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
94 				    FAN_OPEN, self->orig_root, ".");
95 		ASSERT_NE(ret, 0);
96 		close(fan_fd);
97 
98 		self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
99 						0);
100 		ASSERT_GE(self->fan_fd[i], 0);
101 		// Verify that watching mntns where group was created is allowed
102 		ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
103 				    FAN_MARK_MNTNS,
104 				    FAN_MNT_ATTACH | FAN_MNT_DETACH,
105 				    self->ns_fd, NULL);
106 		ASSERT_EQ(ret, 0);
107 		// ...but watching orig mntns is not allowed
108 		ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
109 				    FAN_MARK_MNTNS,
110 				    FAN_MNT_ATTACH | FAN_MNT_DETACH,
111 				    self->orig_ns_fd, NULL);
112 		ASSERT_NE(ret, 0);
113 		// On fd[0] we do an extra ADD that changes nothing.
114 		// On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
115 		ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
116 				    FAN_MARK_MNTNS,
117 				    FAN_MNT_ATTACH | FAN_MNT_DETACH,
118 				    self->ns_fd, NULL);
119 		ASSERT_EQ(ret, 0);
120 	}
121 
122 	self->rem = 0;
123 }
124 
125 FIXTURE_TEARDOWN(fanotify)
126 {
127 	int i;
128 
129 	ASSERT_EQ(self->rem, 0);
130 	for (i = 0; i < NUM_FAN_FDS; i++)
131 		close(self->fan_fd[i]);
132 
133 	ASSERT_EQ(fchdir(self->orig_root), 0);
134 
135 	ASSERT_EQ(chroot("."), 0);
136 
137 	EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
138 	EXPECT_EQ(chdir(self->root_mntpoint), 0);
139 	EXPECT_EQ(chdir("/"), 0);
140 	EXPECT_EQ(rmdir(self->root_mntpoint), 0);
141 }
142 
143 static uint64_t expect_notify(struct __test_metadata *const _metadata,
144 			      FIXTURE_DATA(fanotify) *self,
145 			      uint64_t *mask)
146 {
147 	struct fanotify_event_metadata *meta;
148 	struct fanotify_event_info_mnt *mnt;
149 	unsigned int thislen;
150 
151 	if (!self->rem) {
152 		ssize_t len;
153 		int i;
154 
155 		for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
156 			len = read(self->fan_fd[i], self->buf,
157 				   sizeof(self->buf));
158 			if (i > 0) {
159 				// Groups 1,2 should get EAGAIN
160 				ASSERT_EQ(len, -1);
161 				ASSERT_EQ(errno, EAGAIN);
162 			} else {
163 				// Group 0 should get events
164 				ASSERT_GT(len, 0);
165 			}
166 		}
167 
168 		self->rem = len;
169 		self->next = (void *) self->buf;
170 	}
171 
172 	meta = self->next;
173 	ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
174 
175 	thislen = meta->event_len;
176 	self->rem -= thislen;
177 	self->next += thislen;
178 
179 	*mask = meta->mask;
180 	thislen -= sizeof(*meta);
181 
182 	mnt = ((void *) meta) + meta->event_len - thislen;
183 
184 	ASSERT_EQ(thislen, sizeof(*mnt));
185 
186 	return mnt->mnt_id;
187 }
188 
189 static void expect_notify_n(struct __test_metadata *const _metadata,
190 				 FIXTURE_DATA(fanotify) *self,
191 				 unsigned int n, uint64_t mask[], uint64_t mnts[])
192 {
193 	unsigned int i;
194 
195 	for (i = 0; i < n; i++)
196 		mnts[i] = expect_notify(_metadata, self, &mask[i]);
197 }
198 
199 static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
200 				   FIXTURE_DATA(fanotify) *self,
201 				   uint64_t expect_mask)
202 {
203 	uint64_t mntid, mask;
204 
205 	mntid = expect_notify(_metadata, self, &mask);
206 	ASSERT_EQ(expect_mask, mask);
207 
208 	return mntid;
209 }
210 
211 
212 static void expect_notify_mask_n(struct __test_metadata *const _metadata,
213 				 FIXTURE_DATA(fanotify) *self,
214 				 uint64_t mask, unsigned int n, uint64_t mnts[])
215 {
216 	unsigned int i;
217 
218 	for (i = 0; i < n; i++)
219 		mnts[i] = expect_notify_mask(_metadata, self, mask);
220 }
221 
// Assert that @list1 and @list2, both @num entries long, contain the same set
// of mount IDs, in any order.
static void verify_mount_ids(struct __test_metadata *const _metadata,
			     const uint64_t list1[], const uint64_t list2[],
			     size_t num)
{
	unsigned int i, j;

	// Neither list may contain a duplicate. Inequality is symmetric, so
	// visiting each pair of positions once (i < j) is sufficient.
	for (i = 0; i < num; i++) {
		for (j = i + 1; j < num; j++) {
			ASSERT_NE(list1[i], list1[j]);
			ASSERT_NE(list2[i], list2[j]);
		}
	}
	// Every list1 member must occur somewhere in list2. Combined with the
	// uniqueness check above this makes the two lists equal as sets.
	for (i = 0; i < num; i++) {
		j = 0;
		while (j < num && list1[i] != list2[j])
			j++;
		ASSERT_NE(j, num);
	}
}
247 
248 static void check_mounted(struct __test_metadata *const _metadata,
249 			  const uint64_t mnts[], size_t num)
250 {
251 	ssize_t ret;
252 	uint64_t *list;
253 
254 	list = malloc((num + 1) * sizeof(list[0]));
255 	ASSERT_NE(list, NULL);
256 
257 	ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
258 	ASSERT_EQ(ret, num);
259 
260 	verify_mount_ids(_metadata, mnts, list, num);
261 
262 	free(list);
263 }
264 
265 static void setup_mount_tree(struct __test_metadata *const _metadata,
266 			    int log2_num)
267 {
268 	int ret, i;
269 
270 	ret = mount("", "/", NULL, MS_SHARED, NULL);
271 	ASSERT_EQ(ret, 0);
272 
273 	for (i = 0; i < log2_num; i++) {
274 		ret = mount("/", "/", NULL, MS_BIND, NULL);
275 		ASSERT_EQ(ret, 0);
276 	}
277 }
278 
// Bind-mounting "/" over itself must produce one FAN_MNT_ATTACH event for a
// new mount, and unmounting it one FAN_MNT_DETACH event for that same mount.
TEST_F(fanotify, bind)
{
	int ret;
	// mnts[0] is the root mount; mnts[1] is filled in from the event.
	uint64_t mnts[2] = { self->root_id };

	ret = mount("/", "/", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	// The new mount must have an ID different from the root mount.
	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
	ASSERT_NE(mnts[0], mnts[1]);

	check_mounted(_metadata, mnts, 2);

	// Cleanup: the detach event must name the mount attached above.
	uint64_t detach_id;
	ret = umount("/");
	ASSERT_EQ(ret, 0);

	detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
	ASSERT_EQ(detach_id, mnts[1]);

	// Only the root mount is left.
	check_mounted(_metadata, mnts, 1);
}
302 
303 TEST_F(fanotify, move)
304 {
305 	int ret;
306 	uint64_t mnts[2] = { self->root_id };
307 	uint64_t move_id;
308 
309 	ret = mount("/", "/a", NULL, MS_BIND, NULL);
310 	ASSERT_EQ(ret, 0);
311 
312 	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
313 	ASSERT_NE(mnts[0], mnts[1]);
314 
315 	check_mounted(_metadata, mnts, 2);
316 
317 	ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
318 	ASSERT_EQ(ret, 0);
319 
320 	move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
321 	ASSERT_EQ(move_id, mnts[1]);
322 
323 	// Cleanup
324 	ret = umount("/b");
325 	ASSERT_EQ(ret, 0);
326 
327 	check_mounted(_metadata, mnts, 1);
328 }
329 
// Build a tree of num mounts by repeatedly bind-mounting a shared "/" onto
// itself; expect one attach event per new mount and, on teardown, one detach
// event for each of those same mounts.
TEST_F(fanotify, propagate)
{
	const unsigned int log2_num = 4;
	const unsigned int num = (1 << log2_num);
	// mnts[0] is the root mount, mnts[1..num-1] come from attach events.
	uint64_t mnts[num];

	setup_mount_tree(_metadata, log2_num);

	// log2_num bind mounts are expected to yield num - 1 new mounts
	// (presumably because each bind is propagated to all existing peers).
	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);

	mnts[0] = self->root_id;
	check_mounted(_metadata, mnts, num);

	// Cleanup: lazily unmount everything stacked on "/" and undo MS_SHARED
	int ret;
	uint64_t mnts2[num];
	ret = umount2("/", MNT_DETACH);
	ASSERT_EQ(ret, 0);

	ret = mount("", "/", NULL, MS_PRIVATE, NULL);
	ASSERT_EQ(ret, 0);

	// The detached mounts must be exactly the ones that were attached;
	// the root entry is added to both lists so they compare as equal sets.
	mnts2[0] = self->root_id;
	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
	verify_mount_ids(_metadata, mnts, mnts2, num);

	// Only the root mount is left.
	check_mounted(_metadata, mnts, 1);
}
358 
// A tmpfs created with fsopen()/fsconfig()/fsmount() and attached with
// move_mount() must be reported as a plain FAN_MNT_ATTACH (not as a move),
// and its unmount as a FAN_MNT_DETACH of the same mount.
TEST_F(fanotify, fsmount)
{
	int ret, fs, mnt;
	// mnts[0] is the root mount; mnts[1] is filled in from the event.
	uint64_t mnts[2] = { self->root_id };

	fs = fsopen("tmpfs", 0);
	ASSERT_GE(fs, 0);

	ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
	ASSERT_EQ(ret, 0);

	// Produces a mount fd that is not yet attached at any path.
	mnt = fsmount(fs, 0, 0);
	ASSERT_GE(mnt, 0);

	close(fs);

	// Attach the mount referred to by the fd itself at /a.
	ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
	ASSERT_EQ(ret, 0);

	close(mnt);

	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
	ASSERT_NE(mnts[0], mnts[1]);

	check_mounted(_metadata, mnts, 2);

	// Cleanup: the detach event must name the mount attached above.
	uint64_t detach_id;
	ret = umount("/a");
	ASSERT_EQ(ret, 0);

	detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
	ASSERT_EQ(detach_id, mnts[1]);

	// Only the root mount is left.
	check_mounted(_metadata, mnts, 1);
}
395 
// Unmounting a propagated mount from underneath a non-propagated one: the
// mount on top must be reported with ATTACH | DETACH (it is re-attached to
// the mount below), the others with a plain DETACH.
// The bracketed numbers in the comments are indices into mnts[].
TEST_F(fanotify, reparent)
{
	uint64_t mnts[6] = { self->root_id };
	// Mount IDs and masks of the events received for one operation.
	uint64_t dmnts[3];
	uint64_t masks[3];
	unsigned int i;
	int ret;

	// Create setup with a[1] -> b[2] propagation
	ret = mount("/", "/a", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	ret = mount("", "/a", NULL, MS_SHARED, NULL);
	ASSERT_EQ(ret, 0);

	ret = mount("/a", "/b", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	ret = mount("", "/b", NULL, MS_SLAVE, NULL);
	ASSERT_EQ(ret, 0);

	// Two bind mounts so far, so two attach events.
	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);

	check_mounted(_metadata, mnts, 3);

	// Mount on a[3], which is propagated to b[4]
	ret = mount("/", "/a", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);

	check_mounted(_metadata, mnts, 5);

	// Mount on b[5], not propagated
	ret = mount("/", "/b", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);

	check_mounted(_metadata, mnts, 6);

	// Umount a[3], which is propagated to b[4], but not b[5]
	// This will result in b[5] "falling" on b[2]
	ret = umount("/a");
	ASSERT_EQ(ret, 0);

	// Three events, in no particular order, for mounts [3], [4] and [5].
	expect_notify_n(_metadata, self, 3, masks, dmnts);
	verify_mount_ids(_metadata, mnts + 3, dmnts, 3);

	// [5] was reparented (attach + detach); [3] and [4] only went away.
	for (i = 0; i < 3; i++) {
		if (dmnts[i] == mnts[5]) {
			ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
		} else {
			ASSERT_EQ(masks[i], FAN_MNT_DETACH);
		}
	}

	// Remaining mounts are [0], [1], [2] and [5]; compact the list.
	mnts[3] = mnts[5];
	check_mounted(_metadata, mnts, 4);

	// Cleanup
	ret = umount("/b");
	ASSERT_EQ(ret, 0);

	ret = umount("/a");
	ASSERT_EQ(ret, 0);

	ret = umount("/b");
	ASSERT_EQ(ret, 0);

	// The three remaining non-root mounts must each be reported detached.
	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
	verify_mount_ids(_metadata, mnts + 1, dmnts, 3);

	check_mounted(_metadata, mnts, 1);
}
471 
472 TEST_F(fanotify, rmdir)
473 {
474 	uint64_t mnts[3] = { self->root_id };
475 	int ret;
476 
477 	ret = mount("/", "/a", NULL, MS_BIND, NULL);
478 	ASSERT_EQ(ret, 0);
479 
480 	ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
481 	ASSERT_EQ(ret, 0);
482 
483 	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
484 
485 	check_mounted(_metadata, mnts, 3);
486 
487 	ret = chdir("/a");
488 	ASSERT_EQ(ret, 0);
489 
490 	ret = fork();
491 	ASSERT_GE(ret, 0);
492 
493 	if (ret == 0) {
494 		chdir("/");
495 		unshare(CLONE_NEWNS);
496 		mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
497 		umount2("/a", MNT_DETACH);
498 		// This triggers a detach in the other namespace
499 		rmdir("/a");
500 		exit(0);
501 	}
502 	wait(NULL);
503 
504 	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
505 	check_mounted(_metadata, mnts, 1);
506 
507 	// Cleanup
508 	ret = chdir("/");
509 	ASSERT_EQ(ret, 0);
510 }
511 
// pivot_root() must report both the old and the new root mount with
// FAN_MNT_ATTACH | FAN_MNT_DETACH while leaving the set of mounts unchanged.
TEST_F(fanotify, pivot_root)
{
	// mnts[0]: root mount, mnts[1]: bind mount on /a/new, mnts[2]: tmpfs
	// on /a. The order matters: only mnts[0..1] are compared below.
	uint64_t mnts[3] = { self->root_id };
	// Mount IDs reported by the pivot_root() events.
	uint64_t mnts2[3];
	int ret;

	ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
	ASSERT_EQ(ret, 0);

	mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);

	ret = mkdir("/a/new", 0700);
	ASSERT_EQ(ret, 0);

	ret = mkdir("/a/old", 0700);
	ASSERT_EQ(ret, 0);

	// /a/new is a bind mount of the tmpfs, so /a/new/old exists as well.
	ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
	check_mounted(_metadata, mnts, 3);

	ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
	ASSERT_EQ(ret, 0);

	// Two move-style events: one for the old root mount, one for the new.
	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
	verify_mount_ids(_metadata, mnts, mnts2, 2);
	check_mounted(_metadata, mnts, 3);

	// Cleanup: pivot back to the original root, then drop both mounts
	ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
	ASSERT_EQ(ret, 0);

	ret = umount("/a/new");
	ASSERT_EQ(ret, 0);

	ret = umount("/a");
	ASSERT_EQ(ret, 0);

	// Only the root mount is left.
	check_mounted(_metadata, mnts, 1);
}
554 
555 TEST_HARNESS_MAIN
556