1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <limits.h>
6 #include <sched.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <linux/nsfs.h>
11 #include <sys/capability.h>
12 #include <sys/ioctl.h>
13 #include <sys/prctl.h>
14 #include <sys/stat.h>
15 #include <sys/syscall.h>
16 #include <sys/types.h>
17 #include <sys/wait.h>
18 #include <unistd.h>
19 #include "../kselftest_harness.h"
20 #include "../filesystems/utils.h"
21 #include "wrappers.h"
22
23 /*
24 * Test that unprivileged users can only see namespaces they're currently in.
25 * Create a namespace, drop privileges, verify we can only see our own namespaces.
26 */
TEST(listns_unprivileged_current_only)27 TEST(listns_unprivileged_current_only)
28 {
29 struct ns_id_req req = {
30 .size = sizeof(req),
31 .spare = 0,
32 .ns_id = 0,
33 .ns_type = CLONE_NEWNET,
34 .spare2 = 0,
35 .user_ns_id = 0,
36 };
37 __u64 ns_ids[100];
38 ssize_t ret;
39 int pipefd[2];
40 pid_t pid;
41 int status;
42 bool found_ours;
43 int unexpected_count;
44
45 ASSERT_EQ(pipe(pipefd), 0);
46
47 pid = fork();
48 ASSERT_GE(pid, 0);
49
50 if (pid == 0) {
51 int fd;
52 __u64 our_netns_id;
53 bool found_ours;
54 int unexpected_count;
55
56 close(pipefd[0]);
57
58 /* Create user namespace to be unprivileged */
59 if (setup_userns() < 0) {
60 close(pipefd[1]);
61 exit(1);
62 }
63
64 /* Create a network namespace */
65 if (unshare(CLONE_NEWNET) < 0) {
66 close(pipefd[1]);
67 exit(1);
68 }
69
70 /* Get our network namespace ID */
71 fd = open("/proc/self/ns/net", O_RDONLY);
72 if (fd < 0) {
73 close(pipefd[1]);
74 exit(1);
75 }
76
77 if (ioctl(fd, NS_GET_ID, &our_netns_id) < 0) {
78 close(fd);
79 close(pipefd[1]);
80 exit(1);
81 }
82 close(fd);
83
84 /* Now we're unprivileged - list all network namespaces */
85 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
86 if (ret < 0) {
87 close(pipefd[1]);
88 exit(1);
89 }
90
91 /* We should only see our own network namespace */
92 found_ours = false;
93 unexpected_count = 0;
94
95 for (ssize_t i = 0; i < ret; i++) {
96 if (ns_ids[i] == our_netns_id) {
97 found_ours = true;
98 } else {
99 /* This is either init_net (which we can see) or unexpected */
100 unexpected_count++;
101 }
102 }
103
104 /* Send results to parent */
105 write(pipefd[1], &found_ours, sizeof(found_ours));
106 write(pipefd[1], &unexpected_count, sizeof(unexpected_count));
107 close(pipefd[1]);
108 exit(0);
109 }
110
111 /* Parent */
112 close(pipefd[1]);
113
114 found_ours = false;
115 unexpected_count = 0;
116 read(pipefd[0], &found_ours, sizeof(found_ours));
117 read(pipefd[0], &unexpected_count, sizeof(unexpected_count));
118 close(pipefd[0]);
119
120 waitpid(pid, &status, 0);
121 ASSERT_TRUE(WIFEXITED(status));
122 ASSERT_EQ(WEXITSTATUS(status), 0);
123
124 /* Child should have seen its own namespace */
125 ASSERT_TRUE(found_ours);
126
127 TH_LOG("Unprivileged child saw its own namespace, plus %d others (likely init_net)",
128 unexpected_count);
129 }
130
131 /*
132 * Test that users with CAP_SYS_ADMIN in a user namespace can see
133 * all namespaces owned by that user namespace.
134 */
TEST(listns_cap_sys_admin_in_userns)135 TEST(listns_cap_sys_admin_in_userns)
136 {
137 struct ns_id_req req = {
138 .size = sizeof(req),
139 .spare = 0,
140 .ns_id = 0,
141 .ns_type = 0, /* All types */
142 .spare2 = 0,
143 .user_ns_id = 0, /* Will be set to our created user namespace */
144 };
145 __u64 ns_ids[100];
146 int pipefd[2];
147 pid_t pid;
148 int status;
149 bool success;
150 ssize_t count;
151
152 ASSERT_EQ(pipe(pipefd), 0);
153
154 pid = fork();
155 ASSERT_GE(pid, 0);
156
157 if (pid == 0) {
158 int fd;
159 __u64 userns_id;
160 ssize_t ret;
161 int min_expected;
162 bool success;
163
164 close(pipefd[0]);
165
166 /* Create user namespace - we'll have CAP_SYS_ADMIN in it */
167 if (setup_userns() < 0) {
168 close(pipefd[1]);
169 exit(1);
170 }
171
172 /* Get the user namespace ID */
173 fd = open("/proc/self/ns/user", O_RDONLY);
174 if (fd < 0) {
175 close(pipefd[1]);
176 exit(1);
177 }
178
179 if (ioctl(fd, NS_GET_ID, &userns_id) < 0) {
180 close(fd);
181 close(pipefd[1]);
182 exit(1);
183 }
184 close(fd);
185
186 /* Create several namespaces owned by this user namespace */
187 unshare(CLONE_NEWNET);
188 unshare(CLONE_NEWUTS);
189 unshare(CLONE_NEWIPC);
190
191 /* List namespaces owned by our user namespace */
192 req.user_ns_id = userns_id;
193 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
194 if (ret < 0) {
195 close(pipefd[1]);
196 exit(1);
197 }
198
199 /*
200 * We have CAP_SYS_ADMIN in this user namespace,
201 * so we should see all namespaces owned by it.
202 * That includes: net, uts, ipc, and the user namespace itself.
203 */
204 min_expected = 4;
205 success = (ret >= min_expected);
206
207 write(pipefd[1], &success, sizeof(success));
208 write(pipefd[1], &ret, sizeof(ret));
209 close(pipefd[1]);
210 exit(0);
211 }
212
213 /* Parent */
214 close(pipefd[1]);
215
216 success = false;
217 count = 0;
218 read(pipefd[0], &success, sizeof(success));
219 read(pipefd[0], &count, sizeof(count));
220 close(pipefd[0]);
221
222 waitpid(pid, &status, 0);
223 ASSERT_TRUE(WIFEXITED(status));
224 ASSERT_EQ(WEXITSTATUS(status), 0);
225
226 ASSERT_TRUE(success);
227 TH_LOG("User with CAP_SYS_ADMIN saw %zd namespaces owned by their user namespace",
228 count);
229 }
230
231 /*
232 * Test that users cannot see namespaces from unrelated user namespaces.
233 * Create two sibling user namespaces, verify they can't see each other's
234 * owned namespaces.
235 */
TEST(listns_cannot_see_sibling_userns_namespaces)236 TEST(listns_cannot_see_sibling_userns_namespaces)
237 {
238 int pipefd[2];
239 pid_t pid1, pid2;
240 int status;
241 __u64 netns_a_id;
242 int pipefd2[2];
243 bool found_sibling_netns;
244
245 ASSERT_EQ(pipe(pipefd), 0);
246
247 /* Fork first child - creates user namespace A */
248 pid1 = fork();
249 ASSERT_GE(pid1, 0);
250
251 if (pid1 == 0) {
252 int fd;
253 __u64 netns_a_id;
254 char buf;
255
256 close(pipefd[0]);
257
258 /* Create user namespace A */
259 if (setup_userns() < 0) {
260 close(pipefd[1]);
261 exit(1);
262 }
263
264 /* Create network namespace owned by user namespace A */
265 if (unshare(CLONE_NEWNET) < 0) {
266 close(pipefd[1]);
267 exit(1);
268 }
269
270 /* Get network namespace ID */
271 fd = open("/proc/self/ns/net", O_RDONLY);
272 if (fd < 0) {
273 close(pipefd[1]);
274 exit(1);
275 }
276
277 if (ioctl(fd, NS_GET_ID, &netns_a_id) < 0) {
278 close(fd);
279 close(pipefd[1]);
280 exit(1);
281 }
282 close(fd);
283
284 /* Send namespace ID to parent */
285 write(pipefd[1], &netns_a_id, sizeof(netns_a_id));
286
287 /* Keep alive for sibling to check */
288 read(pipefd[1], &buf, 1);
289 close(pipefd[1]);
290 exit(0);
291 }
292
293 /* Parent reads namespace A ID */
294 close(pipefd[1]);
295 netns_a_id = 0;
296 read(pipefd[0], &netns_a_id, sizeof(netns_a_id));
297
298 TH_LOG("User namespace A created network namespace with ID %llu",
299 (unsigned long long)netns_a_id);
300
301 /* Fork second child - creates user namespace B */
302 ASSERT_EQ(pipe(pipefd2), 0);
303
304 pid2 = fork();
305 ASSERT_GE(pid2, 0);
306
307 if (pid2 == 0) {
308 struct ns_id_req req = {
309 .size = sizeof(req),
310 .spare = 0,
311 .ns_id = 0,
312 .ns_type = CLONE_NEWNET,
313 .spare2 = 0,
314 .user_ns_id = 0,
315 };
316 __u64 ns_ids[100];
317 ssize_t ret;
318 bool found_sibling_netns;
319
320 close(pipefd[0]);
321 close(pipefd2[0]);
322
323 /* Create user namespace B (sibling to A) */
324 if (setup_userns() < 0) {
325 close(pipefd2[1]);
326 exit(1);
327 }
328
329 /* Try to list all network namespaces */
330 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
331
332 found_sibling_netns = false;
333 if (ret > 0) {
334 for (ssize_t i = 0; i < ret; i++) {
335 if (ns_ids[i] == netns_a_id) {
336 found_sibling_netns = true;
337 break;
338 }
339 }
340 }
341
342 /* We should NOT see the sibling's network namespace */
343 write(pipefd2[1], &found_sibling_netns, sizeof(found_sibling_netns));
344 close(pipefd2[1]);
345 exit(0);
346 }
347
348 /* Parent reads result from second child */
349 close(pipefd2[1]);
350 found_sibling_netns = false;
351 read(pipefd2[0], &found_sibling_netns, sizeof(found_sibling_netns));
352 close(pipefd2[0]);
353
354 /* Signal first child to exit */
355 close(pipefd[0]);
356
357 /* Wait for both children */
358 waitpid(pid2, &status, 0);
359 ASSERT_TRUE(WIFEXITED(status));
360
361 waitpid(pid1, &status, 0);
362 ASSERT_TRUE(WIFEXITED(status));
363
364 /* Second child should NOT have seen first child's namespace */
365 ASSERT_FALSE(found_sibling_netns);
366 TH_LOG("User namespace B correctly could not see sibling namespace A's network namespace");
367 }
368
369 /*
370 * Test permission checking with LISTNS_CURRENT_USER.
371 * Verify that listing with LISTNS_CURRENT_USER respects permissions.
372 */
TEST(listns_current_user_permissions)373 TEST(listns_current_user_permissions)
374 {
375 int pipefd[2];
376 pid_t pid;
377 int status;
378 bool success;
379 ssize_t count;
380
381 ASSERT_EQ(pipe(pipefd), 0);
382
383 pid = fork();
384 ASSERT_GE(pid, 0);
385
386 if (pid == 0) {
387 struct ns_id_req req = {
388 .size = sizeof(req),
389 .spare = 0,
390 .ns_id = 0,
391 .ns_type = 0,
392 .spare2 = 0,
393 .user_ns_id = LISTNS_CURRENT_USER,
394 };
395 __u64 ns_ids[100];
396 ssize_t ret;
397 bool success;
398
399 close(pipefd[0]);
400
401 /* Create user namespace */
402 if (setup_userns() < 0) {
403 close(pipefd[1]);
404 exit(1);
405 }
406
407 /* Create some namespaces owned by this user namespace */
408 if (unshare(CLONE_NEWNET) < 0) {
409 close(pipefd[1]);
410 exit(1);
411 }
412
413 if (unshare(CLONE_NEWUTS) < 0) {
414 close(pipefd[1]);
415 exit(1);
416 }
417
418 /* List with LISTNS_CURRENT_USER - should see our owned namespaces */
419 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
420
421 success = (ret >= 3); /* At least user, net, uts */
422 write(pipefd[1], &success, sizeof(success));
423 write(pipefd[1], &ret, sizeof(ret));
424 close(pipefd[1]);
425 exit(0);
426 }
427
428 /* Parent */
429 close(pipefd[1]);
430
431 success = false;
432 count = 0;
433 read(pipefd[0], &success, sizeof(success));
434 read(pipefd[0], &count, sizeof(count));
435 close(pipefd[0]);
436
437 waitpid(pid, &status, 0);
438 ASSERT_TRUE(WIFEXITED(status));
439 ASSERT_EQ(WEXITSTATUS(status), 0);
440
441 ASSERT_TRUE(success);
442 TH_LOG("LISTNS_CURRENT_USER returned %zd namespaces", count);
443 }
444
445 /*
446 * Test that CAP_SYS_ADMIN in parent user namespace allows seeing
447 * child user namespace's owned namespaces.
448 */
TEST(listns_parent_userns_cap_sys_admin)449 TEST(listns_parent_userns_cap_sys_admin)
450 {
451 int pipefd[2];
452 pid_t pid;
453 int status;
454 bool found_child_userns;
455 ssize_t count;
456
457 ASSERT_EQ(pipe(pipefd), 0);
458
459 pid = fork();
460 ASSERT_GE(pid, 0);
461
462 if (pid == 0) {
463 int fd;
464 __u64 parent_userns_id;
465 __u64 child_userns_id;
466 struct ns_id_req req;
467 __u64 ns_ids[100];
468 ssize_t ret;
469 bool found_child_userns;
470
471 close(pipefd[0]);
472
473 /* Create parent user namespace - we have CAP_SYS_ADMIN in it */
474 if (setup_userns() < 0) {
475 close(pipefd[1]);
476 exit(1);
477 }
478
479 /* Get parent user namespace ID */
480 fd = open("/proc/self/ns/user", O_RDONLY);
481 if (fd < 0) {
482 close(pipefd[1]);
483 exit(1);
484 }
485
486 if (ioctl(fd, NS_GET_ID, &parent_userns_id) < 0) {
487 close(fd);
488 close(pipefd[1]);
489 exit(1);
490 }
491 close(fd);
492
493 /* Create child user namespace */
494 if (setup_userns() < 0) {
495 close(pipefd[1]);
496 exit(1);
497 }
498
499 /* Get child user namespace ID */
500 fd = open("/proc/self/ns/user", O_RDONLY);
501 if (fd < 0) {
502 close(pipefd[1]);
503 exit(1);
504 }
505
506 if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
507 close(fd);
508 close(pipefd[1]);
509 exit(1);
510 }
511 close(fd);
512
513 /* Create namespaces owned by child user namespace */
514 if (unshare(CLONE_NEWNET) < 0) {
515 close(pipefd[1]);
516 exit(1);
517 }
518
519 /* List namespaces owned by parent user namespace */
520 req.size = sizeof(req);
521 req.spare = 0;
522 req.ns_id = 0;
523 req.ns_type = 0;
524 req.spare2 = 0;
525 req.user_ns_id = parent_userns_id;
526
527 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
528
529 /* Should see child user namespace in the list */
530 found_child_userns = false;
531 if (ret > 0) {
532 for (ssize_t i = 0; i < ret; i++) {
533 if (ns_ids[i] == child_userns_id) {
534 found_child_userns = true;
535 break;
536 }
537 }
538 }
539
540 write(pipefd[1], &found_child_userns, sizeof(found_child_userns));
541 write(pipefd[1], &ret, sizeof(ret));
542 close(pipefd[1]);
543 exit(0);
544 }
545
546 /* Parent */
547 close(pipefd[1]);
548
549 found_child_userns = false;
550 count = 0;
551 read(pipefd[0], &found_child_userns, sizeof(found_child_userns));
552 read(pipefd[0], &count, sizeof(count));
553 close(pipefd[0]);
554
555 waitpid(pid, &status, 0);
556 ASSERT_TRUE(WIFEXITED(status));
557 ASSERT_EQ(WEXITSTATUS(status), 0);
558
559 ASSERT_TRUE(found_child_userns);
560 TH_LOG("Process with CAP_SYS_ADMIN in parent user namespace saw child user namespace (total: %zd)",
561 count);
562 }
563
564 /*
565 * Test that we can see user namespaces we have CAP_SYS_ADMIN inside of.
566 * This is different from seeing namespaces owned by a user namespace.
567 */
TEST(listns_cap_sys_admin_inside_userns)568 TEST(listns_cap_sys_admin_inside_userns)
569 {
570 int pipefd[2];
571 pid_t pid;
572 int status;
573 bool found_ours;
574
575 ASSERT_EQ(pipe(pipefd), 0);
576
577 pid = fork();
578 ASSERT_GE(pid, 0);
579
580 if (pid == 0) {
581 int fd;
582 __u64 our_userns_id;
583 struct ns_id_req req;
584 __u64 ns_ids[100];
585 ssize_t ret;
586 bool found_ours;
587
588 close(pipefd[0]);
589
590 /* Create user namespace - we have CAP_SYS_ADMIN inside it */
591 if (setup_userns() < 0) {
592 close(pipefd[1]);
593 exit(1);
594 }
595
596 /* Get our user namespace ID */
597 fd = open("/proc/self/ns/user", O_RDONLY);
598 if (fd < 0) {
599 close(pipefd[1]);
600 exit(1);
601 }
602
603 if (ioctl(fd, NS_GET_ID, &our_userns_id) < 0) {
604 close(fd);
605 close(pipefd[1]);
606 exit(1);
607 }
608 close(fd);
609
610 /* List all user namespaces globally */
611 req.size = sizeof(req);
612 req.spare = 0;
613 req.ns_id = 0;
614 req.ns_type = CLONE_NEWUSER;
615 req.spare2 = 0;
616 req.user_ns_id = 0;
617
618 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
619
620 /* We should be able to see our own user namespace */
621 found_ours = false;
622 if (ret > 0) {
623 for (ssize_t i = 0; i < ret; i++) {
624 if (ns_ids[i] == our_userns_id) {
625 found_ours = true;
626 break;
627 }
628 }
629 }
630
631 write(pipefd[1], &found_ours, sizeof(found_ours));
632 close(pipefd[1]);
633 exit(0);
634 }
635
636 /* Parent */
637 close(pipefd[1]);
638
639 found_ours = false;
640 read(pipefd[0], &found_ours, sizeof(found_ours));
641 close(pipefd[0]);
642
643 waitpid(pid, &status, 0);
644 ASSERT_TRUE(WIFEXITED(status));
645 ASSERT_EQ(WEXITSTATUS(status), 0);
646
647 ASSERT_TRUE(found_ours);
648 TH_LOG("Process can see user namespace it has CAP_SYS_ADMIN inside of");
649 }
650
651 /*
652 * Test that dropping CAP_SYS_ADMIN restricts what we can see.
653 */
TEST(listns_drop_cap_sys_admin)654 TEST(listns_drop_cap_sys_admin)
655 {
656 cap_t caps;
657 cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
658
659 /* This test needs to start with CAP_SYS_ADMIN */
660 caps = cap_get_proc();
661 if (!caps) {
662 SKIP(return, "Cannot get capabilities");
663 }
664
665 cap_flag_value_t cap_val;
666 if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &cap_val) < 0) {
667 cap_free(caps);
668 SKIP(return, "Cannot check CAP_SYS_ADMIN");
669 }
670
671 if (cap_val != CAP_SET) {
672 cap_free(caps);
673 SKIP(return, "Test needs CAP_SYS_ADMIN to start");
674 }
675 cap_free(caps);
676
677 int pipefd[2];
678 pid_t pid;
679 int status;
680 bool correct;
681 ssize_t count_before, count_after;
682
683 ASSERT_EQ(pipe(pipefd), 0);
684
685 pid = fork();
686 ASSERT_GE(pid, 0);
687
688 if (pid == 0) {
689 struct ns_id_req req = {
690 .size = sizeof(req),
691 .spare = 0,
692 .ns_id = 0,
693 .ns_type = CLONE_NEWNET,
694 .spare2 = 0,
695 .user_ns_id = LISTNS_CURRENT_USER,
696 };
697 __u64 ns_ids_before[100];
698 ssize_t count_before;
699 __u64 ns_ids_after[100];
700 ssize_t count_after;
701 bool correct;
702
703 close(pipefd[0]);
704
705 /* Create user namespace */
706 if (setup_userns() < 0) {
707 close(pipefd[1]);
708 exit(1);
709 }
710
711 /* Count namespaces with CAP_SYS_ADMIN */
712 count_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
713
714 /* Drop CAP_SYS_ADMIN */
715 caps = cap_get_proc();
716 if (caps) {
717 cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR);
718 cap_set_flag(caps, CAP_PERMITTED, 1, cap_list, CAP_CLEAR);
719 cap_set_proc(caps);
720 cap_free(caps);
721 }
722
723 /* Ensure we can't regain the capability */
724 prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
725
726 /* Count namespaces without CAP_SYS_ADMIN */
727 count_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
728
729 /* Without CAP_SYS_ADMIN, we should see same or fewer namespaces */
730 correct = (count_after <= count_before);
731
732 write(pipefd[1], &correct, sizeof(correct));
733 write(pipefd[1], &count_before, sizeof(count_before));
734 write(pipefd[1], &count_after, sizeof(count_after));
735 close(pipefd[1]);
736 exit(0);
737 }
738
739 /* Parent */
740 close(pipefd[1]);
741
742 correct = false;
743 count_before = 0;
744 count_after = 0;
745 read(pipefd[0], &correct, sizeof(correct));
746 read(pipefd[0], &count_before, sizeof(count_before));
747 read(pipefd[0], &count_after, sizeof(count_after));
748 close(pipefd[0]);
749
750 waitpid(pid, &status, 0);
751 ASSERT_TRUE(WIFEXITED(status));
752 ASSERT_EQ(WEXITSTATUS(status), 0);
753
754 ASSERT_TRUE(correct);
755 TH_LOG("With CAP_SYS_ADMIN: %zd namespaces, without: %zd namespaces",
756 count_before, count_after);
757 }
758
/*
 * Macro supplied by ../kselftest_harness.h; presumably expands to the
 * program entry point that runs every TEST() defined above - confirm against
 * the harness header.
 */
TEST_HARNESS_MAIN
760