1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <limits.h> 6 #include <sched.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <linux/nsfs.h> 11 #include <sys/capability.h> 12 #include <sys/ioctl.h> 13 #include <sys/prctl.h> 14 #include <sys/stat.h> 15 #include <sys/syscall.h> 16 #include <sys/types.h> 17 #include <sys/wait.h> 18 #include <unistd.h> 19 #include "../kselftest_harness.h" 20 #include "../filesystems/utils.h" 21 #include "wrappers.h" 22 23 /* 24 * Test that unprivileged users can only see namespaces they're currently in. 25 * Create a namespace, drop privileges, verify we can only see our own namespaces. 26 */ 27 TEST(listns_unprivileged_current_only) 28 { 29 struct ns_id_req req = { 30 .size = sizeof(req), 31 .spare = 0, 32 .ns_id = 0, 33 .ns_type = CLONE_NEWNET, 34 .spare2 = 0, 35 .user_ns_id = 0, 36 }; 37 __u64 ns_ids[100]; 38 ssize_t ret; 39 int pipefd[2]; 40 pid_t pid; 41 int status; 42 bool found_ours; 43 int unexpected_count; 44 45 ASSERT_EQ(pipe(pipefd), 0); 46 47 pid = fork(); 48 ASSERT_GE(pid, 0); 49 50 if (pid == 0) { 51 int fd; 52 __u64 our_netns_id; 53 bool found_ours; 54 int unexpected_count; 55 56 close(pipefd[0]); 57 58 /* Create user namespace to be unprivileged */ 59 if (setup_userns() < 0) { 60 close(pipefd[1]); 61 exit(1); 62 } 63 64 /* Create a network namespace */ 65 if (unshare(CLONE_NEWNET) < 0) { 66 close(pipefd[1]); 67 exit(1); 68 } 69 70 /* Get our network namespace ID */ 71 fd = open("/proc/self/ns/net", O_RDONLY); 72 if (fd < 0) { 73 close(pipefd[1]); 74 exit(1); 75 } 76 77 if (ioctl(fd, NS_GET_ID, &our_netns_id) < 0) { 78 close(fd); 79 close(pipefd[1]); 80 exit(1); 81 } 82 close(fd); 83 84 /* Now we're unprivileged - list all network namespaces */ 85 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0); 86 if (ret < 0) { 87 close(pipefd[1]); 88 exit(1); 89 } 90 91 /* We should only see our own network namespace */ 92 found_ours = false; 93 unexpected_count = 0; 94 95 for (ssize_t i = 0; i < ret; i++) { 96 if (ns_ids[i] == our_netns_id) { 97 found_ours = true; 98 } else { 99 /* This is either init_net (which we can see) or unexpected */ 100 unexpected_count++; 101 } 102 } 103 104 /* Send results to parent */ 105 write(pipefd[1], &found_ours, sizeof(found_ours)); 106 write(pipefd[1], &unexpected_count, sizeof(unexpected_count)); 107 close(pipefd[1]); 108 exit(0); 109 } 110 111 /* Parent */ 112 close(pipefd[1]); 113 114 found_ours = false; 115 unexpected_count = 0; 116 read(pipefd[0], &found_ours, sizeof(found_ours)); 117 read(pipefd[0], &unexpected_count, sizeof(unexpected_count)); 118 close(pipefd[0]); 119 120 waitpid(pid, &status, 0); 121 ASSERT_TRUE(WIFEXITED(status)); 122 ASSERT_EQ(WEXITSTATUS(status), 0); 123 124 /* Child should have seen its own namespace */ 125 ASSERT_TRUE(found_ours); 126 127 TH_LOG("Unprivileged child saw its own namespace, plus %d others (likely init_net)", 128 unexpected_count); 129 } 130 131 /* 132 * Test that users with CAP_SYS_ADMIN in a user namespace can see 133 * all namespaces owned by that user namespace. 134 */ 135 TEST(listns_cap_sys_admin_in_userns) 136 { 137 struct ns_id_req req = { 138 .size = sizeof(req), 139 .spare = 0, 140 .ns_id = 0, 141 .ns_type = 0, /* All types */ 142 .spare2 = 0, 143 .user_ns_id = 0, /* Will be set to our created user namespace */ 144 }; 145 __u64 ns_ids[100]; 146 int pipefd[2]; 147 pid_t pid; 148 int status; 149 bool success; 150 ssize_t count; 151 152 ASSERT_EQ(pipe(pipefd), 0); 153 154 pid = fork(); 155 ASSERT_GE(pid, 0); 156 157 if (pid == 0) { 158 int fd; 159 __u64 userns_id; 160 ssize_t ret; 161 int min_expected; 162 bool success; 163 164 close(pipefd[0]); 165 166 /* Create user namespace - we'll have CAP_SYS_ADMIN in it */ 167 if (setup_userns() < 0) { 168 close(pipefd[1]); 169 exit(1); 170 } 171 172 /* Get the user namespace ID */ 173 fd = open("/proc/self/ns/user", O_RDONLY); 174 if (fd < 0) { 175 close(pipefd[1]); 176 exit(1); 177 } 178 179 if (ioctl(fd, NS_GET_ID, &userns_id) < 0) { 180 close(fd); 181 close(pipefd[1]); 182 exit(1); 183 } 184 close(fd); 185 186 /* Create several namespaces owned by this user namespace */ 187 unshare(CLONE_NEWNET); 188 unshare(CLONE_NEWUTS); 189 unshare(CLONE_NEWIPC); 190 191 /* List namespaces owned by our user namespace */ 192 req.user_ns_id = userns_id; 193 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0); 194 if (ret < 0) { 195 close(pipefd[1]); 196 exit(1); 197 } 198 199 /* 200 * We have CAP_SYS_ADMIN in this user namespace, 201 * so we should see all namespaces owned by it. 202 * That includes: net, uts, ipc, and the user namespace itself. 203 */ 204 min_expected = 4; 205 success = (ret >= min_expected); 206 207 write(pipefd[1], &success, sizeof(success)); 208 write(pipefd[1], &ret, sizeof(ret)); 209 close(pipefd[1]); 210 exit(0); 211 } 212 213 /* Parent */ 214 close(pipefd[1]); 215 216 success = false; 217 count = 0; 218 read(pipefd[0], &success, sizeof(success)); 219 read(pipefd[0], &count, sizeof(count)); 220 close(pipefd[0]); 221 222 waitpid(pid, &status, 0); 223 ASSERT_TRUE(WIFEXITED(status)); 224 ASSERT_EQ(WEXITSTATUS(status), 0); 225 226 ASSERT_TRUE(success); 227 TH_LOG("User with CAP_SYS_ADMIN saw %zd namespaces owned by their user namespace", 228 count); 229 } 230 231 /* 232 * Test that users cannot see namespaces from unrelated user namespaces. 233 * Create two sibling user namespaces, verify they can't see each other's 234 * owned namespaces. 235 */ 236 TEST(listns_cannot_see_sibling_userns_namespaces) 237 { 238 int pipefd[2]; 239 pid_t pid1, pid2; 240 int status; 241 __u64 netns_a_id; 242 int pipefd2[2]; 243 bool found_sibling_netns; 244 245 ASSERT_EQ(pipe(pipefd), 0); 246 247 /* Fork first child - creates user namespace A */ 248 pid1 = fork(); 249 ASSERT_GE(pid1, 0); 250 251 if (pid1 == 0) { 252 int fd; 253 __u64 netns_a_id; 254 char buf; 255 256 close(pipefd[0]); 257 258 /* Create user namespace A */ 259 if (setup_userns() < 0) { 260 close(pipefd[1]); 261 exit(1); 262 } 263 264 /* Create network namespace owned by user namespace A */ 265 if (unshare(CLONE_NEWNET) < 0) { 266 close(pipefd[1]); 267 exit(1); 268 } 269 270 /* Get network namespace ID */ 271 fd = open("/proc/self/ns/net", O_RDONLY); 272 if (fd < 0) { 273 close(pipefd[1]); 274 exit(1); 275 } 276 277 if (ioctl(fd, NS_GET_ID, &netns_a_id) < 0) { 278 close(fd); 279 close(pipefd[1]); 280 exit(1); 281 } 282 close(fd); 283 284 /* Send namespace ID to parent */ 285 write(pipefd[1], &netns_a_id, sizeof(netns_a_id)); 286 287 /* Keep alive for sibling to check */ 288 read(pipefd[1], &buf, 1); 289 close(pipefd[1]); 290 exit(0); 291 } 292 293 /* Parent reads namespace A ID */ 294 close(pipefd[1]); 295 netns_a_id = 0; 296 read(pipefd[0], &netns_a_id, sizeof(netns_a_id)); 297 298 TH_LOG("User namespace A created network namespace with ID %llu", 299 (unsigned long long)netns_a_id); 300 301 /* Fork second child - creates user namespace B */ 302 ASSERT_EQ(pipe(pipefd2), 0); 303 304 pid2 = fork(); 305 ASSERT_GE(pid2, 0); 306 307 if (pid2 == 0) { 308 struct ns_id_req req = { 309 .size = sizeof(req), 310 .spare = 0, 311 .ns_id = 0, 312 .ns_type = CLONE_NEWNET, 313 .spare2 = 0, 314 .user_ns_id = 0, 315 }; 316 __u64 ns_ids[100]; 317 ssize_t ret; 318 bool found_sibling_netns; 319 320 close(pipefd[0]); 321 close(pipefd2[0]); 322 323 /* Create user namespace B (sibling to A) */ 324 if (setup_userns() < 0) { 325 close(pipefd2[1]); 326 exit(1); 327 } 328 329 /* Try to list all network namespaces */ 330 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0); 331 332 found_sibling_netns = false; 333 if (ret > 0) { 334 for (ssize_t i = 0; i < ret; i++) { 335 if (ns_ids[i] == netns_a_id) { 336 found_sibling_netns = true; 337 break; 338 } 339 } 340 } 341 342 /* We should NOT see the sibling's network namespace */ 343 write(pipefd2[1], &found_sibling_netns, sizeof(found_sibling_netns)); 344 close(pipefd2[1]); 345 exit(0); 346 } 347 348 /* Parent reads result from second child */ 349 close(pipefd2[1]); 350 found_sibling_netns = false; 351 read(pipefd2[0], &found_sibling_netns, sizeof(found_sibling_netns)); 352 close(pipefd2[0]); 353 354 /* Signal first child to exit */ 355 close(pipefd[0]); 356 357 /* Wait for both children */ 358 waitpid(pid2, &status, 0); 359 ASSERT_TRUE(WIFEXITED(status)); 360 361 waitpid(pid1, &status, 0); 362 ASSERT_TRUE(WIFEXITED(status)); 363 364 /* Second child should NOT have seen first child's namespace */ 365 ASSERT_FALSE(found_sibling_netns); 366 TH_LOG("User namespace B correctly could not see sibling namespace A's network namespace"); 367 } 368 369 /* 370 * Test permission checking with LISTNS_CURRENT_USER. 371 * Verify that listing with LISTNS_CURRENT_USER respects permissions. 372 */ 373 TEST(listns_current_user_permissions) 374 { 375 int pipefd[2]; 376 pid_t pid; 377 int status; 378 bool success; 379 ssize_t count; 380 381 ASSERT_EQ(pipe(pipefd), 0); 382 383 pid = fork(); 384 ASSERT_GE(pid, 0); 385 386 if (pid == 0) { 387 struct ns_id_req req = { 388 .size = sizeof(req), 389 .spare = 0, 390 .ns_id = 0, 391 .ns_type = 0, 392 .spare2 = 0, 393 .user_ns_id = LISTNS_CURRENT_USER, 394 }; 395 __u64 ns_ids[100]; 396 ssize_t ret; 397 bool success; 398 399 close(pipefd[0]); 400 401 /* Create user namespace */ 402 if (setup_userns() < 0) { 403 close(pipefd[1]); 404 exit(1); 405 } 406 407 /* Create some namespaces owned by this user namespace */ 408 if (unshare(CLONE_NEWNET) < 0) { 409 close(pipefd[1]); 410 exit(1); 411 } 412 413 if (unshare(CLONE_NEWUTS) < 0) { 414 close(pipefd[1]); 415 exit(1); 416 } 417 418 /* List with LISTNS_CURRENT_USER - should see our owned namespaces */ 419 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0); 420 421 success = (ret >= 3); /* At least user, net, uts */ 422 write(pipefd[1], &success, sizeof(success)); 423 write(pipefd[1], &ret, sizeof(ret)); 424 close(pipefd[1]); 425 exit(0); 426 } 427 428 /* Parent */ 429 close(pipefd[1]); 430 431 success = false; 432 count = 0; 433 read(pipefd[0], &success, sizeof(success)); 434 read(pipefd[0], &count, sizeof(count)); 435 close(pipefd[0]); 436 437 waitpid(pid, &status, 0); 438 ASSERT_TRUE(WIFEXITED(status)); 439 ASSERT_EQ(WEXITSTATUS(status), 0); 440 441 ASSERT_TRUE(success); 442 TH_LOG("LISTNS_CURRENT_USER returned %zd namespaces", count); 443 } 444 445 /* 446 * Test that CAP_SYS_ADMIN in parent user namespace allows seeing 447 * child user namespace's owned namespaces. 448 */ 449 TEST(listns_parent_userns_cap_sys_admin) 450 { 451 int pipefd[2]; 452 pid_t pid; 453 int status; 454 bool found_child_userns; 455 ssize_t count; 456 457 ASSERT_EQ(pipe(pipefd), 0); 458 459 pid = fork(); 460 ASSERT_GE(pid, 0); 461 462 if (pid == 0) { 463 int fd; 464 __u64 parent_userns_id; 465 __u64 child_userns_id; 466 struct ns_id_req req; 467 __u64 ns_ids[100]; 468 ssize_t ret; 469 bool found_child_userns; 470 471 close(pipefd[0]); 472 473 /* Create parent user namespace - we have CAP_SYS_ADMIN in it */ 474 if (setup_userns() < 0) { 475 close(pipefd[1]); 476 exit(1); 477 } 478 479 /* Get parent user namespace ID */ 480 fd = open("/proc/self/ns/user", O_RDONLY); 481 if (fd < 0) { 482 close(pipefd[1]); 483 exit(1); 484 } 485 486 if (ioctl(fd, NS_GET_ID, &parent_userns_id) < 0) { 487 close(fd); 488 close(pipefd[1]); 489 exit(1); 490 } 491 close(fd); 492 493 /* Create child user namespace */ 494 if (setup_userns() < 0) { 495 close(pipefd[1]); 496 exit(1); 497 } 498 499 /* Get child user namespace ID */ 500 fd = open("/proc/self/ns/user", O_RDONLY); 501 if (fd < 0) { 502 close(pipefd[1]); 503 exit(1); 504 } 505 506 if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) { 507 close(fd); 508 close(pipefd[1]); 509 exit(1); 510 } 511 close(fd); 512 513 /* Create namespaces owned by child user namespace */ 514 if (unshare(CLONE_NEWNET) < 0) { 515 close(pipefd[1]); 516 exit(1); 517 } 518 519 /* List namespaces owned by parent user namespace */ 520 req.size = sizeof(req); 521 req.spare = 0; 522 req.ns_id = 0; 523 req.ns_type = 0; 524 req.spare2 = 0; 525 req.user_ns_id = parent_userns_id; 526 527 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0); 528 529 /* Should see child user namespace in the list */ 530 found_child_userns = false; 531 if (ret > 0) { 532 for (ssize_t i = 0; i < ret; i++) { 533 if (ns_ids[i] == child_userns_id) { 534 found_child_userns = true; 535 break; 536 } 537 } 538 } 539 540 write(pipefd[1], &found_child_userns, sizeof(found_child_userns)); 541 write(pipefd[1], &ret, sizeof(ret)); 542 close(pipefd[1]); 543 exit(0); 544 } 545 546 /* Parent */ 547 close(pipefd[1]); 548 549 found_child_userns = false; 550 count = 0; 551 read(pipefd[0], &found_child_userns, sizeof(found_child_userns)); 552 read(pipefd[0], &count, sizeof(count)); 553 close(pipefd[0]); 554 555 waitpid(pid, &status, 0); 556 ASSERT_TRUE(WIFEXITED(status)); 557 ASSERT_EQ(WEXITSTATUS(status), 0); 558 559 ASSERT_TRUE(found_child_userns); 560 TH_LOG("Process with CAP_SYS_ADMIN in parent user namespace saw child user namespace (total: %zd)", 561 count); 562 } 563 564 /* 565 * Test that we can see user namespaces we have CAP_SYS_ADMIN inside of. 566 * This is different from seeing namespaces owned by a user namespace. 567 */ 568 TEST(listns_cap_sys_admin_inside_userns) 569 { 570 int pipefd[2]; 571 pid_t pid; 572 int status; 573 bool found_ours; 574 575 ASSERT_EQ(pipe(pipefd), 0); 576 577 pid = fork(); 578 ASSERT_GE(pid, 0); 579 580 if (pid == 0) { 581 int fd; 582 __u64 our_userns_id; 583 struct ns_id_req req; 584 __u64 ns_ids[100]; 585 ssize_t ret; 586 bool found_ours; 587 588 close(pipefd[0]); 589 590 /* Create user namespace - we have CAP_SYS_ADMIN inside it */ 591 if (setup_userns() < 0) { 592 close(pipefd[1]); 593 exit(1); 594 } 595 596 /* Get our user namespace ID */ 597 fd = open("/proc/self/ns/user", O_RDONLY); 598 if (fd < 0) { 599 close(pipefd[1]); 600 exit(1); 601 } 602 603 if (ioctl(fd, NS_GET_ID, &our_userns_id) < 0) { 604 close(fd); 605 close(pipefd[1]); 606 exit(1); 607 } 608 close(fd); 609 610 /* List all user namespaces globally */ 611 req.size = sizeof(req); 612 req.spare = 0; 613 req.ns_id = 0; 614 req.ns_type = CLONE_NEWUSER; 615 req.spare2 = 0; 616 req.user_ns_id = 0; 617 618 ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0); 619 620 /* We should be able to see our own user namespace */ 621 found_ours = false; 622 if (ret > 0) { 623 for (ssize_t i = 0; i < ret; i++) { 624 if (ns_ids[i] == our_userns_id) { 625 found_ours = true; 626 break; 627 } 628 } 629 } 630 631 write(pipefd[1], &found_ours, sizeof(found_ours)); 632 close(pipefd[1]); 633 exit(0); 634 } 635 636 /* Parent */ 637 close(pipefd[1]); 638 639 found_ours = false; 640 read(pipefd[0], &found_ours, sizeof(found_ours)); 641 close(pipefd[0]); 642 643 waitpid(pid, &status, 0); 644 ASSERT_TRUE(WIFEXITED(status)); 645 ASSERT_EQ(WEXITSTATUS(status), 0); 646 647 ASSERT_TRUE(found_ours); 648 TH_LOG("Process can see user namespace it has CAP_SYS_ADMIN inside of"); 649 } 650 651 /* 652 * Test that dropping CAP_SYS_ADMIN restricts what we can see. 653 */ 654 TEST(listns_drop_cap_sys_admin) 655 { 656 cap_t caps; 657 cap_value_t cap_list[1] = { CAP_SYS_ADMIN }; 658 659 /* This test needs to start with CAP_SYS_ADMIN */ 660 caps = cap_get_proc(); 661 if (!caps) { 662 SKIP(return, "Cannot get capabilities"); 663 } 664 665 cap_flag_value_t cap_val; 666 if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &cap_val) < 0) { 667 cap_free(caps); 668 SKIP(return, "Cannot check CAP_SYS_ADMIN"); 669 } 670 671 if (cap_val != CAP_SET) { 672 cap_free(caps); 673 SKIP(return, "Test needs CAP_SYS_ADMIN to start"); 674 } 675 cap_free(caps); 676 677 int pipefd[2]; 678 pid_t pid; 679 int status; 680 bool correct; 681 ssize_t count_before, count_after; 682 683 ASSERT_EQ(pipe(pipefd), 0); 684 685 pid = fork(); 686 ASSERT_GE(pid, 0); 687 688 if (pid == 0) { 689 struct ns_id_req req = { 690 .size = sizeof(req), 691 .spare = 0, 692 .ns_id = 0, 693 .ns_type = CLONE_NEWNET, 694 .spare2 = 0, 695 .user_ns_id = LISTNS_CURRENT_USER, 696 }; 697 __u64 ns_ids_before[100]; 698 ssize_t count_before; 699 __u64 ns_ids_after[100]; 700 ssize_t count_after; 701 bool correct; 702 703 close(pipefd[0]); 704 705 /* Create user namespace */ 706 if (setup_userns() < 0) { 707 close(pipefd[1]); 708 exit(1); 709 } 710 711 /* Count namespaces with CAP_SYS_ADMIN */ 712 count_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0); 713 714 /* Drop CAP_SYS_ADMIN */ 715 caps = cap_get_proc(); 716 if (caps) { 717 cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR); 718 cap_set_flag(caps, CAP_PERMITTED, 1, cap_list, CAP_CLEAR); 719 cap_set_proc(caps); 720 cap_free(caps); 721 } 722 723 /* Ensure we can't regain the capability */ 724 prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 725 726 /* Count namespaces without CAP_SYS_ADMIN */ 727 count_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0); 728 729 /* Without CAP_SYS_ADMIN, we should see same or fewer namespaces */ 730 correct = (count_after <= count_before); 731 732 write(pipefd[1], &correct, sizeof(correct)); 733 write(pipefd[1], &count_before, sizeof(count_before)); 734 write(pipefd[1], &count_after, sizeof(count_after)); 735 close(pipefd[1]); 736 exit(0); 737 } 738 739 /* Parent */ 740 close(pipefd[1]); 741 742 correct = false; 743 count_before = 0; 744 count_after = 0; 745 read(pipefd[0], &correct, sizeof(correct)); 746 read(pipefd[0], &count_before, sizeof(count_before)); 747 read(pipefd[0], &count_after, sizeof(count_after)); 748 close(pipefd[0]); 749 750 waitpid(pid, &status, 0); 751 ASSERT_TRUE(WIFEXITED(status)); 752 ASSERT_EQ(WEXITSTATUS(status), 0); 753 754 ASSERT_TRUE(correct); 755 TH_LOG("With CAP_SYS_ADMIN: %zd namespaces, without: %zd namespaces", 756 count_before, count_after); 757 } 758 759 TEST_HARNESS_MAIN 760