1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <limits.h> 6 #include <sched.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <linux/nsfs.h> 11 #include <sys/mount.h> 12 #include <sys/stat.h> 13 #include <sys/types.h> 14 #include <sys/wait.h> 15 #include <unistd.h> 16 #include "../kselftest_harness.h" 17 #include "../filesystems/utils.h" 18 19 #ifndef FD_NSFS_ROOT 20 #define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */ 21 #endif 22 23 /* 24 * Test that initial namespaces can be reopened via file handle. 25 * Initial namespaces should have active ref count of 1 from boot. 26 */ 27 TEST(init_ns_always_active) 28 { 29 struct file_handle *handle; 30 int mount_id; 31 int ret; 32 int fd1, fd2; 33 struct stat st1, st2; 34 35 handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); 36 ASSERT_NE(handle, NULL); 37 38 /* Open initial network namespace */ 39 fd1 = open("/proc/1/ns/net", O_RDONLY); 40 ASSERT_GE(fd1, 0); 41 42 /* Get file handle for initial namespace */ 43 handle->handle_bytes = MAX_HANDLE_SZ; 44 ret = name_to_handle_at(fd1, "", handle, &mount_id, AT_EMPTY_PATH); 45 if (ret < 0 && errno == EOPNOTSUPP) { 46 SKIP(free(handle); close(fd1); 47 return, "nsfs doesn't support file handles"); 48 } 49 ASSERT_EQ(ret, 0); 50 51 /* Close the namespace fd */ 52 close(fd1); 53 54 /* Try to reopen via file handle - should succeed since init ns is always active */ 55 fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); 56 if (fd2 < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { 57 SKIP(free(handle); 58 return, "open_by_handle_at with FD_NSFS_ROOT not supported"); 59 } 60 ASSERT_GE(fd2, 0); 61 62 /* Verify we opened the same namespace */ 63 fd1 = open("/proc/1/ns/net", O_RDONLY); 64 ASSERT_GE(fd1, 0); 65 ASSERT_EQ(fstat(fd1, &st1), 0); 66 ASSERT_EQ(fstat(fd2, &st2), 0); 67 ASSERT_EQ(st1.st_ino, st2.st_ino); 68 69 close(fd1); 70 close(fd2); 71 free(handle); 72 } 73 74 /* 75 * Test namespace lifecycle: create a namespace in a child process, 76 * get a file handle while it's active, then try to reopen after 77 * the process exits (namespace becomes inactive). 78 */ 79 TEST(ns_inactive_after_exit) 80 { 81 struct file_handle *handle; 82 int mount_id; 83 int ret; 84 int fd; 85 int pipefd[2]; 86 pid_t pid; 87 int status; 88 char buf[sizeof(*handle) + MAX_HANDLE_SZ]; 89 90 /* Create pipe for passing file handle from child */ 91 ASSERT_EQ(pipe(pipefd), 0); 92 93 pid = fork(); 94 ASSERT_GE(pid, 0); 95 96 if (pid == 0) { 97 /* Child process */ 98 close(pipefd[0]); 99 100 /* Create new network namespace */ 101 ret = unshare(CLONE_NEWNET); 102 if (ret < 0) { 103 close(pipefd[1]); 104 exit(1); 105 } 106 107 /* Open our new namespace */ 108 fd = open("/proc/self/ns/net", O_RDONLY); 109 if (fd < 0) { 110 close(pipefd[1]); 111 exit(1); 112 } 113 114 /* Get file handle for the namespace */ 115 handle = (struct file_handle *)buf; 116 handle->handle_bytes = MAX_HANDLE_SZ; 117 ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH); 118 close(fd); 119 120 if (ret < 0) { 121 close(pipefd[1]); 122 exit(1); 123 } 124 125 /* Send handle to parent */ 126 write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes); 127 close(pipefd[1]); 128 129 /* Exit - namespace should become inactive */ 130 exit(0); 131 } 132 133 /* Parent process */ 134 close(pipefd[1]); 135 136 /* Read file handle from child */ 137 ret = read(pipefd[0], buf, sizeof(buf)); 138 close(pipefd[0]); 139 140 /* Wait for child to exit */ 141 waitpid(pid, &status, 0); 142 ASSERT_TRUE(WIFEXITED(status)); 143 ASSERT_EQ(WEXITSTATUS(status), 0); 144 145 ASSERT_GT(ret, 0); 146 handle = (struct file_handle *)buf; 147 148 /* Try to reopen namespace - should fail with ENOENT since it's inactive */ 149 fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); 150 ASSERT_LT(fd, 0); 151 /* Should fail with ENOENT (namespace inactive) or ESTALE */ 152 ASSERT_TRUE(errno == ENOENT || errno == ESTALE); 153 } 154 155 /* 156 * Test that a namespace remains active while a process is using it, 157 * even after the creating process exits. 158 */ 159 TEST(ns_active_with_multiple_processes) 160 { 161 struct file_handle *handle; 162 int mount_id; 163 int ret; 164 int fd; 165 int pipefd[2]; 166 int syncpipe[2]; 167 pid_t pid1, pid2; 168 int status; 169 char buf[sizeof(*handle) + MAX_HANDLE_SZ]; 170 char sync_byte; 171 172 /* Create pipes for communication */ 173 ASSERT_EQ(pipe(pipefd), 0); 174 ASSERT_EQ(pipe(syncpipe), 0); 175 176 pid1 = fork(); 177 ASSERT_GE(pid1, 0); 178 179 if (pid1 == 0) { 180 /* First child - creates namespace */ 181 close(pipefd[0]); 182 close(syncpipe[1]); 183 184 /* Create new network namespace */ 185 ret = unshare(CLONE_NEWNET); 186 if (ret < 0) { 187 close(pipefd[1]); 188 close(syncpipe[0]); 189 exit(1); 190 } 191 192 /* Open and get handle */ 193 fd = open("/proc/self/ns/net", O_RDONLY); 194 if (fd < 0) { 195 close(pipefd[1]); 196 close(syncpipe[0]); 197 exit(1); 198 } 199 200 handle = (struct file_handle *)buf; 201 handle->handle_bytes = MAX_HANDLE_SZ; 202 ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH); 203 close(fd); 204 205 if (ret < 0) { 206 close(pipefd[1]); 207 close(syncpipe[0]); 208 exit(1); 209 } 210 211 /* Send handle to parent */ 212 write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes); 213 close(pipefd[1]); 214 215 /* Wait for signal before exiting */ 216 read(syncpipe[0], &sync_byte, 1); 217 close(syncpipe[0]); 218 exit(0); 219 } 220 221 /* Parent reads handle */ 222 close(pipefd[1]); 223 ret = read(pipefd[0], buf, sizeof(buf)); 224 close(pipefd[0]); 225 ASSERT_GT(ret, 0); 226 227 handle = (struct file_handle *)buf; 228 229 /* Create second child that will keep namespace active */ 230 pid2 = fork(); 231 ASSERT_GE(pid2, 0); 232 233 if (pid2 == 0) { 234 /* Second child - reopens the namespace */ 235 close(syncpipe[0]); 236 close(syncpipe[1]); 237 238 /* Open the namespace via handle */ 239 fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); 240 if (fd < 0) { 241 exit(1); 242 } 243 244 /* Join the namespace */ 245 ret = setns(fd, CLONE_NEWNET); 246 close(fd); 247 if (ret < 0) { 248 exit(1); 249 } 250 251 /* Sleep to keep namespace active */ 252 sleep(1); 253 exit(0); 254 } 255 256 /* Let second child enter the namespace */ 257 usleep(100000); /* 100ms */ 258 259 /* Signal first child to exit */ 260 close(syncpipe[0]); 261 sync_byte = 'X'; 262 write(syncpipe[1], &sync_byte, 1); 263 close(syncpipe[1]); 264 265 /* Wait for first child */ 266 waitpid(pid1, &status, 0); 267 ASSERT_TRUE(WIFEXITED(status)); 268 269 /* Namespace should still be active because second child is using it */ 270 fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); 271 ASSERT_GE(fd, 0); 272 close(fd); 273 274 /* Wait for second child */ 275 waitpid(pid2, &status, 0); 276 ASSERT_TRUE(WIFEXITED(status)); 277 } 278 279 /* 280 * Test user namespace active ref tracking via credential lifecycle 281 */ 282 TEST(userns_active_ref_lifecycle) 283 { 284 struct file_handle *handle; 285 int mount_id; 286 int ret; 287 int fd; 288 int pipefd[2]; 289 pid_t pid; 290 int status; 291 char buf[sizeof(*handle) + MAX_HANDLE_SZ]; 292 293 ASSERT_EQ(pipe(pipefd), 0); 294 295 pid = fork(); 296 ASSERT_GE(pid, 0); 297 298 if (pid == 0) { 299 /* Child process */ 300 close(pipefd[0]); 301 302 /* Create new user namespace */ 303 ret = unshare(CLONE_NEWUSER); 304 if (ret < 0) { 305 close(pipefd[1]); 306 exit(1); 307 } 308 309 /* Set up uid/gid mappings */ 310 int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); 311 int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); 312 int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); 313 314 if (uid_map_fd >= 0 && gid_map_fd >= 0 && setgroups_fd >= 0) { 315 write(setgroups_fd, "deny", 4); 316 close(setgroups_fd); 317 318 char mapping[64]; 319 snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); 320 write(uid_map_fd, mapping, strlen(mapping)); 321 close(uid_map_fd); 322 323 snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); 324 write(gid_map_fd, mapping, strlen(mapping)); 325 close(gid_map_fd); 326 } 327 328 /* Get file handle */ 329 fd = open("/proc/self/ns/user", O_RDONLY); 330 if (fd < 0) { 331 close(pipefd[1]); 332 exit(1); 333 } 334 335 handle = (struct file_handle *)buf; 336 handle->handle_bytes = MAX_HANDLE_SZ; 337 ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH); 338 close(fd); 339 340 if (ret < 0) { 341 close(pipefd[1]); 342 exit(1); 343 } 344 345 /* Send handle to parent */ 346 write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes); 347 close(pipefd[1]); 348 exit(0); 349 } 350 351 /* Parent */ 352 close(pipefd[1]); 353 ret = read(pipefd[0], buf, sizeof(buf)); 354 close(pipefd[0]); 355 356 waitpid(pid, &status, 0); 357 ASSERT_TRUE(WIFEXITED(status)); 358 ASSERT_EQ(WEXITSTATUS(status), 0); 359 360 ASSERT_GT(ret, 0); 361 handle = (struct file_handle *)buf; 362 363 /* Namespace should be inactive after all tasks exit */ 364 fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); 365 ASSERT_LT(fd, 0); 366 ASSERT_TRUE(errno == ENOENT || errno == ESTALE); 367 } 368 369 /* 370 * Test PID namespace active ref tracking 371 */ 372 TEST(pidns_active_ref_lifecycle) 373 { 374 struct file_handle *handle; 375 int mount_id; 376 int ret; 377 int fd; 378 int pipefd[2]; 379 pid_t pid; 380 int status; 381 char buf[sizeof(*handle) + MAX_HANDLE_SZ]; 382 383 ASSERT_EQ(pipe(pipefd), 0); 384 385 pid = fork(); 386 ASSERT_GE(pid, 0); 387 388 if (pid == 0) { 389 /* Child process */ 390 close(pipefd[0]); 391 392 /* Create new PID namespace */ 393 ret = unshare(CLONE_NEWPID); 394 if (ret < 0) { 395 close(pipefd[1]); 396 exit(1); 397 } 398 399 /* Fork to actually enter the PID namespace */ 400 pid_t child = fork(); 401 if (child < 0) { 402 close(pipefd[1]); 403 exit(1); 404 } 405 406 if (child == 0) { 407 /* Grandchild - in new PID namespace */ 408 fd = open("/proc/self/ns/pid", O_RDONLY); 409 if (fd < 0) { 410 exit(1); 411 } 412 413 handle = (struct file_handle *)buf; 414 handle->handle_bytes = MAX_HANDLE_SZ; 415 ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH); 416 close(fd); 417 418 if (ret < 0) { 419 exit(1); 420 } 421 422 /* Send handle to grandparent */ 423 write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes); 424 close(pipefd[1]); 425 exit(0); 426 } 427 428 /* Wait for grandchild */ 429 waitpid(child, NULL, 0); 430 exit(0); 431 } 432 433 /* Parent */ 434 close(pipefd[1]); 435 ret = read(pipefd[0], buf, sizeof(buf)); 436 close(pipefd[0]); 437 438 waitpid(pid, &status, 0); 439 ASSERT_TRUE(WIFEXITED(status)); 440 ASSERT_EQ(WEXITSTATUS(status), 0); 441 442 ASSERT_GT(ret, 0); 443 handle = (struct file_handle *)buf; 444 445 /* Namespace should be inactive after all processes exit */ 446 fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); 447 ASSERT_LT(fd, 0); 448 ASSERT_TRUE(errno == ENOENT || errno == ESTALE); 449 } 450 451 /* 452 * Test that an open file descriptor keeps a namespace active. 453 * Even after the creating process exits, the namespace should remain 454 * active as long as an fd is held open. 455 */ 456 TEST(ns_fd_keeps_active) 457 { 458 struct file_handle *handle; 459 int mount_id; 460 int ret; 461 int nsfd; 462 int pipe_child_ready[2]; 463 int pipe_parent_ready[2]; 464 pid_t pid; 465 int status; 466 char buf[sizeof(*handle) + MAX_HANDLE_SZ]; 467 char sync_byte; 468 char proc_path[64]; 469 470 ASSERT_EQ(pipe(pipe_child_ready), 0); 471 ASSERT_EQ(pipe(pipe_parent_ready), 0); 472 473 pid = fork(); 474 ASSERT_GE(pid, 0); 475 476 if (pid == 0) { 477 /* Child process */ 478 close(pipe_child_ready[0]); 479 close(pipe_parent_ready[1]); 480 481 TH_LOG("Child: creating new network namespace"); 482 483 /* Create new network namespace */ 484 ret = unshare(CLONE_NEWNET); 485 if (ret < 0) { 486 TH_LOG("Child: unshare(CLONE_NEWNET) failed: %s", strerror(errno)); 487 close(pipe_child_ready[1]); 488 close(pipe_parent_ready[0]); 489 exit(1); 490 } 491 492 TH_LOG("Child: network namespace created successfully"); 493 494 /* Get file handle for the namespace */ 495 nsfd = open("/proc/self/ns/net", O_RDONLY); 496 if (nsfd < 0) { 497 TH_LOG("Child: failed to open /proc/self/ns/net: %s", strerror(errno)); 498 close(pipe_child_ready[1]); 499 close(pipe_parent_ready[0]); 500 exit(1); 501 } 502 503 TH_LOG("Child: opened namespace fd %d", nsfd); 504 505 handle = (struct file_handle *)buf; 506 handle->handle_bytes = MAX_HANDLE_SZ; 507 ret = name_to_handle_at(nsfd, "", handle, &mount_id, AT_EMPTY_PATH); 508 close(nsfd); 509 510 if (ret < 0) { 511 TH_LOG("Child: name_to_handle_at failed: %s", strerror(errno)); 512 close(pipe_child_ready[1]); 513 close(pipe_parent_ready[0]); 514 exit(1); 515 } 516 517 TH_LOG("Child: got file handle (bytes=%u)", handle->handle_bytes); 518 519 /* Send file handle to parent */ 520 ret = write(pipe_child_ready[1], buf, sizeof(*handle) + handle->handle_bytes); 521 TH_LOG("Child: sent %d bytes of file handle to parent", ret); 522 close(pipe_child_ready[1]); 523 524 /* Wait for parent to open the fd */ 525 TH_LOG("Child: waiting for parent to open fd"); 526 ret = read(pipe_parent_ready[0], &sync_byte, 1); 527 close(pipe_parent_ready[0]); 528 529 TH_LOG("Child: parent signaled (read %d bytes), exiting now", ret); 530 /* Exit - namespace should stay active because parent holds fd */ 531 exit(0); 532 } 533 534 /* Parent process */ 535 close(pipe_child_ready[1]); 536 close(pipe_parent_ready[0]); 537 538 TH_LOG("Parent: reading file handle from child"); 539 540 /* Read file handle from child */ 541 ret = read(pipe_child_ready[0], buf, sizeof(buf)); 542 close(pipe_child_ready[0]); 543 ASSERT_GT(ret, 0); 544 handle = (struct file_handle *)buf; 545 546 TH_LOG("Parent: received %d bytes, handle size=%u", ret, handle->handle_bytes); 547 548 /* Open the child's namespace while it's still alive */ 549 snprintf(proc_path, sizeof(proc_path), "/proc/%d/ns/net", pid); 550 TH_LOG("Parent: opening child's namespace at %s", proc_path); 551 nsfd = open(proc_path, O_RDONLY); 552 if (nsfd < 0) { 553 TH_LOG("Parent: failed to open %s: %s", proc_path, strerror(errno)); 554 close(pipe_parent_ready[1]); 555 kill(pid, SIGKILL); 556 waitpid(pid, NULL, 0); 557 SKIP(return, "Failed to open child's namespace"); 558 } 559 560 TH_LOG("Parent: opened child's namespace, got fd %d", nsfd); 561 562 /* Signal child that we have the fd */ 563 sync_byte = 'G'; 564 write(pipe_parent_ready[1], &sync_byte, 1); 565 close(pipe_parent_ready[1]); 566 TH_LOG("Parent: signaled child that we have the fd"); 567 568 /* Wait for child to exit */ 569 waitpid(pid, &status, 0); 570 ASSERT_TRUE(WIFEXITED(status)); 571 ASSERT_EQ(WEXITSTATUS(status), 0); 572 573 TH_LOG("Child exited, parent holds fd %d to namespace", nsfd); 574 575 /* 576 * Namespace should still be ACTIVE because we hold an fd. 577 * We should be able to reopen it via file handle. 578 */ 579 TH_LOG("Attempting to reopen namespace via file handle (should succeed - fd held)"); 580 int fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); 581 ASSERT_GE(fd2, 0); 582 583 TH_LOG("Successfully reopened namespace via file handle, got fd %d", fd2); 584 585 /* Verify it's the same namespace */ 586 struct stat st1, st2; 587 ASSERT_EQ(fstat(nsfd, &st1), 0); 588 ASSERT_EQ(fstat(fd2, &st2), 0); 589 TH_LOG("Namespace inodes: nsfd=%lu, fd2=%lu", st1.st_ino, st2.st_ino); 590 ASSERT_EQ(st1.st_ino, st2.st_ino); 591 close(fd2); 592 593 /* Now close the fd - namespace should become inactive */ 594 TH_LOG("Closing fd %d - namespace should become inactive", nsfd); 595 close(nsfd); 596 597 /* Now reopening should fail - namespace is inactive */ 598 TH_LOG("Attempting to reopen namespace via file handle (should fail - inactive)"); 599 fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); 600 ASSERT_LT(fd2, 0); 601 /* Should fail with ENOENT (inactive) or ESTALE (gone) */ 602 TH_LOG("Reopen failed as expected: %s (errno=%d)", strerror(errno), errno); 603 ASSERT_TRUE(errno == ENOENT || errno == ESTALE); 604 } 605 606 TEST_HARNESS_MAIN 607