1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <limits.h> 6 #include <sched.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/ioctl.h> 11 #include <sys/socket.h> 12 #include <sys/stat.h> 13 #include <sys/types.h> 14 #include <sys/wait.h> 15 #include <unistd.h> 16 #include <linux/if.h> 17 #include <linux/sockios.h> 18 #include <linux/nsfs.h> 19 #include <arpa/inet.h> 20 #include "../kselftest_harness.h" 21 #include "../filesystems/utils.h" 22 #include "wrappers.h" 23 24 #ifndef SIOCGSKNS 25 #define SIOCGSKNS 0x894C 26 #endif 27 28 #ifndef FD_NSFS_ROOT 29 #define FD_NSFS_ROOT -10003 30 #endif 31 32 #ifndef FILEID_NSFS 33 #define FILEID_NSFS 0xf1 34 #endif 35 36 /* 37 * Test basic SIOCGSKNS functionality. 38 * Create a socket and verify SIOCGSKNS returns the correct network namespace. 39 */ 40 TEST(siocgskns_basic) 41 { 42 int sock_fd, netns_fd, current_netns_fd; 43 struct stat st1, st2; 44 45 /* Create a TCP socket */ 46 sock_fd = socket(AF_INET, SOCK_STREAM, 0); 47 ASSERT_GE(sock_fd, 0); 48 49 /* Use SIOCGSKNS to get network namespace */ 50 netns_fd = ioctl(sock_fd, SIOCGSKNS); 51 if (netns_fd < 0) { 52 close(sock_fd); 53 if (errno == ENOTTY || errno == EINVAL) 54 SKIP(return, "SIOCGSKNS not supported"); 55 ASSERT_GE(netns_fd, 0); 56 } 57 58 /* Get current network namespace */ 59 current_netns_fd = open("/proc/self/ns/net", O_RDONLY); 60 ASSERT_GE(current_netns_fd, 0); 61 62 /* Verify they match */ 63 ASSERT_EQ(fstat(netns_fd, &st1), 0); 64 ASSERT_EQ(fstat(current_netns_fd, &st2), 0); 65 ASSERT_EQ(st1.st_ino, st2.st_ino); 66 67 close(sock_fd); 68 close(netns_fd); 69 close(current_netns_fd); 70 } 71 72 /* 73 * Test that socket file descriptors keep network namespaces active. 74 * Create a network namespace, create a socket in it, then exit the namespace. 75 * The namespace should remain active while the socket FD is held. 76 */ 77 TEST(siocgskns_keeps_netns_active) 78 { 79 int sock_fd, netns_fd, test_fd; 80 int ipc_sockets[2]; 81 pid_t pid; 82 int status; 83 struct stat st; 84 85 EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0); 86 87 pid = fork(); 88 ASSERT_GE(pid, 0); 89 90 if (pid == 0) { 91 /* Child: create new netns and socket */ 92 close(ipc_sockets[0]); 93 94 if (unshare(CLONE_NEWNET) < 0) { 95 TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno)); 96 close(ipc_sockets[1]); 97 exit(1); 98 } 99 100 /* Create a socket in the new network namespace */ 101 sock_fd = socket(AF_INET, SOCK_DGRAM, 0); 102 if (sock_fd < 0) { 103 TH_LOG("socket() failed: %s", strerror(errno)); 104 close(ipc_sockets[1]); 105 exit(1); 106 } 107 108 /* Send socket FD to parent via SCM_RIGHTS */ 109 struct msghdr msg = {0}; 110 struct iovec iov = {0}; 111 char buf[1] = {'X'}; 112 char cmsg_buf[CMSG_SPACE(sizeof(int))]; 113 114 iov.iov_base = buf; 115 iov.iov_len = 1; 116 msg.msg_iov = &iov; 117 msg.msg_iovlen = 1; 118 msg.msg_control = cmsg_buf; 119 msg.msg_controllen = sizeof(cmsg_buf); 120 121 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 122 cmsg->cmsg_level = SOL_SOCKET; 123 cmsg->cmsg_type = SCM_RIGHTS; 124 cmsg->cmsg_len = CMSG_LEN(sizeof(int)); 125 memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int)); 126 127 if (sendmsg(ipc_sockets[1], &msg, 0) < 0) { 128 close(sock_fd); 129 close(ipc_sockets[1]); 130 exit(1); 131 } 132 133 close(sock_fd); 134 close(ipc_sockets[1]); 135 exit(0); 136 } 137 138 /* Parent: receive socket FD */ 139 close(ipc_sockets[1]); 140 141 struct msghdr msg = {0}; 142 struct iovec iov = {0}; 143 char buf[1]; 144 char cmsg_buf[CMSG_SPACE(sizeof(int))]; 145 146 iov.iov_base = buf; 147 iov.iov_len = 1; 148 msg.msg_iov = &iov; 149 msg.msg_iovlen = 1; 150 msg.msg_control = cmsg_buf; 151 msg.msg_controllen = sizeof(cmsg_buf); 152 153 ssize_t n = recvmsg(ipc_sockets[0], &msg, 0); 154 close(ipc_sockets[0]); 155 ASSERT_EQ(n, 1); 156 157 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 158 ASSERT_NE(cmsg, NULL); 159 ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); 160 161 memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int)); 162 163 /* Wait for child to exit */ 164 waitpid(pid, &status, 0); 165 ASSERT_TRUE(WIFEXITED(status)); 166 ASSERT_EQ(WEXITSTATUS(status), 0); 167 168 /* Get network namespace from socket */ 169 netns_fd = ioctl(sock_fd, SIOCGSKNS); 170 if (netns_fd < 0) { 171 close(sock_fd); 172 if (errno == ENOTTY || errno == EINVAL) 173 SKIP(return, "SIOCGSKNS not supported"); 174 ASSERT_GE(netns_fd, 0); 175 } 176 177 ASSERT_EQ(fstat(netns_fd, &st), 0); 178 179 /* 180 * Namespace should still be active because socket FD keeps it alive. 181 * Try to access it via /proc/self/fd/<fd>. 182 */ 183 char path[64]; 184 snprintf(path, sizeof(path), "/proc/self/fd/%d", netns_fd); 185 test_fd = open(path, O_RDONLY); 186 ASSERT_GE(test_fd, 0); 187 close(test_fd); 188 close(netns_fd); 189 190 /* Close socket - namespace should become inactive */ 191 close(sock_fd); 192 193 /* Try SIOCGSKNS again - should fail since socket is closed */ 194 ASSERT_LT(ioctl(sock_fd, SIOCGSKNS), 0); 195 } 196 197 /* 198 * Test SIOCGSKNS with different socket types (TCP, UDP, RAW). 199 */ 200 TEST(siocgskns_socket_types) 201 { 202 int sock_tcp, sock_udp, sock_raw; 203 int netns_tcp, netns_udp, netns_raw; 204 struct stat st_tcp, st_udp, st_raw; 205 206 /* TCP socket */ 207 sock_tcp = socket(AF_INET, SOCK_STREAM, 0); 208 ASSERT_GE(sock_tcp, 0); 209 210 /* UDP socket */ 211 sock_udp = socket(AF_INET, SOCK_DGRAM, 0); 212 ASSERT_GE(sock_udp, 0); 213 214 /* RAW socket (may require privileges) */ 215 sock_raw = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); 216 if (sock_raw < 0 && (errno == EPERM || errno == EACCES)) { 217 sock_raw = -1; /* Skip raw socket test */ 218 } 219 220 /* Test SIOCGSKNS on TCP */ 221 netns_tcp = ioctl(sock_tcp, SIOCGSKNS); 222 if (netns_tcp < 0) { 223 close(sock_tcp); 224 close(sock_udp); 225 if (sock_raw >= 0) close(sock_raw); 226 if (errno == ENOTTY || errno == EINVAL) 227 SKIP(return, "SIOCGSKNS not supported"); 228 ASSERT_GE(netns_tcp, 0); 229 } 230 231 /* Test SIOCGSKNS on UDP */ 232 netns_udp = ioctl(sock_udp, SIOCGSKNS); 233 ASSERT_GE(netns_udp, 0); 234 235 /* Test SIOCGSKNS on RAW (if available) */ 236 if (sock_raw >= 0) { 237 netns_raw = ioctl(sock_raw, SIOCGSKNS); 238 ASSERT_GE(netns_raw, 0); 239 } 240 241 /* Verify all return the same network namespace */ 242 ASSERT_EQ(fstat(netns_tcp, &st_tcp), 0); 243 ASSERT_EQ(fstat(netns_udp, &st_udp), 0); 244 ASSERT_EQ(st_tcp.st_ino, st_udp.st_ino); 245 246 if (sock_raw >= 0) { 247 ASSERT_EQ(fstat(netns_raw, &st_raw), 0); 248 ASSERT_EQ(st_tcp.st_ino, st_raw.st_ino); 249 close(netns_raw); 250 close(sock_raw); 251 } 252 253 close(netns_tcp); 254 close(netns_udp); 255 close(sock_tcp); 256 close(sock_udp); 257 } 258 259 /* 260 * Test SIOCGSKNS across setns. 261 * Create a socket in netns A, switch to netns B, verify SIOCGSKNS still 262 * returns netns A. 263 */ 264 TEST(siocgskns_across_setns) 265 { 266 int sock_fd, netns_a_fd, netns_b_fd, result_fd; 267 struct stat st_a; 268 269 /* Get current netns (A) */ 270 netns_a_fd = open("/proc/self/ns/net", O_RDONLY); 271 ASSERT_GE(netns_a_fd, 0); 272 ASSERT_EQ(fstat(netns_a_fd, &st_a), 0); 273 274 /* Create socket in netns A */ 275 sock_fd = socket(AF_INET, SOCK_STREAM, 0); 276 ASSERT_GE(sock_fd, 0); 277 278 /* Create new netns (B) */ 279 ASSERT_EQ(unshare(CLONE_NEWNET), 0); 280 281 netns_b_fd = open("/proc/self/ns/net", O_RDONLY); 282 ASSERT_GE(netns_b_fd, 0); 283 284 /* Get netns from socket created in A */ 285 result_fd = ioctl(sock_fd, SIOCGSKNS); 286 if (result_fd < 0) { 287 close(sock_fd); 288 setns(netns_a_fd, CLONE_NEWNET); 289 close(netns_a_fd); 290 close(netns_b_fd); 291 if (errno == ENOTTY || errno == EINVAL) 292 SKIP(return, "SIOCGSKNS not supported"); 293 ASSERT_GE(result_fd, 0); 294 } 295 296 /* Verify it still points to netns A */ 297 struct stat st_result_stat; 298 ASSERT_EQ(fstat(result_fd, &st_result_stat), 0); 299 ASSERT_EQ(st_a.st_ino, st_result_stat.st_ino); 300 301 close(result_fd); 302 close(sock_fd); 303 close(netns_b_fd); 304 305 /* Restore original netns */ 306 ASSERT_EQ(setns(netns_a_fd, CLONE_NEWNET), 0); 307 close(netns_a_fd); 308 } 309 310 /* 311 * Test SIOCGSKNS fails on non-socket file descriptors. 312 */ 313 TEST(siocgskns_non_socket) 314 { 315 int fd; 316 int pipefd[2]; 317 318 /* Test on regular file */ 319 fd = open("/dev/null", O_RDONLY); 320 ASSERT_GE(fd, 0); 321 322 ASSERT_LT(ioctl(fd, SIOCGSKNS), 0); 323 ASSERT_TRUE(errno == ENOTTY || errno == EINVAL); 324 close(fd); 325 326 /* Test on pipe */ 327 ASSERT_EQ(pipe(pipefd), 0); 328 329 ASSERT_LT(ioctl(pipefd[0], SIOCGSKNS), 0); 330 ASSERT_TRUE(errno == ENOTTY || errno == EINVAL); 331 332 close(pipefd[0]); 333 close(pipefd[1]); 334 } 335 336 /* 337 * Test multiple sockets keep the same network namespace active. 338 * Create multiple sockets, verify closing some doesn't affect others. 339 */ 340 TEST(siocgskns_multiple_sockets) 341 { 342 int socks[5]; 343 int netns_fds[5]; 344 int i; 345 struct stat st; 346 ino_t netns_ino; 347 348 /* Create new network namespace */ 349 ASSERT_EQ(unshare(CLONE_NEWNET), 0); 350 351 /* Create multiple sockets */ 352 for (i = 0; i < 5; i++) { 353 socks[i] = socket(AF_INET, SOCK_STREAM, 0); 354 ASSERT_GE(socks[i], 0); 355 } 356 357 /* Get netns from all sockets */ 358 for (i = 0; i < 5; i++) { 359 netns_fds[i] = ioctl(socks[i], SIOCGSKNS); 360 if (netns_fds[i] < 0) { 361 int j; 362 for (j = 0; j <= i; j++) { 363 close(socks[j]); 364 if (j < i && netns_fds[j] >= 0) 365 close(netns_fds[j]); 366 } 367 if (errno == ENOTTY || errno == EINVAL) 368 SKIP(return, "SIOCGSKNS not supported"); 369 ASSERT_GE(netns_fds[i], 0); 370 } 371 } 372 373 /* Verify all point to same netns */ 374 ASSERT_EQ(fstat(netns_fds[0], &st), 0); 375 netns_ino = st.st_ino; 376 377 for (i = 1; i < 5; i++) { 378 ASSERT_EQ(fstat(netns_fds[i], &st), 0); 379 ASSERT_EQ(st.st_ino, netns_ino); 380 } 381 382 /* Close some sockets */ 383 for (i = 0; i < 3; i++) { 384 close(socks[i]); 385 } 386 387 /* Remaining netns FDs should still be valid */ 388 for (i = 3; i < 5; i++) { 389 char path[64]; 390 snprintf(path, sizeof(path), "/proc/self/fd/%d", netns_fds[i]); 391 int test_fd = open(path, O_RDONLY); 392 ASSERT_GE(test_fd, 0); 393 close(test_fd); 394 } 395 396 /* Cleanup */ 397 for (i = 0; i < 5; i++) { 398 if (i >= 3) 399 close(socks[i]); 400 close(netns_fds[i]); 401 } 402 } 403 404 /* 405 * Test socket keeps netns active after creating process exits. 406 * Verify that as long as the socket FD exists, the namespace remains active. 407 */ 408 TEST(siocgskns_netns_lifecycle) 409 { 410 int sock_fd, netns_fd; 411 int ipc_sockets[2]; 412 int syncpipe[2]; 413 pid_t pid; 414 int status; 415 char sync_byte; 416 struct stat st; 417 ino_t netns_ino; 418 419 EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0); 420 421 ASSERT_EQ(pipe(syncpipe), 0); 422 423 pid = fork(); 424 ASSERT_GE(pid, 0); 425 426 if (pid == 0) { 427 /* Child */ 428 close(ipc_sockets[0]); 429 close(syncpipe[1]); 430 431 if (unshare(CLONE_NEWNET) < 0) { 432 close(ipc_sockets[1]); 433 close(syncpipe[0]); 434 exit(1); 435 } 436 437 sock_fd = socket(AF_INET, SOCK_STREAM, 0); 438 if (sock_fd < 0) { 439 close(ipc_sockets[1]); 440 close(syncpipe[0]); 441 exit(1); 442 } 443 444 /* Send socket to parent */ 445 struct msghdr msg = {0}; 446 struct iovec iov = {0}; 447 char buf[1] = {'X'}; 448 char cmsg_buf[CMSG_SPACE(sizeof(int))]; 449 450 iov.iov_base = buf; 451 iov.iov_len = 1; 452 msg.msg_iov = &iov; 453 msg.msg_iovlen = 1; 454 msg.msg_control = cmsg_buf; 455 msg.msg_controllen = sizeof(cmsg_buf); 456 457 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 458 cmsg->cmsg_level = SOL_SOCKET; 459 cmsg->cmsg_type = SCM_RIGHTS; 460 cmsg->cmsg_len = CMSG_LEN(sizeof(int)); 461 memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int)); 462 463 if (sendmsg(ipc_sockets[1], &msg, 0) < 0) { 464 close(sock_fd); 465 close(ipc_sockets[1]); 466 close(syncpipe[0]); 467 exit(1); 468 } 469 470 close(sock_fd); 471 close(ipc_sockets[1]); 472 473 /* Wait for parent signal */ 474 read(syncpipe[0], &sync_byte, 1); 475 close(syncpipe[0]); 476 exit(0); 477 } 478 479 /* Parent */ 480 close(ipc_sockets[1]); 481 close(syncpipe[0]); 482 483 /* Receive socket FD */ 484 struct msghdr msg = {0}; 485 struct iovec iov = {0}; 486 char buf[1]; 487 char cmsg_buf[CMSG_SPACE(sizeof(int))]; 488 489 iov.iov_base = buf; 490 iov.iov_len = 1; 491 msg.msg_iov = &iov; 492 msg.msg_iovlen = 1; 493 msg.msg_control = cmsg_buf; 494 msg.msg_controllen = sizeof(cmsg_buf); 495 496 ssize_t n = recvmsg(ipc_sockets[0], &msg, 0); 497 close(ipc_sockets[0]); 498 ASSERT_EQ(n, 1); 499 500 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 501 ASSERT_NE(cmsg, NULL); 502 memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int)); 503 504 /* Get netns from socket while child is alive */ 505 netns_fd = ioctl(sock_fd, SIOCGSKNS); 506 if (netns_fd < 0) { 507 sync_byte = 'G'; 508 write(syncpipe[1], &sync_byte, 1); 509 close(syncpipe[1]); 510 close(sock_fd); 511 waitpid(pid, NULL, 0); 512 if (errno == ENOTTY || errno == EINVAL) 513 SKIP(return, "SIOCGSKNS not supported"); 514 ASSERT_GE(netns_fd, 0); 515 } 516 ASSERT_EQ(fstat(netns_fd, &st), 0); 517 netns_ino = st.st_ino; 518 519 /* Signal child to exit */ 520 sync_byte = 'G'; 521 write(syncpipe[1], &sync_byte, 1); 522 close(syncpipe[1]); 523 524 waitpid(pid, &status, 0); 525 ASSERT_TRUE(WIFEXITED(status)); 526 527 /* 528 * Socket FD should still keep namespace active even after 529 * the creating process exited. 530 */ 531 int test_fd = ioctl(sock_fd, SIOCGSKNS); 532 ASSERT_GE(test_fd, 0); 533 534 struct stat st_test; 535 ASSERT_EQ(fstat(test_fd, &st_test), 0); 536 ASSERT_EQ(st_test.st_ino, netns_ino); 537 538 close(test_fd); 539 close(netns_fd); 540 541 /* Close socket - namespace should become inactive */ 542 close(sock_fd); 543 } 544 545 /* 546 * Test IPv6 sockets also work with SIOCGSKNS. 547 */ 548 TEST(siocgskns_ipv6) 549 { 550 int sock_fd, netns_fd, current_netns_fd; 551 struct stat st1, st2; 552 553 /* Create an IPv6 TCP socket */ 554 sock_fd = socket(AF_INET6, SOCK_STREAM, 0); 555 ASSERT_GE(sock_fd, 0); 556 557 /* Use SIOCGSKNS */ 558 netns_fd = ioctl(sock_fd, SIOCGSKNS); 559 if (netns_fd < 0) { 560 close(sock_fd); 561 if (errno == ENOTTY || errno == EINVAL) 562 SKIP(return, "SIOCGSKNS not supported"); 563 ASSERT_GE(netns_fd, 0); 564 } 565 566 /* Verify it matches current namespace */ 567 current_netns_fd = open("/proc/self/ns/net", O_RDONLY); 568 ASSERT_GE(current_netns_fd, 0); 569 570 ASSERT_EQ(fstat(netns_fd, &st1), 0); 571 ASSERT_EQ(fstat(current_netns_fd, &st2), 0); 572 ASSERT_EQ(st1.st_ino, st2.st_ino); 573 574 close(sock_fd); 575 close(netns_fd); 576 close(current_netns_fd); 577 } 578 579 TEST_HARNESS_MAIN 580