1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <limits.h> 7 #include <linux/types.h> 8 #include <sched.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <syscall.h> 14 #include <sys/prctl.h> 15 #include <sys/wait.h> 16 #include <unistd.h> 17 #include <sys/socket.h> 18 #include <sys/stat.h> 19 20 #include "pidfd.h" 21 #include "../clone3/clone3_selftests.h" 22 #include "../kselftest_harness.h" 23 24 enum { 25 PIDFD_NS_USER, 26 PIDFD_NS_MNT, 27 PIDFD_NS_PID, 28 PIDFD_NS_UTS, 29 PIDFD_NS_IPC, 30 PIDFD_NS_NET, 31 PIDFD_NS_CGROUP, 32 PIDFD_NS_PIDCLD, 33 PIDFD_NS_TIME, 34 PIDFD_NS_MAX 35 }; 36 37 const struct ns_info { 38 const char *name; 39 int flag; 40 } ns_info[] = { 41 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, 42 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, 43 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, 44 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, 45 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, 46 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, 47 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, 48 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, 49 [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, 50 }; 51 52 FIXTURE(current_nsset) 53 { 54 pid_t pid; 55 int pidfd; 56 int nsfds[PIDFD_NS_MAX]; 57 58 pid_t child_pid_exited; 59 int child_pidfd_exited; 60 61 pid_t child_pid1; 62 int child_pidfd1; 63 int child_nsfds1[PIDFD_NS_MAX]; 64 65 pid_t child_pid2; 66 int child_pidfd2; 67 int child_nsfds2[PIDFD_NS_MAX]; 68 }; 69 70 static int sys_waitid(int which, pid_t pid, int options) 71 { 72 return syscall(__NR_waitid, which, pid, NULL, options, NULL); 73 } 74 75 pid_t create_child(int *pidfd, unsigned flags) 76 { 77 struct __clone_args args = { 78 .flags = CLONE_PIDFD | flags, 79 .exit_signal = SIGCHLD, 80 .pidfd = ptr_to_u64(pidfd), 81 }; 82 83 return sys_clone3(&args, sizeof(struct clone_args)); 84 } 85 86 static bool switch_timens(void) 87 { 88 int fd, ret; 89 90 if (unshare(CLONE_NEWTIME)) 91 return false; 92 93 fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC); 94 if (fd < 0) 95 return false; 96 97 ret = setns(fd, CLONE_NEWTIME); 98 close(fd); 99 return ret == 0; 100 } 101 102 static ssize_t read_nointr(int fd, void *buf, size_t count) 103 { 104 ssize_t ret; 105 106 do { 107 ret = read(fd, buf, count); 108 } while (ret < 0 && errno == EINTR); 109 110 return ret; 111 } 112 113 static ssize_t write_nointr(int fd, const void *buf, size_t count) 114 { 115 ssize_t ret; 116 117 do { 118 ret = write(fd, buf, count); 119 } while (ret < 0 && errno == EINTR); 120 121 return ret; 122 } 123 124 FIXTURE_SETUP(current_nsset) 125 { 126 int i, proc_fd, ret; 127 int ipc_sockets[2]; 128 char c; 129 130 for (i = 0; i < PIDFD_NS_MAX; i++) { 131 self->nsfds[i] = -EBADF; 132 self->child_nsfds1[i] = -EBADF; 133 self->child_nsfds2[i] = -EBADF; 134 } 135 136 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); 137 ASSERT_GE(proc_fd, 0) { 138 TH_LOG("%m - Failed to open /proc/self/ns"); 139 } 140 141 self->pid = getpid(); 142 for (i = 0; i < PIDFD_NS_MAX; i++) { 143 const struct ns_info *info = &ns_info[i]; 144 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 145 if (self->nsfds[i] < 0) { 146 EXPECT_EQ(errno, ENOENT) { 147 TH_LOG("%m - Failed to open %s namespace for process %d", 148 info->name, self->pid); 149 } 150 } 151 } 152 153 self->pidfd = sys_pidfd_open(self->pid, 0); 154 EXPECT_GT(self->pidfd, 0) { 155 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 156 } 157 158 /* Create task that exits right away. */ 159 self->child_pid_exited = create_child(&self->child_pidfd_exited, 160 CLONE_NEWUSER | CLONE_NEWNET); 161 EXPECT_GE(self->child_pid_exited, 0); 162 163 if (self->child_pid_exited == 0) 164 _exit(EXIT_SUCCESS); 165 166 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); 167 168 self->pidfd = sys_pidfd_open(self->pid, 0); 169 EXPECT_GE(self->pidfd, 0) { 170 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 171 } 172 173 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 174 EXPECT_EQ(ret, 0); 175 176 /* Create tasks that will be stopped. */ 177 self->child_pid1 = create_child(&self->child_pidfd1, 178 CLONE_NEWUSER | CLONE_NEWNS | 179 CLONE_NEWCGROUP | CLONE_NEWIPC | 180 CLONE_NEWUTS | CLONE_NEWPID | 181 CLONE_NEWNET); 182 EXPECT_GE(self->child_pid1, 0); 183 184 if (self->child_pid1 == 0) { 185 close(ipc_sockets[0]); 186 187 if (!switch_timens()) 188 _exit(EXIT_FAILURE); 189 190 if (write_nointr(ipc_sockets[1], "1", 1) < 0) 191 _exit(EXIT_FAILURE); 192 193 close(ipc_sockets[1]); 194 195 pause(); 196 _exit(EXIT_SUCCESS); 197 } 198 199 close(ipc_sockets[1]); 200 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); 201 close(ipc_sockets[0]); 202 203 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 204 EXPECT_EQ(ret, 0); 205 206 self->child_pid2 = create_child(&self->child_pidfd2, 207 CLONE_NEWUSER | CLONE_NEWNS | 208 CLONE_NEWCGROUP | CLONE_NEWIPC | 209 CLONE_NEWUTS | CLONE_NEWPID | 210 CLONE_NEWNET); 211 EXPECT_GE(self->child_pid2, 0); 212 213 if (self->child_pid2 == 0) { 214 close(ipc_sockets[0]); 215 216 if (!switch_timens()) 217 _exit(EXIT_FAILURE); 218 219 if (write_nointr(ipc_sockets[1], "1", 1) < 0) 220 _exit(EXIT_FAILURE); 221 222 close(ipc_sockets[1]); 223 224 pause(); 225 _exit(EXIT_SUCCESS); 226 } 227 228 close(ipc_sockets[1]); 229 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); 230 close(ipc_sockets[0]); 231 232 for (i = 0; i < PIDFD_NS_MAX; i++) { 233 char p[100]; 234 235 const struct ns_info *info = &ns_info[i]; 236 237 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 238 if (self->nsfds[i] < 0) { 239 EXPECT_EQ(errno, ENOENT) { 240 TH_LOG("%m - Failed to open %s namespace for process %d", 241 info->name, self->pid); 242 } 243 } 244 245 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 246 self->child_pid1, info->name); 247 EXPECT_GT(ret, 0); 248 EXPECT_LT(ret, sizeof(p)); 249 250 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC); 251 if (self->child_nsfds1[i] < 0) { 252 EXPECT_EQ(errno, ENOENT) { 253 TH_LOG("%m - Failed to open %s namespace for process %d", 254 info->name, self->child_pid1); 255 } 256 } 257 258 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 259 self->child_pid2, info->name); 260 EXPECT_GT(ret, 0); 261 EXPECT_LT(ret, sizeof(p)); 262 263 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC); 264 if (self->child_nsfds2[i] < 0) { 265 EXPECT_EQ(errno, ENOENT) { 266 TH_LOG("%m - Failed to open %s namespace for process %d", 267 info->name, self->child_pid1); 268 } 269 } 270 } 271 272 close(proc_fd); 273 } 274 275 FIXTURE_TEARDOWN(current_nsset) 276 { 277 int i; 278 279 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1, 280 SIGKILL, NULL, 0), 0); 281 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2, 282 SIGKILL, NULL, 0), 0); 283 284 for (i = 0; i < PIDFD_NS_MAX; i++) { 285 if (self->nsfds[i] >= 0) 286 close(self->nsfds[i]); 287 if (self->child_nsfds1[i] >= 0) 288 close(self->child_nsfds1[i]); 289 if (self->child_nsfds2[i] >= 0) 290 close(self->child_nsfds2[i]); 291 } 292 293 if (self->child_pidfd1 >= 0) 294 EXPECT_EQ(0, close(self->child_pidfd1)); 295 if (self->child_pidfd2 >= 0) 296 EXPECT_EQ(0, close(self->child_pidfd2)); 297 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); 298 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); 299 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); 300 } 301 302 static int preserve_ns(const int pid, const char *ns) 303 { 304 int ret; 305 char path[50]; 306 307 ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns); 308 if (ret < 0 || (size_t)ret >= sizeof(path)) 309 return -EIO; 310 311 return open(path, O_RDONLY | O_CLOEXEC); 312 } 313 314 static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns) 315 { 316 int ns_fd2 = -EBADF; 317 int ret = -1; 318 struct stat ns_st1, ns_st2; 319 320 ret = fstat(ns_fd1, &ns_st1); 321 if (ret < 0) 322 return -1; 323 324 ns_fd2 = preserve_ns(pid2, ns); 325 if (ns_fd2 < 0) 326 return -1; 327 328 ret = fstat(ns_fd2, &ns_st2); 329 close(ns_fd2); 330 if (ret < 0) 331 return -1; 332 333 /* processes are in the same namespace */ 334 if ((ns_st1.st_dev == ns_st2.st_dev) && 335 (ns_st1.st_ino == ns_st2.st_ino)) 336 return 1; 337 338 /* processes are in different namespaces */ 339 return 0; 340 } 341 342 /* Test that we can't pass garbage to the kernel. */ 343 TEST_F(current_nsset, invalid_flags) 344 { 345 ASSERT_NE(setns(self->pidfd, 0), 0); 346 EXPECT_EQ(errno, EINVAL); 347 348 ASSERT_NE(setns(self->pidfd, -1), 0); 349 EXPECT_EQ(errno, EINVAL); 350 351 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0); 352 EXPECT_EQ(errno, EINVAL); 353 354 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0); 355 EXPECT_EQ(errno, EINVAL); 356 } 357 358 /* Test that we can't attach to a task that has already exited. */ 359 TEST_F(current_nsset, pidfd_exited_child) 360 { 361 int i; 362 pid_t pid; 363 364 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET), 365 0); 366 EXPECT_EQ(errno, ESRCH); 367 368 pid = getpid(); 369 for (i = 0; i < PIDFD_NS_MAX; i++) { 370 const struct ns_info *info = &ns_info[i]; 371 /* Verify that we haven't changed any namespaces. */ 372 if (self->nsfds[i] >= 0) 373 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1); 374 } 375 } 376 377 TEST_F(current_nsset, pidfd_incremental_setns) 378 { 379 int i; 380 pid_t pid; 381 382 pid = getpid(); 383 for (i = 0; i < PIDFD_NS_MAX; i++) { 384 const struct ns_info *info = &ns_info[i]; 385 int nsfd; 386 387 if (self->child_nsfds1[i] < 0) 388 continue; 389 390 if (info->flag) { 391 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) { 392 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d", 393 info->name, self->child_pid1, 394 self->child_pidfd1); 395 } 396 } 397 398 /* Verify that we have changed to the correct namespaces. */ 399 if (info->flag == CLONE_NEWPID) 400 nsfd = self->nsfds[i]; 401 else 402 nsfd = self->child_nsfds1[i]; 403 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 404 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d", 405 info->name, self->child_pid1, 406 self->child_pidfd1); 407 } 408 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d", 409 info->name, self->child_pid1, self->child_pidfd1); 410 } 411 } 412 413 TEST_F(current_nsset, nsfd_incremental_setns) 414 { 415 int i; 416 pid_t pid; 417 418 pid = getpid(); 419 for (i = 0; i < PIDFD_NS_MAX; i++) { 420 const struct ns_info *info = &ns_info[i]; 421 int nsfd; 422 423 if (self->child_nsfds1[i] < 0) 424 continue; 425 426 if (info->flag) { 427 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) { 428 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", 429 info->name, self->child_pid1, 430 self->child_nsfds1[i]); 431 } 432 } 433 434 /* Verify that we have changed to the correct namespaces. */ 435 if (info->flag == CLONE_NEWPID) 436 nsfd = self->nsfds[i]; 437 else 438 nsfd = self->child_nsfds1[i]; 439 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 440 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", 441 info->name, self->child_pid1, 442 self->child_nsfds1[i]); 443 } 444 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", 445 info->name, self->child_pid1, self->child_nsfds1[i]); 446 } 447 } 448 449 TEST_F(current_nsset, pidfd_one_shot_setns) 450 { 451 unsigned flags = 0; 452 int i; 453 pid_t pid; 454 455 for (i = 0; i < PIDFD_NS_MAX; i++) { 456 const struct ns_info *info = &ns_info[i]; 457 458 if (self->child_nsfds1[i] < 0) 459 continue; 460 461 flags |= info->flag; 462 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 463 info->name, self->child_pid1); 464 } 465 466 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 467 TH_LOG("%m - Failed to setns to namespaces of %d", 468 self->child_pid1); 469 } 470 471 pid = getpid(); 472 for (i = 0; i < PIDFD_NS_MAX; i++) { 473 const struct ns_info *info = &ns_info[i]; 474 int nsfd; 475 476 if (self->child_nsfds1[i] < 0) 477 continue; 478 479 /* Verify that we have changed to the correct namespaces. */ 480 if (info->flag == CLONE_NEWPID) 481 nsfd = self->nsfds[i]; 482 else 483 nsfd = self->child_nsfds1[i]; 484 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 485 TH_LOG("setns failed to place us correctly into %s namespace of %d", 486 info->name, self->child_pid1); 487 } 488 TH_LOG("Managed to correctly setns to %s namespace of %d", 489 info->name, self->child_pid1); 490 } 491 } 492 493 TEST_F(current_nsset, no_foul_play) 494 { 495 unsigned flags = 0; 496 int i; 497 498 for (i = 0; i < PIDFD_NS_MAX; i++) { 499 const struct ns_info *info = &ns_info[i]; 500 501 if (self->child_nsfds1[i] < 0) 502 continue; 503 504 flags |= info->flag; 505 if (info->flag) /* No use logging pid_for_children. */ 506 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 507 info->name, self->child_pid1); 508 } 509 510 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 511 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d", 512 self->child_pid1, self->child_pidfd1); 513 } 514 515 /* 516 * Can't setns to a user namespace outside of our hierarchy since we 517 * don't have caps in there and didn't create it. That means that under 518 * no circumstances should we be able to setns to any of the other 519 * ones since they aren't owned by our user namespace. 520 */ 521 for (i = 0; i < PIDFD_NS_MAX; i++) { 522 const struct ns_info *info = &ns_info[i]; 523 524 if (self->child_nsfds2[i] < 0 || !info->flag) 525 continue; 526 527 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) { 528 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d", 529 info->name, self->child_pid2, 530 self->child_pidfd2); 531 } 532 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d", 533 info->name, self->child_pid2, 534 self->child_pidfd2); 535 536 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) { 537 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", 538 info->name, self->child_pid2, 539 self->child_nsfds2[i]); 540 } 541 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", 542 info->name, self->child_pid2, 543 self->child_nsfds2[i]); 544 } 545 } 546 547 TEST(setns_einval) 548 { 549 int fd; 550 551 fd = sys_memfd_create("rostock", 0); 552 EXPECT_GT(fd, 0); 553 554 ASSERT_NE(setns(fd, 0), 0); 555 EXPECT_EQ(errno, EINVAL); 556 close(fd); 557 } 558 559 TEST_HARNESS_MAIN 560