1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <limits.h> 7 #include <linux/types.h> 8 #include <sched.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <syscall.h> 14 #include <sys/prctl.h> 15 #include <sys/wait.h> 16 #include <unistd.h> 17 #include <sys/socket.h> 18 #include <sys/stat.h> 19 #include <linux/kcmp.h> 20 21 #include "pidfd.h" 22 #include "../clone3/clone3_selftests.h" 23 #include "../kselftest_harness.h" 24 25 enum { 26 PIDFD_NS_USER, 27 PIDFD_NS_MNT, 28 PIDFD_NS_PID, 29 PIDFD_NS_UTS, 30 PIDFD_NS_IPC, 31 PIDFD_NS_NET, 32 PIDFD_NS_CGROUP, 33 PIDFD_NS_PIDCLD, 34 PIDFD_NS_MAX 35 }; 36 37 const struct ns_info { 38 const char *name; 39 int flag; 40 } ns_info[] = { 41 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, 42 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, 43 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, 44 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, 45 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, 46 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, 47 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, 48 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, 49 }; 50 51 FIXTURE(current_nsset) 52 { 53 pid_t pid; 54 int pidfd; 55 int nsfds[PIDFD_NS_MAX]; 56 57 pid_t child_pid_exited; 58 int child_pidfd_exited; 59 60 pid_t child_pid1; 61 int child_pidfd1; 62 int child_nsfds1[PIDFD_NS_MAX]; 63 64 pid_t child_pid2; 65 int child_pidfd2; 66 int child_nsfds2[PIDFD_NS_MAX]; 67 }; 68 69 static int sys_waitid(int which, pid_t pid, int options) 70 { 71 return syscall(__NR_waitid, which, pid, NULL, options, NULL); 72 } 73 74 pid_t create_child(int *pidfd, unsigned flags) 75 { 76 struct clone_args args = { 77 .flags = CLONE_PIDFD | flags, 78 .exit_signal = SIGCHLD, 79 .pidfd = ptr_to_u64(pidfd), 80 }; 81 82 return sys_clone3(&args, sizeof(struct clone_args)); 83 } 84 85 FIXTURE_SETUP(current_nsset) 86 { 87 int i, proc_fd, ret; 88 89 for (i = 0; i < PIDFD_NS_MAX; i++) { 90 self->nsfds[i] = -EBADF; 91 self->child_nsfds1[i] = -EBADF; 92 self->child_nsfds2[i] = -EBADF; 93 } 94 95 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); 96 ASSERT_GE(proc_fd, 0) { 97 TH_LOG("%m - Failed to open /proc/self/ns"); 98 } 99 100 self->pid = getpid(); 101 for (i = 0; i < PIDFD_NS_MAX; i++) { 102 const struct ns_info *info = &ns_info[i]; 103 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 104 if (self->nsfds[i] < 0) { 105 EXPECT_EQ(errno, ENOENT) { 106 TH_LOG("%m - Failed to open %s namespace for process %d", 107 info->name, self->pid); 108 } 109 } 110 } 111 112 self->pidfd = sys_pidfd_open(self->pid, 0); 113 EXPECT_GT(self->pidfd, 0) { 114 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 115 } 116 117 /* Create task that exits right away. */ 118 self->child_pid_exited = create_child(&self->child_pidfd_exited, 119 CLONE_NEWUSER | CLONE_NEWNET); 120 EXPECT_GT(self->child_pid_exited, 0); 121 122 if (self->child_pid_exited == 0) 123 _exit(EXIT_SUCCESS); 124 125 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); 126 127 self->pidfd = sys_pidfd_open(self->pid, 0); 128 EXPECT_GE(self->pidfd, 0) { 129 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 130 } 131 132 /* Create tasks that will be stopped. */ 133 self->child_pid1 = create_child(&self->child_pidfd1, 134 CLONE_NEWUSER | CLONE_NEWNS | 135 CLONE_NEWCGROUP | CLONE_NEWIPC | 136 CLONE_NEWUTS | CLONE_NEWPID | 137 CLONE_NEWNET); 138 EXPECT_GE(self->child_pid1, 0); 139 140 if (self->child_pid1 == 0) { 141 pause(); 142 _exit(EXIT_SUCCESS); 143 } 144 145 self->child_pid2 = create_child(&self->child_pidfd2, 146 CLONE_NEWUSER | CLONE_NEWNS | 147 CLONE_NEWCGROUP | CLONE_NEWIPC | 148 CLONE_NEWUTS | CLONE_NEWPID | 149 CLONE_NEWNET); 150 EXPECT_GE(self->child_pid2, 0); 151 152 if (self->child_pid2 == 0) { 153 pause(); 154 _exit(EXIT_SUCCESS); 155 } 156 157 for (i = 0; i < PIDFD_NS_MAX; i++) { 158 char p[100]; 159 160 const struct ns_info *info = &ns_info[i]; 161 162 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 163 if (self->nsfds[i] < 0) { 164 EXPECT_EQ(errno, ENOENT) { 165 TH_LOG("%m - Failed to open %s namespace for process %d", 166 info->name, self->pid); 167 } 168 } 169 170 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 171 self->child_pid1, info->name); 172 EXPECT_GT(ret, 0); 173 EXPECT_LT(ret, sizeof(p)); 174 175 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC); 176 if (self->child_nsfds1[i] < 0) { 177 EXPECT_EQ(errno, ENOENT) { 178 TH_LOG("%m - Failed to open %s namespace for process %d", 179 info->name, self->child_pid1); 180 } 181 } 182 183 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 184 self->child_pid2, info->name); 185 EXPECT_GT(ret, 0); 186 EXPECT_LT(ret, sizeof(p)); 187 188 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC); 189 if (self->child_nsfds2[i] < 0) { 190 EXPECT_EQ(errno, ENOENT) { 191 TH_LOG("%m - Failed to open %s namespace for process %d", 192 info->name, self->child_pid1); 193 } 194 } 195 } 196 197 close(proc_fd); 198 } 199 200 FIXTURE_TEARDOWN(current_nsset) 201 { 202 int i; 203 204 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1, 205 SIGKILL, NULL, 0), 0); 206 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2, 207 SIGKILL, NULL, 0), 0); 208 209 for (i = 0; i < PIDFD_NS_MAX; i++) { 210 if (self->nsfds[i] >= 0) 211 close(self->nsfds[i]); 212 if (self->child_nsfds1[i] >= 0) 213 close(self->child_nsfds1[i]); 214 if (self->child_nsfds2[i] >= 0) 215 close(self->child_nsfds2[i]); 216 } 217 218 if (self->child_pidfd1 >= 0) 219 EXPECT_EQ(0, close(self->child_pidfd1)); 220 if (self->child_pidfd2 >= 0) 221 EXPECT_EQ(0, close(self->child_pidfd2)); 222 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); 223 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); 224 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); 225 } 226 227 static int preserve_ns(const int pid, const char *ns) 228 { 229 int ret; 230 char path[50]; 231 232 ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns); 233 if (ret < 0 || (size_t)ret >= sizeof(path)) 234 return -EIO; 235 236 return open(path, O_RDONLY | O_CLOEXEC); 237 } 238 239 static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns) 240 { 241 int ns_fd2 = -EBADF; 242 int ret = -1; 243 struct stat ns_st1, ns_st2; 244 245 ret = fstat(ns_fd1, &ns_st1); 246 if (ret < 0) 247 return -1; 248 249 ns_fd2 = preserve_ns(pid2, ns); 250 if (ns_fd2 < 0) 251 return -1; 252 253 ret = fstat(ns_fd2, &ns_st2); 254 close(ns_fd2); 255 if (ret < 0) 256 return -1; 257 258 /* processes are in the same namespace */ 259 if ((ns_st1.st_dev == ns_st2.st_dev) && 260 (ns_st1.st_ino == ns_st2.st_ino)) 261 return 1; 262 263 /* processes are in different namespaces */ 264 return 0; 265 } 266 267 /* Test that we can't pass garbage to the kernel. */ 268 TEST_F(current_nsset, invalid_flags) 269 { 270 ASSERT_NE(setns(self->pidfd, 0), 0); 271 EXPECT_EQ(errno, EINVAL); 272 273 ASSERT_NE(setns(self->pidfd, -1), 0); 274 EXPECT_EQ(errno, EINVAL); 275 276 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0); 277 EXPECT_EQ(errno, EINVAL); 278 279 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0); 280 EXPECT_EQ(errno, EINVAL); 281 } 282 283 /* Test that we can't attach to a task that has already exited. */ 284 TEST_F(current_nsset, pidfd_exited_child) 285 { 286 int i; 287 pid_t pid; 288 289 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET), 290 0); 291 EXPECT_EQ(errno, ESRCH); 292 293 pid = getpid(); 294 for (i = 0; i < PIDFD_NS_MAX; i++) { 295 const struct ns_info *info = &ns_info[i]; 296 /* Verify that we haven't changed any namespaces. */ 297 if (self->nsfds[i] >= 0) 298 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1); 299 } 300 } 301 302 TEST_F(current_nsset, pidfd_incremental_setns) 303 { 304 int i; 305 pid_t pid; 306 307 pid = getpid(); 308 for (i = 0; i < PIDFD_NS_MAX; i++) { 309 const struct ns_info *info = &ns_info[i]; 310 int nsfd; 311 312 if (self->child_nsfds1[i] < 0) 313 continue; 314 315 if (info->flag) { 316 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) { 317 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d", 318 info->name, self->child_pid1, 319 self->child_pidfd1); 320 } 321 } 322 323 /* Verify that we have changed to the correct namespaces. */ 324 if (info->flag == CLONE_NEWPID) 325 nsfd = self->nsfds[i]; 326 else 327 nsfd = self->child_nsfds1[i]; 328 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 329 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d", 330 info->name, self->child_pid1, 331 self->child_pidfd1); 332 } 333 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d", 334 info->name, self->child_pid1, self->child_pidfd1); 335 } 336 } 337 338 TEST_F(current_nsset, nsfd_incremental_setns) 339 { 340 int i; 341 pid_t pid; 342 343 pid = getpid(); 344 for (i = 0; i < PIDFD_NS_MAX; i++) { 345 const struct ns_info *info = &ns_info[i]; 346 int nsfd; 347 348 if (self->child_nsfds1[i] < 0) 349 continue; 350 351 if (info->flag) { 352 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) { 353 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", 354 info->name, self->child_pid1, 355 self->child_nsfds1[i]); 356 } 357 } 358 359 /* Verify that we have changed to the correct namespaces. */ 360 if (info->flag == CLONE_NEWPID) 361 nsfd = self->nsfds[i]; 362 else 363 nsfd = self->child_nsfds1[i]; 364 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 365 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", 366 info->name, self->child_pid1, 367 self->child_nsfds1[i]); 368 } 369 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", 370 info->name, self->child_pid1, self->child_nsfds1[i]); 371 } 372 } 373 374 TEST_F(current_nsset, pidfd_one_shot_setns) 375 { 376 unsigned flags = 0; 377 int i; 378 pid_t pid; 379 380 for (i = 0; i < PIDFD_NS_MAX; i++) { 381 const struct ns_info *info = &ns_info[i]; 382 383 if (self->child_nsfds1[i] < 0) 384 continue; 385 386 flags |= info->flag; 387 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 388 info->name, self->child_pid1); 389 } 390 391 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 392 TH_LOG("%m - Failed to setns to namespaces of %d", 393 self->child_pid1); 394 } 395 396 pid = getpid(); 397 for (i = 0; i < PIDFD_NS_MAX; i++) { 398 const struct ns_info *info = &ns_info[i]; 399 int nsfd; 400 401 if (self->child_nsfds1[i] < 0) 402 continue; 403 404 /* Verify that we have changed to the correct namespaces. */ 405 if (info->flag == CLONE_NEWPID) 406 nsfd = self->nsfds[i]; 407 else 408 nsfd = self->child_nsfds1[i]; 409 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 410 TH_LOG("setns failed to place us correctly into %s namespace of %d", 411 info->name, self->child_pid1); 412 } 413 TH_LOG("Managed to correctly setns to %s namespace of %d", 414 info->name, self->child_pid1); 415 } 416 } 417 418 TEST_F(current_nsset, no_foul_play) 419 { 420 unsigned flags = 0; 421 int i; 422 423 for (i = 0; i < PIDFD_NS_MAX; i++) { 424 const struct ns_info *info = &ns_info[i]; 425 426 if (self->child_nsfds1[i] < 0) 427 continue; 428 429 flags |= info->flag; 430 if (info->flag) /* No use logging pid_for_children. */ 431 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 432 info->name, self->child_pid1); 433 } 434 435 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 436 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d", 437 self->child_pid1, self->child_pidfd1); 438 } 439 440 /* 441 * Can't setns to a user namespace outside of our hierarchy since we 442 * don't have caps in there and didn't create it. That means that under 443 * no circumstances should we be able to setns to any of the other 444 * ones since they aren't owned by our user namespace. 445 */ 446 for (i = 0; i < PIDFD_NS_MAX; i++) { 447 const struct ns_info *info = &ns_info[i]; 448 449 if (self->child_nsfds2[i] < 0 || !info->flag) 450 continue; 451 452 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) { 453 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d", 454 info->name, self->child_pid2, 455 self->child_pidfd2); 456 } 457 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d", 458 info->name, self->child_pid2, 459 self->child_pidfd2); 460 461 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) { 462 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", 463 info->name, self->child_pid2, 464 self->child_nsfds2[i]); 465 } 466 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", 467 info->name, self->child_pid2, 468 self->child_nsfds2[i]); 469 } 470 } 471 472 TEST(setns_einval) 473 { 474 int fd; 475 476 fd = sys_memfd_create("rostock", 0); 477 EXPECT_GT(fd, 0); 478 479 ASSERT_NE(setns(fd, 0), 0); 480 EXPECT_EQ(errno, EINVAL); 481 close(fd); 482 } 483 484 TEST_HARNESS_MAIN 485