1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 #include <assert.h> 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <linux/types.h> 7 #include <sched.h> 8 #include <signal.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <syscall.h> 13 #include <sys/mount.h> 14 #include <sys/wait.h> 15 #include <unistd.h> 16 17 #include "kselftest_harness.h" 18 #include "../pidfd/pidfd.h" 19 20 /* 21 * The kernel computes the minimum allowed pid_max as: 22 * max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus()) 23 * Mirror that here so the test values are always valid. 24 * 25 * Note: glibc's get_nprocs_conf() returns the number of *configured* 26 * (present) CPUs, not *possible* CPUs. The kernel uses 27 * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible. 28 * These can differ significantly (e.g. 16 configured vs 128 possible). 29 */ 30 #define RESERVED_PIDS 300 31 #define PIDS_PER_CPU_MIN 8 32 33 /* Count CPUs from a range list like "0-31" or "0-15,32-47". */ 34 static int num_possible_cpus(void) 35 { 36 FILE *f; 37 int count = 0; 38 int lo, hi; 39 40 f = fopen("/sys/devices/system/cpu/possible", "r"); 41 if (!f) 42 return 0; 43 44 while (fscanf(f, "%d", &lo) == 1) { 45 if (fscanf(f, "-%d", &hi) == 1) 46 count += hi - lo + 1; 47 else 48 count++; 49 /* skip comma separator */ 50 fscanf(f, ","); 51 } 52 53 fclose(f); 54 return count; 55 } 56 57 static int pid_min(void) 58 { 59 int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus(); 60 61 return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1); 62 } 63 64 /* 65 * Outer and inner pid_max limits used by the tests. The outer limit is 66 * the more restrictive ancestor; the inner limit is set higher in a 67 * nested namespace but must still be capped by the outer limit. 68 * Both are derived from the kernel's minimum so they are always writable. 69 * 70 * Global so that clone callbacks can access them without parameter plumbing. 71 */ 72 static int outer_limit; 73 static int inner_limit; 74 75 static int write_int_to_fd(int fd, int val) 76 { 77 char buf[12]; 78 int len = snprintf(buf, sizeof(buf), "%d", val); 79 80 return write(fd, buf, len); 81 } 82 83 #define __STACK_SIZE (8 * 1024 * 1024) 84 static pid_t do_clone(int (*fn)(void *), void *arg, int flags) 85 { 86 char *stack; 87 pid_t ret; 88 89 stack = malloc(__STACK_SIZE); 90 if (!stack) 91 return -ENOMEM; 92 93 #ifdef __ia64__ 94 ret = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg); 95 #else 96 ret = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg); 97 #endif 98 free(stack); 99 return ret; 100 } 101 102 static int pid_max_cb(void *data) 103 { 104 int fd, ret; 105 pid_t pid; 106 107 ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); 108 if (ret) { 109 fprintf(stderr, "%m - Failed to make rootfs private mount\n"); 110 return -1; 111 } 112 113 umount2("/proc", MNT_DETACH); 114 115 ret = mount("proc", "/proc", "proc", 0, NULL); 116 if (ret) { 117 fprintf(stderr, "%m - Failed to mount proc\n"); 118 return -1; 119 } 120 121 fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); 122 if (fd < 0) { 123 fprintf(stderr, "%m - Failed to open pid_max\n"); 124 return -1; 125 } 126 127 ret = write_int_to_fd(fd, inner_limit); 128 if (ret < 0) { 129 fprintf(stderr, "%m - Failed to write pid_max\n"); 130 return -1; 131 } 132 133 for (int i = 0; i < inner_limit + 1; i++) { 134 pid = fork(); 135 if (pid == 0) 136 exit(EXIT_SUCCESS); 137 wait_for_pid(pid); 138 if (pid > inner_limit) { 139 fprintf(stderr, "Managed to create pid number beyond limit\n"); 140 return -1; 141 } 142 } 143 144 return 0; 145 } 146 147 static int pid_max_nested_inner(void *data) 148 { 149 int fret = -1; 150 pid_t pids[2]; 151 int fd, i, ret; 152 153 ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); 154 if (ret) { 155 fprintf(stderr, "%m - Failed to make rootfs private mount\n"); 156 return fret; 157 } 158 159 umount2("/proc", MNT_DETACH); 160 161 ret = mount("proc", "/proc", "proc", 0, NULL); 162 if (ret) { 163 fprintf(stderr, "%m - Failed to mount proc\n"); 164 return fret; 165 } 166 167 fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); 168 if (fd < 0) { 169 fprintf(stderr, "%m - Failed to open pid_max\n"); 170 return fret; 171 } 172 173 ret = write_int_to_fd(fd, inner_limit); 174 close(fd); 175 if (ret < 0) { 176 fprintf(stderr, "%m - Failed to write pid_max\n"); 177 return fret; 178 } 179 180 pids[0] = fork(); 181 if (pids[0] < 0) { 182 fprintf(stderr, "Failed to create first new process\n"); 183 return fret; 184 } 185 186 if (pids[0] == 0) 187 exit(EXIT_SUCCESS); 188 189 pids[1] = fork(); 190 wait_for_pid(pids[0]); 191 if (pids[1] >= 0) { 192 if (pids[1] == 0) 193 exit(EXIT_SUCCESS); 194 wait_for_pid(pids[1]); 195 196 fprintf(stderr, "Managed to create process even though ancestor pid namespace had a limit\n"); 197 return fret; 198 } 199 200 /* Now make sure that we wrap pids at outer_limit. */ 201 for (i = 0; i < inner_limit + 10; i++) { 202 pid_t pid; 203 204 pid = fork(); 205 if (pid < 0) 206 return fret; 207 208 if (pid == 0) 209 exit(EXIT_SUCCESS); 210 211 wait_for_pid(pid); 212 if (pid >= inner_limit) { 213 fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid); 214 return fret; 215 } 216 } 217 218 return 0; 219 } 220 221 static int pid_max_nested_outer(void *data) 222 { 223 int fret = -1, nr_procs = 0; 224 pid_t *pids; 225 int fd, ret; 226 pid_t pid; 227 228 pids = malloc(outer_limit * sizeof(pid_t)); 229 if (!pids) 230 return -1; 231 232 ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); 233 if (ret) { 234 fprintf(stderr, "%m - Failed to make rootfs private mount\n"); 235 goto out; 236 } 237 238 umount2("/proc", MNT_DETACH); 239 240 ret = mount("proc", "/proc", "proc", 0, NULL); 241 if (ret) { 242 fprintf(stderr, "%m - Failed to mount proc\n"); 243 goto out; 244 } 245 246 fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); 247 if (fd < 0) { 248 fprintf(stderr, "%m - Failed to open pid_max\n"); 249 goto out; 250 } 251 252 ret = write_int_to_fd(fd, outer_limit); 253 close(fd); 254 if (ret < 0) { 255 fprintf(stderr, "%m - Failed to write pid_max\n"); 256 goto out; 257 } 258 259 /* 260 * Create (outer_limit - 4) processes. This leaves room for 261 * do_clone() and one more. So creating another process needs 262 * to fail. 263 */ 264 for (nr_procs = 0; nr_procs < outer_limit - 4; nr_procs++) { 265 pid = fork(); 266 if (pid < 0) 267 goto reap; 268 269 if (pid == 0) 270 exit(EXIT_SUCCESS); 271 272 pids[nr_procs] = pid; 273 } 274 275 pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS); 276 if (pid < 0) { 277 fprintf(stderr, "%m - Failed to clone nested pidns\n"); 278 goto reap; 279 } 280 281 if (wait_for_pid(pid)) { 282 fprintf(stderr, "%m - Nested pid_max failed\n"); 283 goto reap; 284 } 285 286 fret = 0; 287 288 reap: 289 for (int i = 0; i < nr_procs; i++) 290 wait_for_pid(pids[i]); 291 292 out: 293 free(pids); 294 return fret; 295 } 296 297 static int pid_max_nested_limit_inner(void *data) 298 { 299 int fret = -1, nr_procs = 0; 300 int fd, ret; 301 pid_t pid; 302 pid_t *pids; 303 304 pids = malloc(inner_limit * sizeof(pid_t)); 305 if (!pids) 306 return -1; 307 308 ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); 309 if (ret) { 310 fprintf(stderr, "%m - Failed to make rootfs private mount\n"); 311 goto out; 312 } 313 314 umount2("/proc", MNT_DETACH); 315 316 ret = mount("proc", "/proc", "proc", 0, NULL); 317 if (ret) { 318 fprintf(stderr, "%m - Failed to mount proc\n"); 319 goto out; 320 } 321 322 fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); 323 if (fd < 0) { 324 fprintf(stderr, "%m - Failed to open pid_max\n"); 325 goto out; 326 } 327 328 ret = write_int_to_fd(fd, inner_limit); 329 close(fd); 330 if (ret < 0) { 331 fprintf(stderr, "%m - Failed to write pid_max\n"); 332 goto out; 333 } 334 335 for (nr_procs = 0; nr_procs < inner_limit; nr_procs++) { 336 pid = fork(); 337 if (pid < 0) 338 break; 339 340 if (pid == 0) 341 exit(EXIT_SUCCESS); 342 343 pids[nr_procs] = pid; 344 } 345 346 if (nr_procs >= outer_limit) { 347 fprintf(stderr, "Managed to create processes beyond the configured outer limit\n"); 348 goto reap; 349 } 350 351 fret = 0; 352 353 reap: 354 for (int i = 0; i < nr_procs; i++) 355 wait_for_pid(pids[i]); 356 357 out: 358 free(pids); 359 return fret; 360 } 361 362 static int pid_max_nested_limit_outer(void *data) 363 { 364 int fd, ret; 365 pid_t pid; 366 367 ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); 368 if (ret) { 369 fprintf(stderr, "%m - Failed to make rootfs private mount\n"); 370 return -1; 371 } 372 373 umount2("/proc", MNT_DETACH); 374 375 ret = mount("proc", "/proc", "proc", 0, NULL); 376 if (ret) { 377 fprintf(stderr, "%m - Failed to mount proc\n"); 378 return -1; 379 } 380 381 fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); 382 if (fd < 0) { 383 fprintf(stderr, "%m - Failed to open pid_max\n"); 384 return -1; 385 } 386 387 ret = write_int_to_fd(fd, outer_limit); 388 close(fd); 389 if (ret < 0) { 390 fprintf(stderr, "%m - Failed to write pid_max\n"); 391 return -1; 392 } 393 394 pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS); 395 if (pid < 0) { 396 fprintf(stderr, "%m - Failed to clone nested pidns\n"); 397 return -1; 398 } 399 400 if (wait_for_pid(pid)) { 401 fprintf(stderr, "%m - Nested pid_max failed\n"); 402 return -1; 403 } 404 405 return 0; 406 } 407 408 FIXTURE(pid_max) { 409 int dummy; 410 }; 411 412 FIXTURE_SETUP(pid_max) 413 { 414 int min = pid_min(); 415 416 outer_limit = min + 100; 417 inner_limit = min + 200; 418 } 419 420 FIXTURE_TEARDOWN(pid_max) 421 { 422 } 423 424 TEST_F(pid_max, simple) 425 { 426 pid_t pid; 427 428 pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS); 429 ASSERT_GT(pid, 0); 430 ASSERT_EQ(0, wait_for_pid(pid)); 431 } 432 433 TEST_F(pid_max, nested_limit) 434 { 435 pid_t pid; 436 437 pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS); 438 ASSERT_GT(pid, 0); 439 ASSERT_EQ(0, wait_for_pid(pid)); 440 } 441 442 TEST_F(pid_max, nested) 443 { 444 pid_t pid; 445 446 pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS); 447 ASSERT_GT(pid, 0); 448 ASSERT_EQ(0, wait_for_pid(pid)); 449 } 450 451 TEST_HARNESS_MAIN 452