xref: /linux/tools/testing/selftests/pid_namespace/pid_max.c (revision d324c5416a63d7b828e6d6406815cde7d4ff1a7d)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #define _GNU_SOURCE
3 #include <assert.h>
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <linux/types.h>
7 #include <sched.h>
8 #include <signal.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <sys/mount.h>
14 #include <sys/wait.h>
15 #include <unistd.h>
16 
17 #include "kselftest_harness.h"
18 #include "../pidfd/pidfd.h"
19 
20 /*
21  * The kernel computes the minimum allowed pid_max as:
22  *   max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus())
23  * Mirror that here so the test values are always valid.
24  *
25  * Note: glibc's get_nprocs_conf() returns the number of *configured*
26  * (present) CPUs, not *possible* CPUs.  The kernel uses
27  * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible.
28  * These can differ significantly (e.g. 16 configured vs 128 possible).
29  */
/* RESERVED_PIDS + 1 is the fixed lower bound that pid_min() falls back to. */
#define RESERVED_PIDS		300
/* pid_min() multiplies this by the possible-CPU count for the per-CPU bound. */
#define PIDS_PER_CPU_MIN	8
32 
/*
 * Return the number of CPUs listed in /sys/devices/system/cpu/possible.
 *
 * The file holds comma-separated entries, each either a single CPU number
 * ("7") or an inclusive range ("0-31"); e.g. "0-15,32-47" counts as 32.
 * Returns 0 if the file cannot be opened.
 */
static int num_possible_cpus(void)
{
	int total = 0;
	int first, last;
	FILE *fp = fopen("/sys/devices/system/cpu/possible", "r");

	if (fp == NULL)
		return 0;

	for (;;) {
		if (fscanf(fp, "%d", &first) != 1)
			break;

		/* A "-N" suffix turns the entry into the inclusive range first..N. */
		total += (fscanf(fp, "-%d", &last) == 1) ? last - first + 1 : 1;

		/* Consume the "," between entries, if there is one. */
		fscanf(fp, ",");
	}

	fclose(fp);
	return total;
}
56 
57 static int pid_min(void)
58 {
59 	int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus();
60 
61 	return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1);
62 }
63 
64 /*
65  * Outer and inner pid_max limits used by the tests.  The outer limit is
66  * the more restrictive ancestor; the inner limit is set higher in a
67  * nested namespace but must still be capped by the outer limit.
68  * Both are derived from the kernel's minimum so they are always writable.
69  *
70  * Global so that clone callbacks can access them without parameter plumbing.
71  */
static int outer_limit;	/* set to pid_min() + 100 in FIXTURE_SETUP */
static int inner_limit;	/* set to pid_min() + 200 in FIXTURE_SETUP */
74 
/*
 * Format @val as a decimal string and hand it to @fd in a single write().
 * The terminating NUL is not written.
 *
 * Returns the result of write(): the byte count on success, negative on
 * failure.
 */
static int write_int_to_fd(int fd, int val)
{
	char digits[12];
	int nbytes;

	nbytes = snprintf(digits, sizeof(digits), "%d", val);

	return write(fd, digits, nbytes);
}
82 
/* Size in bytes of the stack buffer given to each cloned child. */
#define __STACK_SIZE (8 * 1024 * 1024)

/*
 * Start @fn(@arg) in a new process via clone() (or __clone2() on ia64),
 * with SIGCHLD OR-ed into @flags.
 *
 * A heap buffer of __STACK_SIZE bytes serves as the child's stack; the
 * caller's copy of that buffer is freed again before returning.
 *
 * Returns the value of the clone call, or -ENOMEM if the stack buffer
 * could not be allocated.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	pid_t child;
	char *stack_base = malloc(__STACK_SIZE);

	if (stack_base == NULL)
		return -ENOMEM;

#ifdef __ia64__
	child = __clone2(fn, stack_base, __STACK_SIZE, flags | SIGCHLD, arg);
#else
	/* clone() is handed the end of the buffer (base + size). */
	child = clone(fn, stack_base + __STACK_SIZE, flags | SIGCHLD, arg);
#endif
	free(stack_base);

	return child;
}
101 
102 static int pid_max_cb(void *data)
103 {
104 	int fd, ret;
105 	pid_t pid;
106 
107 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
108 	if (ret) {
109 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
110 		return -1;
111 	}
112 
113 	umount2("/proc", MNT_DETACH);
114 
115 	ret = mount("proc", "/proc", "proc", 0, NULL);
116 	if (ret) {
117 		fprintf(stderr, "%m - Failed to mount proc\n");
118 		return -1;
119 	}
120 
121 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
122 	if (fd < 0) {
123 		fprintf(stderr, "%m - Failed to open pid_max\n");
124 		return -1;
125 	}
126 
127 	ret = write_int_to_fd(fd, inner_limit);
128 	if (ret < 0) {
129 		fprintf(stderr, "%m - Failed to write pid_max\n");
130 		return -1;
131 	}
132 
133 	for (int i = 0; i < inner_limit + 1; i++) {
134 		pid = fork();
135 		if (pid == 0)
136 			exit(EXIT_SUCCESS);
137 		wait_for_pid(pid);
138 		if (pid > inner_limit) {
139 			fprintf(stderr, "Managed to create pid number beyond limit\n");
140 			return -1;
141 		}
142 	}
143 
144 	return 0;
145 }
146 
147 static int pid_max_nested_inner(void *data)
148 {
149 	int fret = -1;
150 	pid_t pids[2];
151 	int fd, i, ret;
152 
153 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
154 	if (ret) {
155 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
156 		return fret;
157 	}
158 
159 	umount2("/proc", MNT_DETACH);
160 
161 	ret = mount("proc", "/proc", "proc", 0, NULL);
162 	if (ret) {
163 		fprintf(stderr, "%m - Failed to mount proc\n");
164 		return fret;
165 	}
166 
167 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
168 	if (fd < 0) {
169 		fprintf(stderr, "%m - Failed to open pid_max\n");
170 		return fret;
171 	}
172 
173 	ret = write_int_to_fd(fd, inner_limit);
174 	close(fd);
175 	if (ret < 0) {
176 		fprintf(stderr, "%m - Failed to write pid_max\n");
177 		return fret;
178 	}
179 
180 	pids[0] = fork();
181 	if (pids[0] < 0) {
182 		fprintf(stderr, "Failed to create first new process\n");
183 		return fret;
184 	}
185 
186 	if (pids[0] == 0)
187 		exit(EXIT_SUCCESS);
188 
189 	pids[1] = fork();
190 	wait_for_pid(pids[0]);
191 	if (pids[1] >= 0) {
192 		if (pids[1] == 0)
193 			exit(EXIT_SUCCESS);
194 		wait_for_pid(pids[1]);
195 
196 		fprintf(stderr, "Managed to create process even though ancestor pid namespace had a limit\n");
197 		return fret;
198 	}
199 
200 	/* Now make sure that we wrap pids at outer_limit. */
201 	for (i = 0; i < inner_limit + 10; i++) {
202 		pid_t pid;
203 
204 		pid = fork();
205 		if (pid < 0)
206 			return fret;
207 
208 		if (pid == 0)
209 			exit(EXIT_SUCCESS);
210 
211 		wait_for_pid(pid);
212 		if (pid >= inner_limit) {
213 			fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
214 			return fret;
215 		}
216 	}
217 
218 	return 0;
219 }
220 
221 static int pid_max_nested_outer(void *data)
222 {
223 	int fret = -1, nr_procs = 0;
224 	pid_t *pids;
225 	int fd, ret;
226 	pid_t pid;
227 
228 	pids = malloc(outer_limit * sizeof(pid_t));
229 	if (!pids)
230 		return -1;
231 
232 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
233 	if (ret) {
234 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
235 		goto out;
236 	}
237 
238 	umount2("/proc", MNT_DETACH);
239 
240 	ret = mount("proc", "/proc", "proc", 0, NULL);
241 	if (ret) {
242 		fprintf(stderr, "%m - Failed to mount proc\n");
243 		goto out;
244 	}
245 
246 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
247 	if (fd < 0) {
248 		fprintf(stderr, "%m - Failed to open pid_max\n");
249 		goto out;
250 	}
251 
252 	ret = write_int_to_fd(fd, outer_limit);
253 	close(fd);
254 	if (ret < 0) {
255 		fprintf(stderr, "%m - Failed to write pid_max\n");
256 		goto out;
257 	}
258 
259 	/*
260 	 * Create (outer_limit - 4) processes. This leaves room for
261 	 * do_clone() and one more. So creating another process needs
262 	 * to fail.
263 	 */
264 	for (nr_procs = 0; nr_procs < outer_limit - 4; nr_procs++) {
265 		pid = fork();
266 		if (pid < 0)
267 			goto reap;
268 
269 		if (pid == 0)
270 			exit(EXIT_SUCCESS);
271 
272 		pids[nr_procs] = pid;
273 	}
274 
275 	pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
276 	if (pid < 0) {
277 		fprintf(stderr, "%m - Failed to clone nested pidns\n");
278 		goto reap;
279 	}
280 
281 	if (wait_for_pid(pid)) {
282 		fprintf(stderr, "%m - Nested pid_max failed\n");
283 		goto reap;
284 	}
285 
286 	fret = 0;
287 
288 reap:
289 	for (int i = 0; i < nr_procs; i++)
290 		wait_for_pid(pids[i]);
291 
292 out:
293 	free(pids);
294 	return fret;
295 }
296 
297 static int pid_max_nested_limit_inner(void *data)
298 {
299 	int fret = -1, nr_procs = 0;
300 	int fd, ret;
301 	pid_t pid;
302 	pid_t *pids;
303 
304 	pids = malloc(inner_limit * sizeof(pid_t));
305 	if (!pids)
306 		return -1;
307 
308 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
309 	if (ret) {
310 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
311 		goto out;
312 	}
313 
314 	umount2("/proc", MNT_DETACH);
315 
316 	ret = mount("proc", "/proc", "proc", 0, NULL);
317 	if (ret) {
318 		fprintf(stderr, "%m - Failed to mount proc\n");
319 		goto out;
320 	}
321 
322 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
323 	if (fd < 0) {
324 		fprintf(stderr, "%m - Failed to open pid_max\n");
325 		goto out;
326 	}
327 
328 	ret = write_int_to_fd(fd, inner_limit);
329 	close(fd);
330 	if (ret < 0) {
331 		fprintf(stderr, "%m - Failed to write pid_max\n");
332 		goto out;
333 	}
334 
335 	for (nr_procs = 0; nr_procs < inner_limit; nr_procs++) {
336 		pid = fork();
337 		if (pid < 0)
338 			break;
339 
340 		if (pid == 0)
341 			exit(EXIT_SUCCESS);
342 
343 		pids[nr_procs] = pid;
344 	}
345 
346 	if (nr_procs >= outer_limit) {
347 		fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
348 		goto reap;
349 	}
350 
351 	fret = 0;
352 
353 reap:
354 	for (int i = 0; i < nr_procs; i++)
355 		wait_for_pid(pids[i]);
356 
357 out:
358 	free(pids);
359 	return fret;
360 }
361 
362 static int pid_max_nested_limit_outer(void *data)
363 {
364 	int fd, ret;
365 	pid_t pid;
366 
367 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
368 	if (ret) {
369 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
370 		return -1;
371 	}
372 
373 	umount2("/proc", MNT_DETACH);
374 
375 	ret = mount("proc", "/proc", "proc", 0, NULL);
376 	if (ret) {
377 		fprintf(stderr, "%m - Failed to mount proc\n");
378 		return -1;
379 	}
380 
381 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
382 	if (fd < 0) {
383 		fprintf(stderr, "%m - Failed to open pid_max\n");
384 		return -1;
385 	}
386 
387 	ret = write_int_to_fd(fd, outer_limit);
388 	close(fd);
389 	if (ret < 0) {
390 		fprintf(stderr, "%m - Failed to write pid_max\n");
391 		return -1;
392 	}
393 
394 	pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
395 	if (pid < 0) {
396 		fprintf(stderr, "%m - Failed to clone nested pidns\n");
397 		return -1;
398 	}
399 
400 	if (wait_for_pid(pid)) {
401 		fprintf(stderr, "%m - Nested pid_max failed\n");
402 		return -1;
403 	}
404 
405 	return 0;
406 }
407 
/*
 * Fixture data for the pid_max tests.  Nothing in this file reads the
 * member; the limits the tests use live in the outer_limit/inner_limit
 * globals instead.
 */
FIXTURE(pid_max) {
	int dummy;
};
411 
412 FIXTURE_SETUP(pid_max)
413 {
414 	int min = pid_min();
415 
416 	outer_limit = min + 100;
417 	inner_limit = min + 200;
418 }
419 
/* Intentionally empty: FIXTURE_SETUP only assigns two globals. */
FIXTURE_TEARDOWN(pid_max)
{
}
423 
424 TEST_F(pid_max, simple)
425 {
426 	pid_t pid;
427 
428 	pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
429 	ASSERT_GT(pid, 0);
430 	ASSERT_EQ(0, wait_for_pid(pid));
431 }
432 
433 TEST_F(pid_max, nested_limit)
434 {
435 	pid_t pid;
436 
437 	pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
438 	ASSERT_GT(pid, 0);
439 	ASSERT_EQ(0, wait_for_pid(pid));
440 }
441 
442 TEST_F(pid_max, nested)
443 {
444 	pid_t pid;
445 
446 	pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
447 	ASSERT_GT(pid, 0);
448 	ASSERT_EQ(0, wait_for_pid(pid));
449 }
450 
451 TEST_HARNESS_MAIN
452