1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <limits.h>
6 #include <sched.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/ioctl.h>
11 #include <sys/socket.h>
12 #include <sys/stat.h>
13 #include <sys/syscall.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 #include <unistd.h>
17 #include <linux/nsfs.h>
18 #include "../kselftest_harness.h"
19 #include "../filesystems/utils.h"
20 #include "wrappers.h"
21
22 /*
23 * Stress tests for namespace active reference counting.
24 *
25 * These tests validate that the active reference counting system can handle
26 * high load scenarios including rapid namespace creation/destruction, large
27 * numbers of concurrent namespaces, and various edge cases under stress.
28 */
29
30 /*
31 * Test rapid creation and destruction of user namespaces.
32 * Create and destroy namespaces in quick succession to stress the
33 * active reference tracking and ensure no leaks occur.
34 */
TEST(rapid_namespace_creation_destruction)35 TEST(rapid_namespace_creation_destruction)
36 {
37 struct ns_id_req req = {
38 .size = sizeof(req),
39 .spare = 0,
40 .ns_id = 0,
41 .ns_type = CLONE_NEWUSER,
42 .spare2 = 0,
43 .user_ns_id = 0,
44 };
45 __u64 ns_ids_before[256], ns_ids_after[256];
46 ssize_t ret_before, ret_after;
47 int i;
48
49 /* Get baseline count of active user namespaces */
50 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
51 if (ret_before < 0) {
52 if (errno == ENOSYS)
53 SKIP(return, "listns() not supported");
54 ASSERT_GE(ret_before, 0);
55 }
56
57 TH_LOG("Baseline: %zd active user namespaces", ret_before);
58
59 /* Rapidly create and destroy 100 user namespaces */
60 for (i = 0; i < 100; i++) {
61 pid_t pid = fork();
62 ASSERT_GE(pid, 0);
63
64 if (pid == 0) {
65 /* Child: create user namespace and immediately exit */
66 if (setup_userns() < 0)
67 exit(1);
68 exit(0);
69 }
70
71 /* Parent: wait for child */
72 int status;
73 waitpid(pid, &status, 0);
74 ASSERT_TRUE(WIFEXITED(status));
75 ASSERT_EQ(WEXITSTATUS(status), 0);
76 }
77
78 /* Verify we're back to baseline (no leaked namespaces) */
79 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
80 ASSERT_GE(ret_after, 0);
81
82 TH_LOG("After 100 rapid create/destroy cycles: %zd active user namespaces", ret_after);
83 ASSERT_EQ(ret_before, ret_after);
84 }
85
86 /*
87 * Test creating many concurrent namespaces.
88 * Verify that listns() correctly tracks all of them and that they all
89 * become inactive after processes exit.
90 */
TEST(many_concurrent_namespaces)91 TEST(many_concurrent_namespaces)
92 {
93 struct ns_id_req req = {
94 .size = sizeof(req),
95 .spare = 0,
96 .ns_id = 0,
97 .ns_type = CLONE_NEWUSER,
98 .spare2 = 0,
99 .user_ns_id = 0,
100 };
101 __u64 ns_ids_before[512], ns_ids_during[512], ns_ids_after[512];
102 ssize_t ret_before, ret_during, ret_after;
103 pid_t pids[50];
104 int num_children = 50;
105 int i;
106 int sv[2];
107
108 /* Get baseline */
109 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
110 if (ret_before < 0) {
111 if (errno == ENOSYS)
112 SKIP(return, "listns() not supported");
113 ASSERT_GE(ret_before, 0);
114 }
115
116 TH_LOG("Baseline: %zd active user namespaces", ret_before);
117
118 ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
119
120 /* Create many children, each with their own user namespace */
121 for (i = 0; i < num_children; i++) {
122 pids[i] = fork();
123 ASSERT_GE(pids[i], 0);
124
125 if (pids[i] == 0) {
126 /* Child: create user namespace and wait for parent signal */
127 char c;
128
129 close(sv[0]);
130
131 if (setup_userns() < 0) {
132 close(sv[1]);
133 exit(1);
134 }
135
136 /* Signal parent we're ready */
137 if (write(sv[1], &c, 1) != 1) {
138 close(sv[1]);
139 exit(1);
140 }
141
142 /* Wait for parent signal to exit */
143 if (read(sv[1], &c, 1) != 1) {
144 close(sv[1]);
145 exit(1);
146 }
147
148 close(sv[1]);
149 exit(0);
150 }
151 }
152
153 close(sv[1]);
154
155 /* Wait for all children to signal ready */
156 for (i = 0; i < num_children; i++) {
157 char c;
158 if (read(sv[0], &c, 1) != 1) {
159 /* If we fail to read, kill all children and exit */
160 close(sv[0]);
161 for (int j = 0; j < num_children; j++)
162 kill(pids[j], SIGKILL);
163 for (int j = 0; j < num_children; j++)
164 waitpid(pids[j], NULL, 0);
165 ASSERT_TRUE(false);
166 }
167 }
168
169 /* List namespaces while all children are running */
170 ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
171 ASSERT_GE(ret_during, 0);
172
173 TH_LOG("With %d children running: %zd active user namespaces", num_children, ret_during);
174
175 /* Should have at least num_children more namespaces than baseline */
176 ASSERT_GE(ret_during, ret_before + num_children);
177
178 /* Signal all children to exit */
179 for (i = 0; i < num_children; i++) {
180 char c = 'X';
181 if (write(sv[0], &c, 1) != 1) {
182 /* If we fail to write, kill remaining children */
183 close(sv[0]);
184 for (int j = i; j < num_children; j++)
185 kill(pids[j], SIGKILL);
186 for (int j = 0; j < num_children; j++)
187 waitpid(pids[j], NULL, 0);
188 ASSERT_TRUE(false);
189 }
190 }
191
192 close(sv[0]);
193
194 /* Wait for all children */
195 for (i = 0; i < num_children; i++) {
196 int status;
197 waitpid(pids[i], &status, 0);
198 ASSERT_TRUE(WIFEXITED(status));
199 }
200
201 /* Verify we're back to baseline */
202 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
203 ASSERT_GE(ret_after, 0);
204
205 TH_LOG("After all children exit: %zd active user namespaces", ret_after);
206 ASSERT_EQ(ret_before, ret_after);
207 }
208
209 /*
210 * Test rapid namespace creation with different namespace types.
211 * Create multiple types of namespaces rapidly to stress the tracking system.
212 */
TEST(rapid_mixed_namespace_creation)213 TEST(rapid_mixed_namespace_creation)
214 {
215 struct ns_id_req req = {
216 .size = sizeof(req),
217 .spare = 0,
218 .ns_id = 0,
219 .ns_type = 0, /* All types */
220 .spare2 = 0,
221 .user_ns_id = 0,
222 };
223 __u64 ns_ids_before[512], ns_ids_after[512];
224 ssize_t ret_before, ret_after;
225 int i;
226
227 /* Get baseline count */
228 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
229 if (ret_before < 0) {
230 if (errno == ENOSYS)
231 SKIP(return, "listns() not supported");
232 ASSERT_GE(ret_before, 0);
233 }
234
235 TH_LOG("Baseline: %zd active namespaces (all types)", ret_before);
236
237 /* Rapidly create and destroy namespaces with multiple types */
238 for (i = 0; i < 50; i++) {
239 pid_t pid = fork();
240 ASSERT_GE(pid, 0);
241
242 if (pid == 0) {
243 /* Child: create multiple namespace types */
244 if (setup_userns() < 0)
245 exit(1);
246
247 /* Create additional namespace types */
248 if (unshare(CLONE_NEWNET) < 0)
249 exit(1);
250 if (unshare(CLONE_NEWUTS) < 0)
251 exit(1);
252 if (unshare(CLONE_NEWIPC) < 0)
253 exit(1);
254
255 exit(0);
256 }
257
258 /* Parent: wait for child */
259 int status;
260 waitpid(pid, &status, 0);
261 ASSERT_TRUE(WIFEXITED(status));
262 }
263
264 /* Verify we're back to baseline */
265 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
266 ASSERT_GE(ret_after, 0);
267
268 TH_LOG("After 50 rapid mixed namespace cycles: %zd active namespaces", ret_after);
269 ASSERT_EQ(ret_before, ret_after);
270 }
271
272 /*
273 * Test nested namespace creation under stress.
274 * Create deeply nested namespace hierarchies and verify proper cleanup.
275 */
TEST(nested_namespace_stress)276 TEST(nested_namespace_stress)
277 {
278 struct ns_id_req req = {
279 .size = sizeof(req),
280 .spare = 0,
281 .ns_id = 0,
282 .ns_type = CLONE_NEWUSER,
283 .spare2 = 0,
284 .user_ns_id = 0,
285 };
286 __u64 ns_ids_before[512], ns_ids_after[512];
287 ssize_t ret_before, ret_after;
288 int i;
289
290 /* Get baseline */
291 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
292 if (ret_before < 0) {
293 if (errno == ENOSYS)
294 SKIP(return, "listns() not supported");
295 ASSERT_GE(ret_before, 0);
296 }
297
298 TH_LOG("Baseline: %zd active user namespaces", ret_before);
299
300 /* Create 20 processes, each with nested user namespaces */
301 for (i = 0; i < 20; i++) {
302 pid_t pid = fork();
303 ASSERT_GE(pid, 0);
304
305 if (pid == 0) {
306 int userns_fd;
307 uid_t orig_uid = getuid();
308 int depth;
309
310 /* Create nested user namespaces (up to 5 levels) */
311 for (depth = 0; depth < 5; depth++) {
312 userns_fd = get_userns_fd(0, (depth == 0) ? orig_uid : 0, 1);
313 if (userns_fd < 0)
314 exit(1);
315
316 if (setns(userns_fd, CLONE_NEWUSER) < 0) {
317 close(userns_fd);
318 exit(1);
319 }
320 close(userns_fd);
321 }
322
323 exit(0);
324 }
325
326 /* Parent: wait for child */
327 int status;
328 waitpid(pid, &status, 0);
329 ASSERT_TRUE(WIFEXITED(status));
330 }
331
332 /* Verify we're back to baseline */
333 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
334 ASSERT_GE(ret_after, 0);
335
336 TH_LOG("After 20 nested namespace hierarchies: %zd active user namespaces", ret_after);
337 ASSERT_EQ(ret_before, ret_after);
338 }
339
340 /*
341 * Test listns() pagination under stress.
342 * Create many namespaces and verify pagination works correctly.
343 */
TEST(listns_pagination_stress)344 TEST(listns_pagination_stress)
345 {
346 struct ns_id_req req = {
347 .size = sizeof(req),
348 .spare = 0,
349 .ns_id = 0,
350 .ns_type = CLONE_NEWUSER,
351 .spare2 = 0,
352 .user_ns_id = 0,
353 };
354 pid_t pids[30];
355 int num_children = 30;
356 int i;
357 int sv[2];
358 __u64 all_ns_ids[512];
359 int total_found = 0;
360
361 ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
362
363 /* Create many children with user namespaces */
364 for (i = 0; i < num_children; i++) {
365 pids[i] = fork();
366 ASSERT_GE(pids[i], 0);
367
368 if (pids[i] == 0) {
369 char c;
370 close(sv[0]);
371
372 if (setup_userns() < 0) {
373 close(sv[1]);
374 exit(1);
375 }
376
377 /* Signal parent we're ready */
378 if (write(sv[1], &c, 1) != 1) {
379 close(sv[1]);
380 exit(1);
381 }
382
383 /* Wait for parent signal to exit */
384 if (read(sv[1], &c, 1) != 1) {
385 close(sv[1]);
386 exit(1);
387 }
388
389 close(sv[1]);
390 exit(0);
391 }
392 }
393
394 close(sv[1]);
395
396 /* Wait for all children to signal ready */
397 for (i = 0; i < num_children; i++) {
398 char c;
399 if (read(sv[0], &c, 1) != 1) {
400 /* If we fail to read, kill all children and exit */
401 close(sv[0]);
402 for (int j = 0; j < num_children; j++)
403 kill(pids[j], SIGKILL);
404 for (int j = 0; j < num_children; j++)
405 waitpid(pids[j], NULL, 0);
406 ASSERT_TRUE(false);
407 }
408 }
409
410 /* Paginate through all namespaces using small batch sizes */
411 req.ns_id = 0;
412 while (1) {
413 __u64 batch[5]; /* Small batch size to force pagination */
414 ssize_t ret;
415
416 ret = sys_listns(&req, batch, ARRAY_SIZE(batch), 0);
417 if (ret < 0) {
418 if (errno == ENOSYS) {
419 close(sv[0]);
420 for (i = 0; i < num_children; i++)
421 kill(pids[i], SIGKILL);
422 for (i = 0; i < num_children; i++)
423 waitpid(pids[i], NULL, 0);
424 SKIP(return, "listns() not supported");
425 }
426 ASSERT_GE(ret, 0);
427 }
428
429 if (ret == 0)
430 break;
431
432 /* Store results */
433 for (i = 0; i < ret && total_found < 512; i++) {
434 all_ns_ids[total_found++] = batch[i];
435 }
436
437 /* Update cursor for next batch */
438 if (ret == ARRAY_SIZE(batch))
439 req.ns_id = batch[ret - 1];
440 else
441 break;
442 }
443
444 TH_LOG("Paginated through %d user namespaces", total_found);
445
446 /* Verify no duplicates in pagination */
447 for (i = 0; i < total_found; i++) {
448 for (int j = i + 1; j < total_found; j++) {
449 if (all_ns_ids[i] == all_ns_ids[j]) {
450 TH_LOG("Found duplicate ns_id: %llu at positions %d and %d",
451 (unsigned long long)all_ns_ids[i], i, j);
452 ASSERT_TRUE(false);
453 }
454 }
455 }
456
457 /* Signal all children to exit */
458 for (i = 0; i < num_children; i++) {
459 char c = 'X';
460 if (write(sv[0], &c, 1) != 1) {
461 close(sv[0]);
462 for (int j = i; j < num_children; j++)
463 kill(pids[j], SIGKILL);
464 for (int j = 0; j < num_children; j++)
465 waitpid(pids[j], NULL, 0);
466 ASSERT_TRUE(false);
467 }
468 }
469
470 close(sv[0]);
471
472 /* Wait for all children */
473 for (i = 0; i < num_children; i++) {
474 int status;
475 waitpid(pids[i], &status, 0);
476 }
477 }
478
479 /*
480 * Test concurrent namespace operations.
481 * Multiple processes creating, querying, and destroying namespaces concurrently.
482 */
TEST(concurrent_namespace_operations)483 TEST(concurrent_namespace_operations)
484 {
485 struct ns_id_req req = {
486 .size = sizeof(req),
487 .spare = 0,
488 .ns_id = 0,
489 .ns_type = 0,
490 .spare2 = 0,
491 .user_ns_id = 0,
492 };
493 __u64 ns_ids_before[512], ns_ids_after[512];
494 ssize_t ret_before, ret_after;
495 pid_t pids[20];
496 int num_workers = 20;
497 int i;
498
499 /* Get baseline */
500 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
501 if (ret_before < 0) {
502 if (errno == ENOSYS)
503 SKIP(return, "listns() not supported");
504 ASSERT_GE(ret_before, 0);
505 }
506
507 TH_LOG("Baseline: %zd active namespaces", ret_before);
508
509 /* Create worker processes that do concurrent operations */
510 for (i = 0; i < num_workers; i++) {
511 pids[i] = fork();
512 ASSERT_GE(pids[i], 0);
513
514 if (pids[i] == 0) {
515 /* Each worker: create namespaces, list them, repeat */
516 int iterations;
517
518 for (iterations = 0; iterations < 10; iterations++) {
519 int userns_fd;
520 __u64 temp_ns_ids[100];
521 ssize_t ret;
522
523 /* Create a user namespace */
524 userns_fd = get_userns_fd(0, getuid(), 1);
525 if (userns_fd < 0)
526 continue;
527
528 /* List namespaces */
529 ret = sys_listns(&req, temp_ns_ids, ARRAY_SIZE(temp_ns_ids), 0);
530 (void)ret;
531
532 close(userns_fd);
533
534 /* Small delay */
535 usleep(1000);
536 }
537
538 exit(0);
539 }
540 }
541
542 /* Wait for all workers */
543 for (i = 0; i < num_workers; i++) {
544 int status;
545 waitpid(pids[i], &status, 0);
546 ASSERT_TRUE(WIFEXITED(status));
547 ASSERT_EQ(WEXITSTATUS(status), 0);
548 }
549
550 /* Verify we're back to baseline */
551 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
552 ASSERT_GE(ret_after, 0);
553
554 TH_LOG("After concurrent operations: %zd active namespaces", ret_after);
555 ASSERT_EQ(ret_before, ret_after);
556 }
557
558 /*
559 * Test namespace churn - continuous creation and destruction.
560 * Simulates high-churn scenarios like container orchestration.
561 */
TEST(namespace_churn)562 TEST(namespace_churn)
563 {
564 struct ns_id_req req = {
565 .size = sizeof(req),
566 .spare = 0,
567 .ns_id = 0,
568 .ns_type = CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS,
569 .spare2 = 0,
570 .user_ns_id = 0,
571 };
572 __u64 ns_ids_before[512], ns_ids_after[512];
573 ssize_t ret_before, ret_after;
574 int cycle;
575
576 /* Get baseline */
577 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
578 if (ret_before < 0) {
579 if (errno == ENOSYS)
580 SKIP(return, "listns() not supported");
581 ASSERT_GE(ret_before, 0);
582 }
583
584 TH_LOG("Baseline: %zd active namespaces", ret_before);
585
586 /* Simulate churn: batches of namespaces created and destroyed */
587 for (cycle = 0; cycle < 10; cycle++) {
588 pid_t batch_pids[10];
589 int i;
590
591 /* Create batch */
592 for (i = 0; i < 10; i++) {
593 batch_pids[i] = fork();
594 ASSERT_GE(batch_pids[i], 0);
595
596 if (batch_pids[i] == 0) {
597 /* Create multiple namespace types */
598 if (setup_userns() < 0)
599 exit(1);
600 if (unshare(CLONE_NEWNET) < 0)
601 exit(1);
602 if (unshare(CLONE_NEWUTS) < 0)
603 exit(1);
604
605 /* Keep namespaces alive briefly */
606 usleep(10000);
607 exit(0);
608 }
609 }
610
611 /* Wait for batch to complete */
612 for (i = 0; i < 10; i++) {
613 int status;
614 waitpid(batch_pids[i], &status, 0);
615 }
616 }
617
618 /* Verify we're back to baseline */
619 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
620 ASSERT_GE(ret_after, 0);
621
622 TH_LOG("After 10 churn cycles (100 namespace sets): %zd active namespaces", ret_after);
623 ASSERT_EQ(ret_before, ret_after);
624 }
625
626 TEST_HARNESS_MAIN
627