1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <limits.h> 6 #include <sched.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/ioctl.h> 11 #include <sys/socket.h> 12 #include <sys/stat.h> 13 #include <sys/syscall.h> 14 #include <sys/types.h> 15 #include <sys/wait.h> 16 #include <unistd.h> 17 #include <linux/nsfs.h> 18 #include "../kselftest_harness.h" 19 #include "../filesystems/utils.h" 20 #include "wrappers.h" 21 22 /* 23 * Stress tests for namespace active reference counting. 24 * 25 * These tests validate that the active reference counting system can handle 26 * high load scenarios including rapid namespace creation/destruction, large 27 * numbers of concurrent namespaces, and various edge cases under stress. 28 */ 29 30 /* 31 * Test rapid creation and destruction of user namespaces. 32 * Create and destroy namespaces in quick succession to stress the 33 * active reference tracking and ensure no leaks occur. 34 */ 35 TEST(rapid_namespace_creation_destruction) 36 { 37 struct ns_id_req req = { 38 .size = sizeof(req), 39 .spare = 0, 40 .ns_id = 0, 41 .ns_type = CLONE_NEWUSER, 42 .spare2 = 0, 43 .user_ns_id = 0, 44 }; 45 __u64 ns_ids_before[256], ns_ids_after[256]; 46 ssize_t ret_before, ret_after; 47 int i; 48 49 /* Get baseline count of active user namespaces */ 50 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0); 51 if (ret_before < 0) { 52 if (errno == ENOSYS) 53 SKIP(return, "listns() not supported"); 54 ASSERT_GE(ret_before, 0); 55 } 56 57 TH_LOG("Baseline: %zd active user namespaces", ret_before); 58 59 /* Rapidly create and destroy 100 user namespaces */ 60 for (i = 0; i < 100; i++) { 61 pid_t pid = fork(); 62 ASSERT_GE(pid, 0); 63 64 if (pid == 0) { 65 /* Child: create user namespace and immediately exit */ 66 if (setup_userns() < 0) 67 exit(1); 68 exit(0); 69 } 70 71 /* Parent: wait for child */ 72 int status; 73 waitpid(pid, &status, 0); 74 ASSERT_TRUE(WIFEXITED(status)); 75 ASSERT_EQ(WEXITSTATUS(status), 0); 76 } 77 78 /* Verify we're back to baseline (no leaked namespaces) */ 79 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0); 80 ASSERT_GE(ret_after, 0); 81 82 TH_LOG("After 100 rapid create/destroy cycles: %zd active user namespaces", ret_after); 83 ASSERT_EQ(ret_before, ret_after); 84 } 85 86 /* 87 * Test creating many concurrent namespaces. 88 * Verify that listns() correctly tracks all of them and that they all 89 * become inactive after processes exit. 90 */ 91 TEST(many_concurrent_namespaces) 92 { 93 struct ns_id_req req = { 94 .size = sizeof(req), 95 .spare = 0, 96 .ns_id = 0, 97 .ns_type = CLONE_NEWUSER, 98 .spare2 = 0, 99 .user_ns_id = 0, 100 }; 101 __u64 ns_ids_before[512], ns_ids_during[512], ns_ids_after[512]; 102 ssize_t ret_before, ret_during, ret_after; 103 pid_t pids[50]; 104 int num_children = 50; 105 int i; 106 int sv[2]; 107 108 /* Get baseline */ 109 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0); 110 if (ret_before < 0) { 111 if (errno == ENOSYS) 112 SKIP(return, "listns() not supported"); 113 ASSERT_GE(ret_before, 0); 114 } 115 116 TH_LOG("Baseline: %zd active user namespaces", ret_before); 117 118 ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0); 119 120 /* Create many children, each with their own user namespace */ 121 for (i = 0; i < num_children; i++) { 122 pids[i] = fork(); 123 ASSERT_GE(pids[i], 0); 124 125 if (pids[i] == 0) { 126 /* Child: create user namespace and wait for parent signal */ 127 char c; 128 129 close(sv[0]); 130 131 if (setup_userns() < 0) { 132 close(sv[1]); 133 exit(1); 134 } 135 136 /* Signal parent we're ready */ 137 if (write(sv[1], &c, 1) != 1) { 138 close(sv[1]); 139 exit(1); 140 } 141 142 /* Wait for parent signal to exit */ 143 if (read(sv[1], &c, 1) != 1) { 144 close(sv[1]); 145 exit(1); 146 } 147 148 close(sv[1]); 149 exit(0); 150 } 151 } 152 153 close(sv[1]); 154 155 /* Wait for all children to signal ready */ 156 for (i = 0; i < num_children; i++) { 157 char c; 158 if (read(sv[0], &c, 1) != 1) { 159 /* If we fail to read, kill all children and exit */ 160 close(sv[0]); 161 for (int j = 0; j < num_children; j++) 162 kill(pids[j], SIGKILL); 163 for (int j = 0; j < num_children; j++) 164 waitpid(pids[j], NULL, 0); 165 ASSERT_TRUE(false); 166 } 167 } 168 169 /* List namespaces while all children are running */ 170 ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0); 171 ASSERT_GE(ret_during, 0); 172 173 TH_LOG("With %d children running: %zd active user namespaces", num_children, ret_during); 174 175 /* Should have at least num_children more namespaces than baseline */ 176 ASSERT_GE(ret_during, ret_before + num_children); 177 178 /* Signal all children to exit */ 179 for (i = 0; i < num_children; i++) { 180 char c = 'X'; 181 if (write(sv[0], &c, 1) != 1) { 182 /* If we fail to write, kill remaining children */ 183 close(sv[0]); 184 for (int j = i; j < num_children; j++) 185 kill(pids[j], SIGKILL); 186 for (int j = 0; j < num_children; j++) 187 waitpid(pids[j], NULL, 0); 188 ASSERT_TRUE(false); 189 } 190 } 191 192 close(sv[0]); 193 194 /* Wait for all children */ 195 for (i = 0; i < num_children; i++) { 196 int status; 197 waitpid(pids[i], &status, 0); 198 ASSERT_TRUE(WIFEXITED(status)); 199 } 200 201 /* Verify we're back to baseline */ 202 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0); 203 ASSERT_GE(ret_after, 0); 204 205 TH_LOG("After all children exit: %zd active user namespaces", ret_after); 206 ASSERT_EQ(ret_before, ret_after); 207 } 208 209 /* 210 * Test rapid namespace creation with different namespace types. 211 * Create multiple types of namespaces rapidly to stress the tracking system. 212 */ 213 TEST(rapid_mixed_namespace_creation) 214 { 215 struct ns_id_req req = { 216 .size = sizeof(req), 217 .spare = 0, 218 .ns_id = 0, 219 .ns_type = 0, /* All types */ 220 .spare2 = 0, 221 .user_ns_id = 0, 222 }; 223 __u64 ns_ids_before[512], ns_ids_after[512]; 224 ssize_t ret_before, ret_after; 225 int i; 226 227 /* Get baseline count */ 228 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0); 229 if (ret_before < 0) { 230 if (errno == ENOSYS) 231 SKIP(return, "listns() not supported"); 232 ASSERT_GE(ret_before, 0); 233 } 234 235 TH_LOG("Baseline: %zd active namespaces (all types)", ret_before); 236 237 /* Rapidly create and destroy namespaces with multiple types */ 238 for (i = 0; i < 50; i++) { 239 pid_t pid = fork(); 240 ASSERT_GE(pid, 0); 241 242 if (pid == 0) { 243 /* Child: create multiple namespace types */ 244 if (setup_userns() < 0) 245 exit(1); 246 247 /* Create additional namespace types */ 248 if (unshare(CLONE_NEWNET) < 0) 249 exit(1); 250 if (unshare(CLONE_NEWUTS) < 0) 251 exit(1); 252 if (unshare(CLONE_NEWIPC) < 0) 253 exit(1); 254 255 exit(0); 256 } 257 258 /* Parent: wait for child */ 259 int status; 260 waitpid(pid, &status, 0); 261 ASSERT_TRUE(WIFEXITED(status)); 262 } 263 264 /* Verify we're back to baseline */ 265 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0); 266 ASSERT_GE(ret_after, 0); 267 268 TH_LOG("After 50 rapid mixed namespace cycles: %zd active namespaces", ret_after); 269 ASSERT_EQ(ret_before, ret_after); 270 } 271 272 /* 273 * Test nested namespace creation under stress. 274 * Create deeply nested namespace hierarchies and verify proper cleanup. 275 */ 276 TEST(nested_namespace_stress) 277 { 278 struct ns_id_req req = { 279 .size = sizeof(req), 280 .spare = 0, 281 .ns_id = 0, 282 .ns_type = CLONE_NEWUSER, 283 .spare2 = 0, 284 .user_ns_id = 0, 285 }; 286 __u64 ns_ids_before[512], ns_ids_after[512]; 287 ssize_t ret_before, ret_after; 288 int i; 289 290 /* Get baseline */ 291 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0); 292 if (ret_before < 0) { 293 if (errno == ENOSYS) 294 SKIP(return, "listns() not supported"); 295 ASSERT_GE(ret_before, 0); 296 } 297 298 TH_LOG("Baseline: %zd active user namespaces", ret_before); 299 300 /* Create 20 processes, each with nested user namespaces */ 301 for (i = 0; i < 20; i++) { 302 pid_t pid = fork(); 303 ASSERT_GE(pid, 0); 304 305 if (pid == 0) { 306 int userns_fd; 307 uid_t orig_uid = getuid(); 308 int depth; 309 310 /* Create nested user namespaces (up to 5 levels) */ 311 for (depth = 0; depth < 5; depth++) { 312 userns_fd = get_userns_fd(0, (depth == 0) ? orig_uid : 0, 1); 313 if (userns_fd < 0) 314 exit(1); 315 316 if (setns(userns_fd, CLONE_NEWUSER) < 0) { 317 close(userns_fd); 318 exit(1); 319 } 320 close(userns_fd); 321 } 322 323 exit(0); 324 } 325 326 /* Parent: wait for child */ 327 int status; 328 waitpid(pid, &status, 0); 329 ASSERT_TRUE(WIFEXITED(status)); 330 } 331 332 /* Verify we're back to baseline */ 333 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0); 334 ASSERT_GE(ret_after, 0); 335 336 TH_LOG("After 20 nested namespace hierarchies: %zd active user namespaces", ret_after); 337 ASSERT_EQ(ret_before, ret_after); 338 } 339 340 /* 341 * Test listns() pagination under stress. 342 * Create many namespaces and verify pagination works correctly. 343 */ 344 TEST(listns_pagination_stress) 345 { 346 struct ns_id_req req = { 347 .size = sizeof(req), 348 .spare = 0, 349 .ns_id = 0, 350 .ns_type = CLONE_NEWUSER, 351 .spare2 = 0, 352 .user_ns_id = 0, 353 }; 354 pid_t pids[30]; 355 int num_children = 30; 356 int i; 357 int sv[2]; 358 __u64 all_ns_ids[512]; 359 int total_found = 0; 360 361 ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0); 362 363 /* Create many children with user namespaces */ 364 for (i = 0; i < num_children; i++) { 365 pids[i] = fork(); 366 ASSERT_GE(pids[i], 0); 367 368 if (pids[i] == 0) { 369 char c; 370 close(sv[0]); 371 372 if (setup_userns() < 0) { 373 close(sv[1]); 374 exit(1); 375 } 376 377 /* Signal parent we're ready */ 378 if (write(sv[1], &c, 1) != 1) { 379 close(sv[1]); 380 exit(1); 381 } 382 383 /* Wait for parent signal to exit */ 384 if (read(sv[1], &c, 1) != 1) { 385 close(sv[1]); 386 exit(1); 387 } 388 389 close(sv[1]); 390 exit(0); 391 } 392 } 393 394 close(sv[1]); 395 396 /* Wait for all children to signal ready */ 397 for (i = 0; i < num_children; i++) { 398 char c; 399 if (read(sv[0], &c, 1) != 1) { 400 /* If we fail to read, kill all children and exit */ 401 close(sv[0]); 402 for (int j = 0; j < num_children; j++) 403 kill(pids[j], SIGKILL); 404 for (int j = 0; j < num_children; j++) 405 waitpid(pids[j], NULL, 0); 406 ASSERT_TRUE(false); 407 } 408 } 409 410 /* Paginate through all namespaces using small batch sizes */ 411 req.ns_id = 0; 412 while (1) { 413 __u64 batch[5]; /* Small batch size to force pagination */ 414 ssize_t ret; 415 416 ret = sys_listns(&req, batch, ARRAY_SIZE(batch), 0); 417 if (ret < 0) { 418 if (errno == ENOSYS) { 419 close(sv[0]); 420 for (i = 0; i < num_children; i++) 421 kill(pids[i], SIGKILL); 422 for (i = 0; i < num_children; i++) 423 waitpid(pids[i], NULL, 0); 424 SKIP(return, "listns() not supported"); 425 } 426 ASSERT_GE(ret, 0); 427 } 428 429 if (ret == 0) 430 break; 431 432 /* Store results */ 433 for (i = 0; i < ret && total_found < 512; i++) { 434 all_ns_ids[total_found++] = batch[i]; 435 } 436 437 /* Update cursor for next batch */ 438 if (ret == ARRAY_SIZE(batch)) 439 req.ns_id = batch[ret - 1]; 440 else 441 break; 442 } 443 444 TH_LOG("Paginated through %d user namespaces", total_found); 445 446 /* Verify no duplicates in pagination */ 447 for (i = 0; i < total_found; i++) { 448 for (int j = i + 1; j < total_found; j++) { 449 if (all_ns_ids[i] == all_ns_ids[j]) { 450 TH_LOG("Found duplicate ns_id: %llu at positions %d and %d", 451 (unsigned long long)all_ns_ids[i], i, j); 452 ASSERT_TRUE(false); 453 } 454 } 455 } 456 457 /* Signal all children to exit */ 458 for (i = 0; i < num_children; i++) { 459 char c = 'X'; 460 if (write(sv[0], &c, 1) != 1) { 461 close(sv[0]); 462 for (int j = i; j < num_children; j++) 463 kill(pids[j], SIGKILL); 464 for (int j = 0; j < num_children; j++) 465 waitpid(pids[j], NULL, 0); 466 ASSERT_TRUE(false); 467 } 468 } 469 470 close(sv[0]); 471 472 /* Wait for all children */ 473 for (i = 0; i < num_children; i++) { 474 int status; 475 waitpid(pids[i], &status, 0); 476 } 477 } 478 479 /* 480 * Test concurrent namespace operations. 481 * Multiple processes creating, querying, and destroying namespaces concurrently. 482 */ 483 TEST(concurrent_namespace_operations) 484 { 485 struct ns_id_req req = { 486 .size = sizeof(req), 487 .spare = 0, 488 .ns_id = 0, 489 .ns_type = 0, 490 .spare2 = 0, 491 .user_ns_id = 0, 492 }; 493 __u64 ns_ids_before[512], ns_ids_after[512]; 494 ssize_t ret_before, ret_after; 495 pid_t pids[20]; 496 int num_workers = 20; 497 int i; 498 499 /* Get baseline */ 500 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0); 501 if (ret_before < 0) { 502 if (errno == ENOSYS) 503 SKIP(return, "listns() not supported"); 504 ASSERT_GE(ret_before, 0); 505 } 506 507 TH_LOG("Baseline: %zd active namespaces", ret_before); 508 509 /* Create worker processes that do concurrent operations */ 510 for (i = 0; i < num_workers; i++) { 511 pids[i] = fork(); 512 ASSERT_GE(pids[i], 0); 513 514 if (pids[i] == 0) { 515 /* Each worker: create namespaces, list them, repeat */ 516 int iterations; 517 518 for (iterations = 0; iterations < 10; iterations++) { 519 int userns_fd; 520 __u64 temp_ns_ids[100]; 521 ssize_t ret; 522 523 /* Create a user namespace */ 524 userns_fd = get_userns_fd(0, getuid(), 1); 525 if (userns_fd < 0) 526 continue; 527 528 /* List namespaces */ 529 ret = sys_listns(&req, temp_ns_ids, ARRAY_SIZE(temp_ns_ids), 0); 530 (void)ret; 531 532 close(userns_fd); 533 534 /* Small delay */ 535 usleep(1000); 536 } 537 538 exit(0); 539 } 540 } 541 542 /* Wait for all workers */ 543 for (i = 0; i < num_workers; i++) { 544 int status; 545 waitpid(pids[i], &status, 0); 546 ASSERT_TRUE(WIFEXITED(status)); 547 ASSERT_EQ(WEXITSTATUS(status), 0); 548 } 549 550 /* Verify we're back to baseline */ 551 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0); 552 ASSERT_GE(ret_after, 0); 553 554 TH_LOG("After concurrent operations: %zd active namespaces", ret_after); 555 ASSERT_EQ(ret_before, ret_after); 556 } 557 558 /* 559 * Test namespace churn - continuous creation and destruction. 560 * Simulates high-churn scenarios like container orchestration. 561 */ 562 TEST(namespace_churn) 563 { 564 struct ns_id_req req = { 565 .size = sizeof(req), 566 .spare = 0, 567 .ns_id = 0, 568 .ns_type = CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS, 569 .spare2 = 0, 570 .user_ns_id = 0, 571 }; 572 __u64 ns_ids_before[512], ns_ids_after[512]; 573 ssize_t ret_before, ret_after; 574 int cycle; 575 576 /* Get baseline */ 577 ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0); 578 if (ret_before < 0) { 579 if (errno == ENOSYS) 580 SKIP(return, "listns() not supported"); 581 ASSERT_GE(ret_before, 0); 582 } 583 584 TH_LOG("Baseline: %zd active namespaces", ret_before); 585 586 /* Simulate churn: batches of namespaces created and destroyed */ 587 for (cycle = 0; cycle < 10; cycle++) { 588 pid_t batch_pids[10]; 589 int i; 590 591 /* Create batch */ 592 for (i = 0; i < 10; i++) { 593 batch_pids[i] = fork(); 594 ASSERT_GE(batch_pids[i], 0); 595 596 if (batch_pids[i] == 0) { 597 /* Create multiple namespace types */ 598 if (setup_userns() < 0) 599 exit(1); 600 if (unshare(CLONE_NEWNET) < 0) 601 exit(1); 602 if (unshare(CLONE_NEWUTS) < 0) 603 exit(1); 604 605 /* Keep namespaces alive briefly */ 606 usleep(10000); 607 exit(0); 608 } 609 } 610 611 /* Wait for batch to complete */ 612 for (i = 0; i < 10; i++) { 613 int status; 614 waitpid(batch_pids[i], &status, 0); 615 } 616 } 617 618 /* Verify we're back to baseline */ 619 ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0); 620 ASSERT_GE(ret_after, 0); 621 622 TH_LOG("After 10 churn cycles (100 namespace sets): %zd active namespaces", ret_after); 623 ASSERT_EQ(ret_before, ret_after); 624 } 625 626 TEST_HARNESS_MAIN 627