// SPDX-License-Identifier: GPL-2.0
#include <linux/limits.h>
#include <sys/sysinfo.h>
#include <sys/wait.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#include "../kselftest.h"
#include "cgroup_util.h"

enum hog_clock_type {
	// Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
	CPU_HOG_CLOCK_PROCESS,
	// Count elapsed time using system wallclock time.
	CPU_HOG_CLOCK_WALL,
};

struct cpu_hogger {
	char *cgroup;
	pid_t pid;
	long usage;
};

struct cpu_hog_func_param {
	int nprocs;
	struct timespec ts;
	enum hog_clock_type clock_type;
};

/*
 * This test creates two nested cgroups with and without enabling
 * the cpu controller.
 */
static int test_cpucg_subtree_control(const char *root)
{
	char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;

	// Create two nested cgroups with the cpu controller enabled.
	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	child = cg_name(parent, "cpucg_test_child");
	if (!child)
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
		goto cleanup;

	// Create two nested cgroups without enabling the cpu controller.
	parent2 = cg_name(root, "cpucg_test_1");
	if (!parent2)
		goto cleanup;

	if (cg_create(parent2))
		goto cleanup;

	child2 = cg_name(parent2, "cpucg_test_child");
	if (!child2)
		goto cleanup;

	if (cg_create(child2))
		goto cleanup;

	if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(child2);
	free(child2);
	cg_destroy(parent);
	free(parent);
	cg_destroy(parent2);
	free(parent2);

	return ret;
}

static void *hog_cpu_thread_func(void *arg)
{
	while (1)
		;

	return NULL;
}

static struct timespec
timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
{
	struct timespec zero = {
		.tv_sec = 0,
		.tv_nsec = 0,
	};
	struct timespec ret;

	if (lhs->tv_sec < rhs->tv_sec)
		return zero;

	ret.tv_sec = lhs->tv_sec - rhs->tv_sec;

	if (lhs->tv_nsec < rhs->tv_nsec) {
		if (ret.tv_sec == 0)
			return zero;

		ret.tv_sec--;
		ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
	} else
		ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;

	return ret;
}

static int hog_cpus_timed(const char *cgroup, void *arg)
{
	const struct cpu_hog_func_param *param =
		(struct cpu_hog_func_param *)arg;
	struct timespec ts_run = param->ts;
	struct timespec ts_remaining = ts_run;
	struct timespec ts_start;
	int i, ret;

	ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
	if (ret != 0)
		return ret;

	for (i = 0; i < param->nprocs; i++) {
		pthread_t tid;

		ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
		if (ret != 0)
			return ret;
	}

	while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
		struct timespec ts_total;

		ret = nanosleep(&ts_remaining, NULL);
		if (ret && errno != EINTR)
			return ret;

		if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
			ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
			if (ret != 0)
				return ret;
		} else {
			struct timespec ts_current;

			ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
			if (ret != 0)
				return ret;

			ts_total = timespec_sub(&ts_current, &ts_start);
		}

		ts_remaining = timespec_sub(&ts_run, &ts_total);
	}

	return 0;
}

/*
 * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
 * cpu.stat shows the expected output.
 */
static int test_cpucg_stats(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec, system_usec;
	long usage_seconds = 2;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
	if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_PROCESS,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (!values_close(usage_usec, expected_usage_usec, 1))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

static int
run_cpucg_weight_test(
		const char *root,
		pid_t (*spawn_child)(const struct cpu_hogger *child),
		int (*validate)(const struct cpu_hogger *children, int num_children))
{
	int ret = KSFT_FAIL, i;
	char *parent = NULL;
	struct cpu_hogger children[3] = {};

	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
		if (!children[i].cgroup)
			goto cleanup;

		if (cg_create(children[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(children[i].cgroup, "cpu.weight",
					50 * (i + 1)))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		pid_t pid = spawn_child(&children[i]);
		if (pid <= 0)
			goto cleanup;
		children[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int retcode;

		waitpid(children[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++)
		children[i].usage = cg_read_key_long(children[i].cgroup,
				"cpu.stat", "usage_usec");

	if (validate(children, ARRAY_SIZE(children)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		cg_destroy(children[i].cgroup);
		free(children[i].cgroup);
	}
	cg_destroy(parent);
	free(parent);

	return ret;
}
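
/*
 * Rough arithmetic behind the overprovisioned weight test below (an
 * illustration only, assuming a hypothetical 4-CPU machine; the exact
 * numbers depend on the host): run_cpucg_weight_test() above gives the
 * three children cpu.weight values of 50, 100 and 150, and
 * weight_hog_all_cpus() below makes each child spawn one hog thread per
 * CPU for 10 seconds of wall-clock time. With every CPU fully contended,
 * the 4 CPUs * 10s = 40 CPU-seconds are expected to be split roughly in
 * proportion to weight (50/300, 100/300, 150/300), i.e. about 6.7s, 13.3s
 * and 20s of usage_usec. The delta between adjacent children is then close
 * to the usage of the lightest child, which is what
 * overprovision_validate() below checks, within a 35% tolerance.
 */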

static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
{
	long usage_seconds = 10;
	struct cpu_hog_func_param param = {
		.nprocs = ncpus,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)&param);
}

static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
{
	return weight_hog_ncpus(child, get_nprocs());
}

static int
overprovision_validate(const struct cpu_hogger *children, int num_children)
{
	int ret = KSFT_FAIL, i;

	for (i = 0; i < num_children - 1; i++) {
		long delta;

		if (children[i + 1].usage <= children[i].usage)
			goto cleanup;

		delta = children[i + 1].usage - children[i].usage;
		if (!values_close(delta, children[0].usage, 35))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 50
 * A/C     cpu.weight = 100
 * A/D     cpu.weight = 150
 *
 * A separate process is then created for each child cgroup which spawns as
 * many threads as there are cores, and hogs each CPU as much as possible
 * for some time interval.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * was given proportional runtime as informed by their cpu.weight.
 */
static int test_cpucg_weight_overprovisioned(const char *root)
{
	return run_cpucg_weight_test(root, weight_hog_all_cpus,
			overprovision_validate);
}

static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
{
	return weight_hog_ncpus(child, 1);
}

static int
underprovision_validate(const struct cpu_hogger *children, int num_children)
{
	int ret = KSFT_FAIL, i;

	for (i = 0; i < num_children - 1; i++) {
		if (!values_close(children[i + 1].usage, children[0].usage, 15))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 50
 * A/C     cpu.weight = 100
 * A/D     cpu.weight = 150
 *
 * A separate process is then created for each child cgroup which spawns a
 * single thread that hogs a CPU. The testcase is only run on systems that
 * have at least one core per-thread in the child processes.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * had roughly the same runtime despite having different cpu.weight.
 */
static int test_cpucg_weight_underprovisioned(const char *root)
{
	// Only run the test if there are enough cores to avoid overprovisioning
	// the system.
	if (get_nprocs() < 4)
		return KSFT_SKIP;

	return run_cpucg_weight_test(root, weight_hog_one_cpu,
			underprovision_validate);
}

static int
run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
{
	int ret = KSFT_FAIL, i;
	char *parent = NULL, *child = NULL;
	struct cpu_hogger leaf[3] = {};
	long nested_leaf_usage, child_usage;
	int nprocs = get_nprocs();

	if (!overprovisioned) {
		if (nprocs < 4)
			/*
			 * Only run the test if there are enough cores to avoid overprovisioning
			 * the system.
			 */
			return KSFT_SKIP;
		nprocs /= 4;
	}

	parent = cg_name(root, "cpucg_test");
	child = cg_name(parent, "cpucg_child");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;
	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;
	if (cg_write(child, "cgroup.subtree_control", "+cpu"))
		goto cleanup;
	if (cg_write(child, "cpu.weight", "1000"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		const char *ancestor;
		long weight;

		if (i == 0) {
			ancestor = parent;
			weight = 1000;
		} else {
			ancestor = child;
			weight = 5000;
		}
		leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
		if (!leaf[i].cgroup)
			goto cleanup;

		if (cg_create(leaf[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		pid_t pid;
		struct cpu_hog_func_param param = {
			.nprocs = nprocs,
			.ts = {
				.tv_sec = 10,
				.tv_nsec = 0,
			},
			.clock_type = CPU_HOG_CLOCK_WALL,
		};

		pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
				(void *)&param);
		if (pid <= 0)
			goto cleanup;
		leaf[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		int retcode;

		waitpid(leaf[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
				"cpu.stat", "usage_usec");
		if (leaf[i].usage <= 0)
			goto cleanup;
	}

	nested_leaf_usage = leaf[1].usage + leaf[2].usage;
	if (overprovisioned) {
		if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
			goto cleanup;
	} else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
		goto cleanup;

	child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
	if (child_usage <= 0)
		goto cleanup;
	if (!values_close(child_usage, nested_leaf_usage, 1))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		cg_destroy(leaf[i].cgroup);
		free(leaf[i].cgroup);
	}
	cg_destroy(child);
	free(child);
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * A separate process is then created for each leaf, which spawns nproc
 * threads that burn a CPU for a few seconds.
 *
 * Once all of those processes have exited, we verify that the usage of A/B
 * from cpu.stat roughly matches the combined usage of A/C/D and A/C/E, as
 * the weights would predict.
 */
static int
test_cpucg_nested_weight_overprovisioned(const char *root)
{
	return run_cpucg_nested_weight_test(root, true);
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * A separate process is then created for each leaf, which spawns nproc / 4
 * threads that burn a CPU for a few seconds.
 *
 * Once all of those processes have exited, we verify that each of the leaf
 * cgroups has roughly the same usage from cpu.stat.
 */
static int
test_cpucg_nested_weight_underprovisioned(const char *root)
{
	return run_cpucg_nested_weight_test(root, false);
}
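
/*
 * Rough arithmetic behind the cpu.max tests below (illustration only,
 * assuming the default cpu.max period of 100000 usec): writing "1000" to
 * cpu.max leaves the period alone and sets the quota to 1000 usec, so the
 * cgroup may consume at most ~1% of one CPU. The hog then runs for one
 * second of wall-clock time, so usage_usec should land near 10000 usec
 * rather than the ~1000000 usec an unthrottled hog would burn. That is why
 * the tests require user_usec to stay below expected_usage_usec and
 * values_close(usage_usec, expected_usage_usec, 95) to fail.
 */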

/*
 * This test creates a cgroup with some maximum value within a period, and
 * verifies that a process in the cgroup is not overscheduled.
 */
static int test_cpucg_max(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec;
	long usage_seconds = 1;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	if (cg_write(cpucg, "cpu.max", "1000"))
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (user_usec >= expected_usage_usec)
		goto cleanup;

	if (values_close(usage_usec, expected_usage_usec, 95))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

/*
 * This test verifies that a process inside of a nested cgroup whose parent
 * group has a cpu.max value set, is properly throttled.
 */
static int test_cpucg_max_nested(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec;
	long usage_seconds = 1;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *parent, *child;

	parent = cg_name(root, "cpucg_parent");
	child = cg_name(parent, "cpucg_child");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cpu.max", "1000"))
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	if (cg_run(child, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (user_usec >= expected_usage_usec)
		goto cleanup;

	if (values_close(usage_usec, expected_usage_usec, 95))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(parent);
	free(parent);

	return ret;
}

#define T(x) { x, #x }
struct cpucg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_cpucg_subtree_control),
	T(test_cpucg_stats),
	T(test_cpucg_weight_overprovisioned),
	T(test_cpucg_weight_underprovisioned),
	T(test_cpucg_nested_weight_overprovisioned),
	T(test_cpucg_nested_weight_underprovisioned),
	T(test_cpucg_max),
	T(test_cpucg_max_nested),
};
#undef T

int main(int argc, char *argv[])
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root), NULL))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
		if (cg_write(root, "cgroup.subtree_control", "+cpu"))
			ksft_exit_skip("Failed to set cpu controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}
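
/*
 * Usage note (not part of the test logic): this file builds with the cgroup
 * kselftests. Assuming a kernel source tree, one common way to build and run
 * the whole group is:
 *
 *   make -C tools/testing/selftests TARGETS=cgroup run_tests
 *
 * The tests expect a mounted cgroup2 hierarchy (main() above skips
 * otherwise) and enough privilege to create cgroups under it.
 */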