1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #ifdef HAVE_LIBCPUPOWER_SUPPORT 8 #include <cpuidle.h> 9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 10 #include <dirent.h> 11 #include <stdarg.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <unistd.h> 15 #include <ctype.h> 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <sched.h> 19 #include <stdio.h> 20 #include <limits.h> 21 22 #include "common.h" 23 24 #define MAX_MSG_LENGTH 1024 25 int config_debug; 26 27 /* 28 * err_msg - print an error message to the stderr 29 */ 30 void err_msg(const char *fmt, ...) 31 { 32 char message[MAX_MSG_LENGTH]; 33 va_list ap; 34 35 va_start(ap, fmt); 36 vsnprintf(message, sizeof(message), fmt, ap); 37 va_end(ap); 38 39 fprintf(stderr, "%s", message); 40 } 41 42 /* 43 * debug_msg - print a debug message to stderr if debug is set 44 */ 45 void debug_msg(const char *fmt, ...) 46 { 47 char message[MAX_MSG_LENGTH]; 48 va_list ap; 49 50 if (!config_debug) 51 return; 52 53 va_start(ap, fmt); 54 vsnprintf(message, sizeof(message), fmt, ap); 55 va_end(ap); 56 57 fprintf(stderr, "%s", message); 58 } 59 60 /* 61 * fatal - print an error message and EOL to stderr and exit with ERROR 62 */ 63 void fatal(const char *fmt, ...) 64 { 65 va_list ap; 66 67 va_start(ap, fmt); 68 vfprintf(stderr, fmt, ap); 69 va_end(ap); 70 fprintf(stderr, "\n"); 71 72 exit(ERROR); 73 } 74 75 /* 76 * get_llong_from_str - get a long long int from a string 77 */ 78 long long get_llong_from_str(char *start) 79 { 80 long long value; 81 char *end; 82 83 errno = 0; 84 value = strtoll(start, &end, 10); 85 if (errno || start == end) 86 return -1; 87 88 return value; 89 } 90 91 /* 92 * get_duration - fill output with a human readable duration since start_time 93 */ 94 void get_duration(time_t start_time, char *output, int output_size) 95 { 96 time_t now = time(NULL); 97 struct tm *tm_info; 98 time_t duration; 99 100 duration = difftime(now, start_time); 101 tm_info = gmtime(&duration); 102 103 snprintf(output, output_size, "%3d %02d:%02d:%02d", 104 tm_info->tm_yday, 105 tm_info->tm_hour, 106 tm_info->tm_min, 107 tm_info->tm_sec); 108 } 109 110 /* 111 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 112 * 113 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 114 * filling cpu_set_t argument. 115 * 116 * Returns 0 on success, 1 otherwise. 117 */ 118 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 119 { 120 const char *p; 121 int end_cpu; 122 int cpu; 123 int i; 124 125 CPU_ZERO(set); 126 127 for (p = cpu_list; *p; ) { 128 cpu = atoi(p); 129 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) 130 goto err; 131 132 while (isdigit(*p)) 133 p++; 134 if (*p == '-') { 135 p++; 136 end_cpu = atoi(p); 137 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) 138 goto err; 139 while (isdigit(*p)) 140 p++; 141 } else 142 end_cpu = cpu; 143 144 if (cpu == end_cpu) { 145 debug_msg("cpu_set: adding cpu %d\n", cpu); 146 CPU_SET(cpu, set); 147 } else { 148 for (i = cpu; i <= end_cpu; i++) { 149 debug_msg("cpu_set: adding cpu %d\n", i); 150 CPU_SET(i, set); 151 } 152 } 153 154 if (*p == ',') 155 p++; 156 } 157 158 return 0; 159 err: 160 debug_msg("Error parsing the cpu set %s\n", cpu_list); 161 return 1; 162 } 163 164 /* 165 * parse_stack_format - parse the stack format 166 * 167 * Return: the stack format on success, -1 otherwise. 168 */ 169 int parse_stack_format(char *arg) 170 { 171 if (!strcmp(arg, "truncate")) 172 return STACK_FORMAT_TRUNCATE; 173 if (!strcmp(arg, "skip")) 174 return STACK_FORMAT_SKIP; 175 if (!strcmp(arg, "full")) 176 return STACK_FORMAT_FULL; 177 178 debug_msg("Error parsing the stack format %s\n", arg); 179 return -1; 180 } 181 182 /* 183 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 184 */ 185 long parse_seconds_duration(char *val) 186 { 187 char *end; 188 long t; 189 190 t = strtol(val, &end, 10); 191 192 if (end) { 193 switch (*end) { 194 case 's': 195 case 'S': 196 break; 197 case 'm': 198 case 'M': 199 t *= 60; 200 break; 201 case 'h': 202 case 'H': 203 t *= 60 * 60; 204 break; 205 206 case 'd': 207 case 'D': 208 t *= 24 * 60 * 60; 209 break; 210 } 211 } 212 213 return t; 214 } 215 216 /* 217 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 218 */ 219 long parse_ns_duration(char *val) 220 { 221 char *end; 222 long t; 223 224 t = strtol(val, &end, 10); 225 226 if (end) { 227 if (!strncmp(end, "ns", 2)) { 228 return t; 229 } else if (!strncmp(end, "us", 2)) { 230 t *= 1000; 231 return t; 232 } else if (!strncmp(end, "ms", 2)) { 233 t *= 1000 * 1000; 234 return t; 235 } else if (!strncmp(end, "s", 1)) { 236 t *= 1000 * 1000 * 1000; 237 return t; 238 } 239 return -1; 240 } 241 242 return t; 243 } 244 245 /* 246 * This is a set of helper functions to use SCHED_DEADLINE. 247 */ 248 #ifndef __NR_sched_setattr 249 # ifdef __x86_64__ 250 # define __NR_sched_setattr 314 251 # elif __i386__ 252 # define __NR_sched_setattr 351 253 # elif __arm__ 254 # define __NR_sched_setattr 380 255 # elif __aarch64__ || __riscv 256 # define __NR_sched_setattr 274 257 # elif __powerpc__ 258 # define __NR_sched_setattr 355 259 # elif __s390x__ 260 # define __NR_sched_setattr 345 261 # elif __loongarch__ 262 # define __NR_sched_setattr 274 263 # endif 264 #endif 265 266 #define SCHED_DEADLINE 6 267 268 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, 269 unsigned int flags) { 270 return syscall(__NR_sched_setattr, pid, attr, flags); 271 } 272 273 int __set_sched_attr(int pid, struct sched_attr *attr) 274 { 275 int flags = 0; 276 int retval; 277 278 retval = syscall_sched_setattr(pid, attr, flags); 279 if (retval < 0) { 280 err_msg("Failed to set sched attributes to the pid %d: %s\n", 281 pid, strerror(errno)); 282 return 1; 283 } 284 285 return 0; 286 } 287 288 /* 289 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 290 * 291 * Check if the procfs entry is a directory of a process, and then check if the 292 * process has a comm with the prefix set in char *comm_prefix. As the 293 * current users of this function only check for kernel threads, there is no 294 * need to check for the threads for the process. 295 * 296 * Return: True if the proc_entry contains a comm file with comm_prefix*. 297 * Otherwise returns false. 298 */ 299 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) 300 { 301 char buffer[MAX_PATH]; 302 int comm_fd, retval; 303 char *t_name; 304 305 if (proc_entry->d_type != DT_DIR) 306 return 0; 307 308 if (*proc_entry->d_name == '.') 309 return 0; 310 311 /* check if the string is a pid */ 312 for (t_name = proc_entry->d_name; t_name; t_name++) { 313 if (!isdigit(*t_name)) 314 break; 315 } 316 317 if (*t_name != '\0') 318 return 0; 319 320 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); 321 comm_fd = open(buffer, O_RDONLY); 322 if (comm_fd < 0) 323 return 0; 324 325 memset(buffer, 0, MAX_PATH); 326 retval = read(comm_fd, buffer, MAX_PATH); 327 328 close(comm_fd); 329 330 if (retval <= 0) 331 return 0; 332 333 buffer[MAX_PATH-1] = '\0'; 334 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); 335 if (retval) 336 return 0; 337 338 /* comm already have \n */ 339 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); 340 341 return 1; 342 } 343 344 /* 345 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix 346 * 347 * This function uses procfs to list the currently running threads and then set the 348 * sched_attr *attr to the threads that start with char *comm_prefix. It is 349 * mainly used to set the priority to the kernel threads created by the 350 * tracers. 351 */ 352 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 353 { 354 struct dirent *proc_entry; 355 DIR *procfs; 356 int retval; 357 int pid; 358 359 if (strlen(comm_prefix) >= MAX_PATH) { 360 err_msg("Command prefix is too long: %d < strlen(%s)\n", 361 MAX_PATH, comm_prefix); 362 return 1; 363 } 364 365 procfs = opendir("/proc"); 366 if (!procfs) { 367 err_msg("Could not open procfs\n"); 368 return 1; 369 } 370 371 while ((proc_entry = readdir(procfs))) { 372 373 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 374 if (!retval) 375 continue; 376 377 if (strtoi(proc_entry->d_name, &pid)) { 378 err_msg("'%s' is not a valid pid", proc_entry->d_name); 379 goto out_err; 380 } 381 /* procfs_is_workload_pid confirmed it is a pid */ 382 retval = __set_sched_attr(pid, attr); 383 if (retval) { 384 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 385 goto out_err; 386 } 387 388 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 389 } 390 return 0; 391 392 out_err: 393 closedir(procfs); 394 return 1; 395 } 396 397 #define INVALID_VAL (~0L) 398 static long get_long_ns_after_colon(char *start) 399 { 400 long val = INVALID_VAL; 401 402 /* find the ":" */ 403 start = strstr(start, ":"); 404 if (!start) 405 return -1; 406 407 /* skip ":" */ 408 start++; 409 val = parse_ns_duration(start); 410 411 return val; 412 } 413 414 static long get_long_after_colon(char *start) 415 { 416 long val = INVALID_VAL; 417 418 /* find the ":" */ 419 start = strstr(start, ":"); 420 if (!start) 421 return -1; 422 423 /* skip ":" */ 424 start++; 425 val = get_llong_from_str(start); 426 427 return val; 428 } 429 430 /* 431 * parse priority in the format: 432 * SCHED_OTHER: 433 * o:<prio> 434 * O:<prio> 435 * SCHED_RR: 436 * r:<prio> 437 * R:<prio> 438 * SCHED_FIFO: 439 * f:<prio> 440 * F:<prio> 441 * SCHED_DEADLINE: 442 * d:runtime:period 443 * D:runtime:period 444 */ 445 int parse_prio(char *arg, struct sched_attr *sched_param) 446 { 447 long prio; 448 long runtime; 449 long period; 450 451 memset(sched_param, 0, sizeof(*sched_param)); 452 sched_param->size = sizeof(*sched_param); 453 454 switch (arg[0]) { 455 case 'd': 456 case 'D': 457 /* d:runtime:period */ 458 if (strlen(arg) < 4) 459 return -1; 460 461 runtime = get_long_ns_after_colon(arg); 462 if (runtime == INVALID_VAL) 463 return -1; 464 465 period = get_long_ns_after_colon(&arg[2]); 466 if (period == INVALID_VAL) 467 return -1; 468 469 if (runtime > period) 470 return -1; 471 472 sched_param->sched_policy = SCHED_DEADLINE; 473 sched_param->sched_runtime = runtime; 474 sched_param->sched_deadline = period; 475 sched_param->sched_period = period; 476 break; 477 case 'f': 478 case 'F': 479 /* f:prio */ 480 prio = get_long_after_colon(arg); 481 if (prio == INVALID_VAL) 482 return -1; 483 484 if (prio < sched_get_priority_min(SCHED_FIFO)) 485 return -1; 486 if (prio > sched_get_priority_max(SCHED_FIFO)) 487 return -1; 488 489 sched_param->sched_policy = SCHED_FIFO; 490 sched_param->sched_priority = prio; 491 break; 492 case 'r': 493 case 'R': 494 /* r:prio */ 495 prio = get_long_after_colon(arg); 496 if (prio == INVALID_VAL) 497 return -1; 498 499 if (prio < sched_get_priority_min(SCHED_RR)) 500 return -1; 501 if (prio > sched_get_priority_max(SCHED_RR)) 502 return -1; 503 504 sched_param->sched_policy = SCHED_RR; 505 sched_param->sched_priority = prio; 506 break; 507 case 'o': 508 case 'O': 509 /* o:prio */ 510 prio = get_long_after_colon(arg); 511 if (prio == INVALID_VAL) 512 return -1; 513 514 if (prio < MIN_NICE) 515 return -1; 516 if (prio > MAX_NICE) 517 return -1; 518 519 sched_param->sched_policy = SCHED_OTHER; 520 sched_param->sched_nice = prio; 521 break; 522 default: 523 return -1; 524 } 525 return 0; 526 } 527 528 /* 529 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 530 * 531 * This is used to reduce the exit from idle latency. The value 532 * will be reset once the file descriptor of /dev/cpu_dma_latecy 533 * is closed. 534 * 535 * Return: the /dev/cpu_dma_latecy file descriptor 536 */ 537 int set_cpu_dma_latency(int32_t latency) 538 { 539 int retval; 540 int fd; 541 542 fd = open("/dev/cpu_dma_latency", O_RDWR); 543 if (fd < 0) { 544 err_msg("Error opening /dev/cpu_dma_latency\n"); 545 return -1; 546 } 547 548 retval = write(fd, &latency, 4); 549 if (retval < 1) { 550 err_msg("Error setting /dev/cpu_dma_latency\n"); 551 close(fd); 552 return -1; 553 } 554 555 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 556 557 return fd; 558 } 559 560 #ifdef HAVE_LIBCPUPOWER_SUPPORT 561 static unsigned int **saved_cpu_idle_disable_state; 562 static size_t saved_cpu_idle_disable_state_alloc_ctr; 563 564 /* 565 * save_cpu_idle_state_disable - save disable for all idle states of a cpu 566 * 567 * Saves the current disable of all idle states of a cpu, to be subsequently 568 * restored via restore_cpu_idle_disable_state. 569 * 570 * Return: idle state count on success, negative on error 571 */ 572 int save_cpu_idle_disable_state(unsigned int cpu) 573 { 574 unsigned int nr_states; 575 unsigned int state; 576 int disabled; 577 578 nr_states = cpuidle_state_count(cpu); 579 580 if (nr_states == 0) 581 return 0; 582 583 if (saved_cpu_idle_disable_state == NULL) { 584 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); 585 if (!saved_cpu_idle_disable_state) 586 return -1; 587 } 588 589 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); 590 if (!saved_cpu_idle_disable_state[cpu]) 591 return -1; 592 saved_cpu_idle_disable_state_alloc_ctr++; 593 594 for (state = 0; state < nr_states; state++) { 595 disabled = cpuidle_is_state_disabled(cpu, state); 596 if (disabled < 0) 597 return disabled; 598 saved_cpu_idle_disable_state[cpu][state] = disabled; 599 } 600 601 return nr_states; 602 } 603 604 /* 605 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu 606 * 607 * Restores the current disable state of all idle states of a cpu that was 608 * previously saved by save_cpu_idle_disable_state. 609 * 610 * Return: idle state count on success, negative on error 611 */ 612 int restore_cpu_idle_disable_state(unsigned int cpu) 613 { 614 unsigned int nr_states; 615 unsigned int state; 616 int disabled; 617 int result; 618 619 nr_states = cpuidle_state_count(cpu); 620 621 if (nr_states == 0) 622 return 0; 623 624 if (!saved_cpu_idle_disable_state) 625 return -1; 626 627 for (state = 0; state < nr_states; state++) { 628 if (!saved_cpu_idle_disable_state[cpu]) 629 return -1; 630 disabled = saved_cpu_idle_disable_state[cpu][state]; 631 result = cpuidle_state_disable(cpu, state, disabled); 632 if (result < 0) 633 return result; 634 } 635 636 free(saved_cpu_idle_disable_state[cpu]); 637 saved_cpu_idle_disable_state[cpu] = NULL; 638 saved_cpu_idle_disable_state_alloc_ctr--; 639 if (saved_cpu_idle_disable_state_alloc_ctr == 0) { 640 free(saved_cpu_idle_disable_state); 641 saved_cpu_idle_disable_state = NULL; 642 } 643 644 return nr_states; 645 } 646 647 /* 648 * free_cpu_idle_disable_states - free saved idle state disable for all cpus 649 * 650 * Frees the memory used for storing cpu idle state disable for all cpus 651 * and states. 652 * 653 * Normally, the memory is freed automatically in 654 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an 655 * error. 656 */ 657 void free_cpu_idle_disable_states(void) 658 { 659 int cpu; 660 661 if (!saved_cpu_idle_disable_state) 662 return; 663 664 for (cpu = 0; cpu < nr_cpus; cpu++) { 665 free(saved_cpu_idle_disable_state[cpu]); 666 saved_cpu_idle_disable_state[cpu] = NULL; 667 } 668 669 free(saved_cpu_idle_disable_state); 670 saved_cpu_idle_disable_state = NULL; 671 } 672 673 /* 674 * set_deepest_cpu_idle_state - limit idle state of cpu 675 * 676 * Disables all idle states deeper than the one given in 677 * deepest_state (assuming states with higher number are deeper). 678 * 679 * This is used to reduce the exit from idle latency. Unlike 680 * set_cpu_dma_latency, it can disable idle states per cpu. 681 * 682 * Return: idle state count on success, negative on error 683 */ 684 int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) 685 { 686 unsigned int nr_states; 687 unsigned int state; 688 int result; 689 690 nr_states = cpuidle_state_count(cpu); 691 692 for (state = deepest_state + 1; state < nr_states; state++) { 693 result = cpuidle_state_disable(cpu, state, 1); 694 if (result < 0) 695 return result; 696 } 697 698 return nr_states; 699 } 700 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 701 702 #define _STR(x) #x 703 #define STR(x) _STR(x) 704 705 /* 706 * find_mount - find a the mount point of a given fs 707 * 708 * Returns 0 if mount is not found, otherwise return 1 and fill mp 709 * with the mount point. 710 */ 711 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 712 { 713 char mount_point[MAX_PATH+1]; 714 char type[100]; 715 int found = 0; 716 FILE *fp; 717 718 fp = fopen("/proc/mounts", "r"); 719 if (!fp) 720 return 0; 721 722 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 723 if (strcmp(type, fs) == 0) { 724 found = 1; 725 break; 726 } 727 } 728 fclose(fp); 729 730 if (!found) 731 return 0; 732 733 memset(mp, 0, sizeof_mp); 734 strncpy(mp, mount_point, sizeof_mp - 1); 735 736 debug_msg("Fs %s found at %s\n", fs, mp); 737 return 1; 738 } 739 740 /* 741 * get_self_cgroup - get the current thread cgroup path 742 * 743 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 744 * 745 * 0::/user.slice/user-0.slice/session-3.scope'\n' 746 * 747 * This function is interested in the content after the second : and before the '\n'. 748 * 749 * Returns 1 if a string was found, 0 otherwise. 750 */ 751 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 752 { 753 char path[MAX_PATH], *start; 754 int fd, retval; 755 756 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 757 758 fd = open(path, O_RDONLY); 759 if (fd < 0) 760 return 0; 761 762 memset(path, 0, sizeof(path)); 763 retval = read(fd, path, MAX_PATH); 764 765 close(fd); 766 767 if (retval <= 0) 768 return 0; 769 770 path[MAX_PATH-1] = '\0'; 771 start = path; 772 773 start = strstr(start, ":"); 774 if (!start) 775 return 0; 776 777 /* skip ":" */ 778 start++; 779 780 start = strstr(start, ":"); 781 if (!start) 782 return 0; 783 784 /* skip ":" */ 785 start++; 786 787 if (strlen(start) >= sizeof_self_cg) 788 return 0; 789 790 snprintf(self_cg, sizeof_self_cg, "%s", start); 791 792 /* Swap '\n' with '\0' */ 793 start = strstr(self_cg, "\n"); 794 795 /* there must be '\n' */ 796 if (!start) 797 return 0; 798 799 /* ok, it found a string after the second : and before the \n */ 800 *start = '\0'; 801 802 return 1; 803 } 804 805 /* 806 * open_cgroup_procs - Open the cgroup.procs file for the given cgroup 807 * 808 * If cgroup argument is not NULL, the cgroup.procs file for that cgroup 809 * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread 810 * will be used. 811 * 812 * Supports cgroup v2. 813 * 814 * Returns the file descriptor on success, -1 otherwise. 815 */ 816 static int open_cgroup_procs(const char *cgroup) 817 { 818 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 819 char cgroup_procs[MAX_PATH]; 820 int retval; 821 int cg_fd; 822 size_t cg_path_len; 823 824 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 825 if (!retval) { 826 err_msg("Did not find cgroupv2 mount point\n"); 827 return -1; 828 } 829 830 cg_path_len = strlen(cgroup_path); 831 832 if (!cgroup) { 833 retval = get_self_cgroup(&cgroup_path[cg_path_len], 834 sizeof(cgroup_path) - cg_path_len); 835 if (!retval) { 836 err_msg("Did not find self cgroup\n"); 837 return -1; 838 } 839 } else { 840 snprintf(&cgroup_path[cg_path_len], 841 sizeof(cgroup_path) - cg_path_len, "%s/", cgroup); 842 } 843 844 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 845 846 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 847 848 cg_fd = open(cgroup_procs, O_RDWR); 849 if (cg_fd < 0) 850 return -1; 851 852 return cg_fd; 853 } 854 855 /* 856 * set_pid_cgroup - Set cgroup to pid_t pid 857 * 858 * If cgroup argument is not NULL, the threads will move to the given cgroup. 859 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 860 * 861 * Supports cgroup v2. 862 * 863 * Returns 1 on success, 0 otherwise. 864 */ 865 int set_pid_cgroup(pid_t pid, const char *cgroup) 866 { 867 char pid_str[24]; 868 int retval; 869 int cg_fd; 870 871 cg_fd = open_cgroup_procs(cgroup); 872 if (cg_fd < 0) 873 return 0; 874 875 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 876 877 retval = write(cg_fd, pid_str, strlen(pid_str)); 878 if (retval < 0) 879 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 880 pid_str, strerror(errno)); 881 else 882 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 883 884 close(cg_fd); 885 886 return (retval >= 0); 887 } 888 889 /** 890 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 891 * 892 * If cgroup argument is not NULL, the threads will move to the given cgroup. 893 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 894 * 895 * Supports cgroup v2. 896 * 897 * Returns 1 on success, 0 otherwise. 898 */ 899 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 900 { 901 struct dirent *proc_entry; 902 DIR *procfs; 903 int retval; 904 int cg_fd; 905 906 if (strlen(comm_prefix) >= MAX_PATH) { 907 err_msg("Command prefix is too long: %d < strlen(%s)\n", 908 MAX_PATH, comm_prefix); 909 return 0; 910 } 911 912 cg_fd = open_cgroup_procs(cgroup); 913 if (cg_fd < 0) 914 return 0; 915 916 procfs = opendir("/proc"); 917 if (!procfs) { 918 err_msg("Could not open procfs\n"); 919 goto out_cg; 920 } 921 922 while ((proc_entry = readdir(procfs))) { 923 924 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 925 if (!retval) 926 continue; 927 928 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 929 if (retval < 0) { 930 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 931 proc_entry->d_name, strerror(errno)); 932 goto out_procfs; 933 } 934 935 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 936 } 937 938 closedir(procfs); 939 close(cg_fd); 940 return 1; 941 942 out_procfs: 943 closedir(procfs); 944 out_cg: 945 close(cg_fd); 946 return 0; 947 } 948 949 /** 950 * auto_house_keeping - Automatically move rtla out of measurement threads 951 * 952 * Try to move rtla away from the tracer, if possible. 953 * 954 * Returns 1 on success, 0 otherwise. 955 */ 956 int auto_house_keeping(cpu_set_t *monitored_cpus) 957 { 958 cpu_set_t rtla_cpus, house_keeping_cpus; 959 int retval; 960 961 /* first get the CPUs in which rtla can actually run. */ 962 retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); 963 if (retval == -1) { 964 debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); 965 return 0; 966 } 967 968 /* then check if the existing setup is already good. */ 969 CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 970 if (!CPU_COUNT(&house_keeping_cpus)) { 971 debug_msg("rtla and the monitored CPUs do not share CPUs."); 972 debug_msg("Skipping auto house-keeping\n"); 973 return 1; 974 } 975 976 /* remove the intersection */ 977 CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 978 979 /* get only those that rtla can run */ 980 CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); 981 982 /* is there any cpu left? */ 983 if (!CPU_COUNT(&house_keeping_cpus)) { 984 debug_msg("Could not find any CPU for auto house-keeping\n"); 985 return 0; 986 } 987 988 retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); 989 if (retval == -1) { 990 debug_msg("Could not set affinity for auto house-keeping\n"); 991 return 0; 992 } 993 994 debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); 995 996 return 1; 997 } 998 999 /** 1000 * parse_optional_arg - Parse optional argument value 1001 * 1002 * Parse optional argument value, which can be in the form of: 1003 * -sarg, -s/--long=arg, -s/--long arg 1004 * 1005 * Returns arg value if found, NULL otherwise. 1006 */ 1007 char *parse_optional_arg(int argc, char **argv) 1008 { 1009 if (optarg) { 1010 if (optarg[0] == '=') { 1011 /* skip the = */ 1012 return &optarg[1]; 1013 } else { 1014 return optarg; 1015 } 1016 /* parse argument of form -s [arg] and --long [arg]*/ 1017 } else if (optind < argc && argv[optind][0] != '-') { 1018 /* consume optind */ 1019 return argv[optind++]; 1020 } else { 1021 return NULL; 1022 } 1023 } 1024 1025 /* 1026 * strtoi - convert string to integer with error checking 1027 * 1028 * Returns 0 on success, -1 if conversion fails or result is out of int range. 1029 */ 1030 int strtoi(const char *s, int *res) 1031 { 1032 char *end_ptr; 1033 long lres; 1034 1035 if (!*s) 1036 return -1; 1037 1038 errno = 0; 1039 lres = strtol(s, &end_ptr, 0); 1040 if (errno || *end_ptr || lres > INT_MAX || lres < INT_MIN) 1041 return -1; 1042 1043 *res = (int) lres; 1044 return 0; 1045 } 1046 1047 static inline void fatal_alloc(void) 1048 { 1049 fatal("Error allocating memory\n"); 1050 } 1051 1052 void *calloc_fatal(size_t n, size_t size) 1053 { 1054 void *p = calloc(n, size); 1055 1056 if (!p) 1057 fatal_alloc(); 1058 1059 return p; 1060 } 1061 1062 void *reallocarray_fatal(void *p, size_t n, size_t size) 1063 { 1064 p = reallocarray(p, n, size); 1065 1066 if (!p) 1067 fatal_alloc(); 1068 1069 return p; 1070 } 1071 1072 char *strdup_fatal(const char *s) 1073 { 1074 char *p = strdup(s); 1075 1076 if (!p) 1077 fatal_alloc(); 1078 1079 return p; 1080 } 1081