1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #ifdef HAVE_LIBCPUPOWER_SUPPORT 8 #include <cpuidle.h> 9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 10 #include <dirent.h> 11 #include <stdarg.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <unistd.h> 15 #include <ctype.h> 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <sched.h> 19 #include <stdio.h> 20 #include <limits.h> 21 22 #include "common.h" 23 24 #define MAX_MSG_LENGTH 1024 25 int config_debug; 26 27 /* 28 * err_msg - print an error message to the stderr 29 */ 30 void err_msg(const char *fmt, ...) 31 { 32 char message[MAX_MSG_LENGTH]; 33 va_list ap; 34 35 va_start(ap, fmt); 36 vsnprintf(message, sizeof(message), fmt, ap); 37 va_end(ap); 38 39 fprintf(stderr, "%s", message); 40 } 41 42 /* 43 * debug_msg - print a debug message to stderr if debug is set 44 */ 45 void debug_msg(const char *fmt, ...) 46 { 47 char message[MAX_MSG_LENGTH]; 48 va_list ap; 49 50 if (!config_debug) 51 return; 52 53 va_start(ap, fmt); 54 vsnprintf(message, sizeof(message), fmt, ap); 55 va_end(ap); 56 57 fprintf(stderr, "%s", message); 58 } 59 60 /* 61 * fatal - print an error message and EOL to stderr and exit with ERROR 62 */ 63 void fatal(const char *fmt, ...) 
64 { 65 va_list ap; 66 67 va_start(ap, fmt); 68 vfprintf(stderr, fmt, ap); 69 va_end(ap); 70 fprintf(stderr, "\n"); 71 72 exit(ERROR); 73 } 74 75 /* 76 * get_llong_from_str - get a long long int from a string 77 */ 78 long long get_llong_from_str(char *start) 79 { 80 long long value; 81 char *end; 82 83 errno = 0; 84 value = strtoll(start, &end, 10); 85 if (errno || start == end) 86 return -1; 87 88 return value; 89 } 90 91 /* 92 * get_duration - fill output with a human readable duration since start_time 93 */ 94 void get_duration(time_t start_time, char *output, int output_size) 95 { 96 time_t now = time(NULL); 97 struct tm *tm_info; 98 time_t duration; 99 100 duration = difftime(now, start_time); 101 tm_info = gmtime(&duration); 102 103 snprintf(output, output_size, "%3d %02d:%02d:%02d", 104 tm_info->tm_yday, 105 tm_info->tm_hour, 106 tm_info->tm_min, 107 tm_info->tm_sec); 108 } 109 110 /* 111 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 112 * 113 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 114 * filling cpu_set_t argument. 115 * 116 * Returns 0 on success, 1 otherwise. 
117 */ 118 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 119 { 120 const char *p; 121 int end_cpu; 122 int cpu; 123 int i; 124 125 CPU_ZERO(set); 126 127 for (p = cpu_list; *p; ) { 128 cpu = atoi(p); 129 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) 130 goto err; 131 132 while (isdigit(*p)) 133 p++; 134 if (*p == '-') { 135 p++; 136 end_cpu = atoi(p); 137 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) 138 goto err; 139 while (isdigit(*p)) 140 p++; 141 } else 142 end_cpu = cpu; 143 144 if (cpu == end_cpu) { 145 debug_msg("cpu_set: adding cpu %d\n", cpu); 146 CPU_SET(cpu, set); 147 } else { 148 for (i = cpu; i <= end_cpu; i++) { 149 debug_msg("cpu_set: adding cpu %d\n", i); 150 CPU_SET(i, set); 151 } 152 } 153 154 if (*p == ',') 155 p++; 156 } 157 158 return 0; 159 err: 160 debug_msg("Error parsing the cpu set %s\n", cpu_list); 161 return 1; 162 } 163 164 /* 165 * parse_stack_format - parse the stack format 166 * 167 * Return: the stack format on success, -1 otherwise. 
168 */ 169 int parse_stack_format(char *arg) 170 { 171 if (!strcmp(arg, "truncate")) 172 return STACK_FORMAT_TRUNCATE; 173 if (!strcmp(arg, "skip")) 174 return STACK_FORMAT_SKIP; 175 if (!strcmp(arg, "full")) 176 return STACK_FORMAT_FULL; 177 178 debug_msg("Error parsing the stack format %s\n", arg); 179 return -1; 180 } 181 182 /* 183 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 184 */ 185 long parse_seconds_duration(char *val) 186 { 187 char *end; 188 long t; 189 190 t = strtol(val, &end, 10); 191 192 if (end) { 193 switch (*end) { 194 case 's': 195 case 'S': 196 break; 197 case 'm': 198 case 'M': 199 t *= 60; 200 break; 201 case 'h': 202 case 'H': 203 t *= 60 * 60; 204 break; 205 206 case 'd': 207 case 'D': 208 t *= 24 * 60 * 60; 209 break; 210 } 211 } 212 213 return t; 214 } 215 216 /* 217 * match_time_unit - check if str starts with unit followed by end-of-string or ':' 218 * 219 * This allows the time unit parser to work both in standalone duration strings 220 * like "100ms" and in colon-delimited SCHED_DEADLINE specifications like 221 * "d:10ms:100ms", while still rejecting malformed input like "100msx". 
222 */ 223 static bool match_time_unit(const char *str, const char *unit) 224 { 225 size_t len = strlen(unit); 226 227 return strncmp(str, unit, len) == 0 && 228 (str[len] == '\0' || str[len] == ':'); 229 } 230 231 /* 232 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 233 */ 234 long parse_ns_duration(char *val) 235 { 236 char *end; 237 long t; 238 239 t = strtol(val, &end, 10); 240 241 if (end) { 242 if (match_time_unit(end, "ns")) { 243 return t; 244 } else if (match_time_unit(end, "us")) { 245 t *= 1000; 246 return t; 247 } else if (match_time_unit(end, "ms")) { 248 t *= 1000 * 1000; 249 return t; 250 } else if (match_time_unit(end, "s")) { 251 t *= 1000 * 1000 * 1000; 252 return t; 253 } 254 return -1; 255 } 256 257 return t; 258 } 259 260 /* 261 * This is a set of helper functions to use SCHED_DEADLINE. 262 */ 263 #ifndef __NR_sched_setattr 264 # ifdef __x86_64__ 265 # define __NR_sched_setattr 314 266 # elif __i386__ 267 # define __NR_sched_setattr 351 268 # elif __arm__ 269 # define __NR_sched_setattr 380 270 # elif __aarch64__ || __riscv 271 # define __NR_sched_setattr 274 272 # elif __powerpc__ 273 # define __NR_sched_setattr 355 274 # elif __s390x__ 275 # define __NR_sched_setattr 345 276 # elif __loongarch__ 277 # define __NR_sched_setattr 274 278 # endif 279 #endif 280 281 #define SCHED_DEADLINE 6 282 283 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, 284 unsigned int flags) { 285 return syscall(__NR_sched_setattr, pid, attr, flags); 286 } 287 288 int __set_sched_attr(int pid, struct sched_attr *attr) 289 { 290 int flags = 0; 291 int retval; 292 293 retval = syscall_sched_setattr(pid, attr, flags); 294 if (retval < 0) { 295 err_msg("Failed to set sched attributes to the pid %d: %s\n", 296 pid, strerror(errno)); 297 return 1; 298 } 299 300 return 0; 301 } 302 303 /* 304 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 305 * 306 * Check if the procfs entry 
is a directory of a process, and then check if the
 * process has a comm with the prefix set in char *comm_prefix. As the
 * current users of this function only check for kernel threads, there is no
 * need to check for the threads for the process.
 *
 * Return: True if the proc_entry contains a comm file with comm_prefix*.
 * Otherwise returns false.
 */
static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
{
	char buffer[MAX_PATH];
	const char *c;
	ssize_t len;
	int comm_fd;

	if (proc_entry->d_type != DT_DIR)
		return 0;

	/*
	 * check if the string is a pid: only digits are allowed, which also
	 * discards the entries starting with '.'
	 */
	for (c = proc_entry->d_name; *c; c++) {
		/* the ctype functions need an unsigned char value */
		if (!isdigit((unsigned char)*c))
			return 0;
	}

	snprintf(buffer, sizeof(buffer), "/proc/%s/comm", proc_entry->d_name);
	comm_fd = open(buffer, O_RDONLY);
	if (comm_fd < 0)
		return 0;

	/* reuse the buffer for the comm, keeping one byte for the '\0' */
	len = read(comm_fd, buffer, sizeof(buffer) - 1);

	close(comm_fd);

	if (len <= 0)
		return 0;

	buffer[len] = '\0';
	if (!str_has_prefix(buffer, comm_prefix))
		return 0;

	/* comm already have \n */
	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);

	return 1;
}

/*
 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
 *
 * This function uses procfs to list the currently running threads and then set the
 * sched_attr *attr to the threads that start with char *comm_prefix. It is
 * mainly used to set the priority to the kernel threads created by the
 * tracers.
365 */ 366 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 367 { 368 struct dirent *proc_entry; 369 DIR *procfs; 370 int retval; 371 int pid; 372 373 if (strlen(comm_prefix) >= MAX_PATH) { 374 err_msg("Command prefix is too long: %d < strlen(%s)\n", 375 MAX_PATH, comm_prefix); 376 return 1; 377 } 378 379 procfs = opendir("/proc"); 380 if (!procfs) { 381 err_msg("Could not open procfs\n"); 382 return 1; 383 } 384 385 while ((proc_entry = readdir(procfs))) { 386 387 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 388 if (!retval) 389 continue; 390 391 if (strtoi(proc_entry->d_name, &pid)) { 392 err_msg("'%s' is not a valid pid", proc_entry->d_name); 393 retval = 1; 394 goto out; 395 } 396 /* procfs_is_workload_pid confirmed it is a pid */ 397 retval = __set_sched_attr(pid, attr); 398 if (retval) { 399 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 400 goto out; 401 } 402 403 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 404 } 405 406 retval = 0; 407 out: 408 closedir(procfs); 409 return retval; 410 } 411 412 #define INVALID_VAL (~0L) 413 static long get_long_ns_after_colon(char *start) 414 { 415 long val = INVALID_VAL; 416 417 /* find the ":" */ 418 start = strstr(start, ":"); 419 if (!start) 420 return -1; 421 422 /* skip ":" */ 423 start++; 424 val = parse_ns_duration(start); 425 426 return val; 427 } 428 429 static long get_long_after_colon(char *start) 430 { 431 long val = INVALID_VAL; 432 433 /* find the ":" */ 434 start = strstr(start, ":"); 435 if (!start) 436 return -1; 437 438 /* skip ":" */ 439 start++; 440 val = get_llong_from_str(start); 441 442 return val; 443 } 444 445 /* 446 * parse priority in the format: 447 * SCHED_OTHER: 448 * o:<prio> 449 * O:<prio> 450 * SCHED_RR: 451 * r:<prio> 452 * R:<prio> 453 * SCHED_FIFO: 454 * f:<prio> 455 * F:<prio> 456 * SCHED_DEADLINE: 457 * d:runtime:period 458 * D:runtime:period 459 */ 460 int parse_prio(char *arg, struct sched_attr 
*sched_param) 461 { 462 long prio; 463 long runtime; 464 long period; 465 466 memset(sched_param, 0, sizeof(*sched_param)); 467 sched_param->size = sizeof(*sched_param); 468 469 switch (arg[0]) { 470 case 'd': 471 case 'D': 472 /* d:runtime:period */ 473 if (strlen(arg) < 4) 474 return -1; 475 476 runtime = get_long_ns_after_colon(arg); 477 if (runtime == INVALID_VAL) 478 return -1; 479 480 period = get_long_ns_after_colon(&arg[2]); 481 if (period == INVALID_VAL) 482 return -1; 483 484 if (runtime > period) 485 return -1; 486 487 sched_param->sched_policy = SCHED_DEADLINE; 488 sched_param->sched_runtime = runtime; 489 sched_param->sched_deadline = period; 490 sched_param->sched_period = period; 491 break; 492 case 'f': 493 case 'F': 494 /* f:prio */ 495 prio = get_long_after_colon(arg); 496 if (prio == INVALID_VAL) 497 return -1; 498 499 if (prio < sched_get_priority_min(SCHED_FIFO)) 500 return -1; 501 if (prio > sched_get_priority_max(SCHED_FIFO)) 502 return -1; 503 504 sched_param->sched_policy = SCHED_FIFO; 505 sched_param->sched_priority = prio; 506 break; 507 case 'r': 508 case 'R': 509 /* r:prio */ 510 prio = get_long_after_colon(arg); 511 if (prio == INVALID_VAL) 512 return -1; 513 514 if (prio < sched_get_priority_min(SCHED_RR)) 515 return -1; 516 if (prio > sched_get_priority_max(SCHED_RR)) 517 return -1; 518 519 sched_param->sched_policy = SCHED_RR; 520 sched_param->sched_priority = prio; 521 break; 522 case 'o': 523 case 'O': 524 /* o:prio */ 525 prio = get_long_after_colon(arg); 526 if (prio == INVALID_VAL) 527 return -1; 528 529 if (prio < MIN_NICE) 530 return -1; 531 if (prio > MAX_NICE) 532 return -1; 533 534 sched_param->sched_policy = SCHED_OTHER; 535 sched_param->sched_nice = prio; 536 break; 537 default: 538 return -1; 539 } 540 return 0; 541 } 542 543 /* 544 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 545 * 546 * This is used to reduce the exit from idle latency. 
The value 547 * will be reset once the file descriptor of /dev/cpu_dma_latecy 548 * is closed. 549 * 550 * Return: the /dev/cpu_dma_latecy file descriptor 551 */ 552 int set_cpu_dma_latency(int32_t latency) 553 { 554 int retval; 555 int fd; 556 557 fd = open("/dev/cpu_dma_latency", O_RDWR); 558 if (fd < 0) { 559 err_msg("Error opening /dev/cpu_dma_latency\n"); 560 return -1; 561 } 562 563 retval = write(fd, &latency, 4); 564 if (retval < 1) { 565 err_msg("Error setting /dev/cpu_dma_latency\n"); 566 close(fd); 567 return -1; 568 } 569 570 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 571 572 return fd; 573 } 574 575 #ifdef HAVE_LIBCPUPOWER_SUPPORT 576 static unsigned int **saved_cpu_idle_disable_state; 577 static size_t saved_cpu_idle_disable_state_alloc_ctr; 578 579 /* 580 * save_cpu_idle_state_disable - save disable for all idle states of a cpu 581 * 582 * Saves the current disable of all idle states of a cpu, to be subsequently 583 * restored via restore_cpu_idle_disable_state. 
584 * 585 * Return: idle state count on success, negative on error 586 */ 587 int save_cpu_idle_disable_state(unsigned int cpu) 588 { 589 unsigned int nr_states; 590 unsigned int state; 591 int disabled; 592 593 nr_states = cpuidle_state_count(cpu); 594 595 if (nr_states == 0) 596 return 0; 597 598 if (saved_cpu_idle_disable_state == NULL) { 599 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); 600 if (!saved_cpu_idle_disable_state) 601 return -1; 602 } 603 604 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); 605 if (!saved_cpu_idle_disable_state[cpu]) 606 return -1; 607 saved_cpu_idle_disable_state_alloc_ctr++; 608 609 for (state = 0; state < nr_states; state++) { 610 disabled = cpuidle_is_state_disabled(cpu, state); 611 if (disabled < 0) 612 return disabled; 613 saved_cpu_idle_disable_state[cpu][state] = disabled; 614 } 615 616 return nr_states; 617 } 618 619 /* 620 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu 621 * 622 * Restores the current disable state of all idle states of a cpu that was 623 * previously saved by save_cpu_idle_disable_state. 
624 * 625 * Return: idle state count on success, negative on error 626 */ 627 int restore_cpu_idle_disable_state(unsigned int cpu) 628 { 629 unsigned int nr_states; 630 unsigned int state; 631 int disabled; 632 int result; 633 634 nr_states = cpuidle_state_count(cpu); 635 636 if (nr_states == 0) 637 return 0; 638 639 if (!saved_cpu_idle_disable_state) 640 return -1; 641 642 for (state = 0; state < nr_states; state++) { 643 if (!saved_cpu_idle_disable_state[cpu]) 644 return -1; 645 disabled = saved_cpu_idle_disable_state[cpu][state]; 646 result = cpuidle_state_disable(cpu, state, disabled); 647 if (result < 0) 648 return result; 649 } 650 651 free(saved_cpu_idle_disable_state[cpu]); 652 saved_cpu_idle_disable_state[cpu] = NULL; 653 saved_cpu_idle_disable_state_alloc_ctr--; 654 if (saved_cpu_idle_disable_state_alloc_ctr == 0) { 655 free(saved_cpu_idle_disable_state); 656 saved_cpu_idle_disable_state = NULL; 657 } 658 659 return nr_states; 660 } 661 662 /* 663 * free_cpu_idle_disable_states - free saved idle state disable for all cpus 664 * 665 * Frees the memory used for storing cpu idle state disable for all cpus 666 * and states. 667 * 668 * Normally, the memory is freed automatically in 669 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an 670 * error. 671 */ 672 void free_cpu_idle_disable_states(void) 673 { 674 int cpu; 675 676 if (!saved_cpu_idle_disable_state) 677 return; 678 679 for (cpu = 0; cpu < nr_cpus; cpu++) { 680 free(saved_cpu_idle_disable_state[cpu]); 681 saved_cpu_idle_disable_state[cpu] = NULL; 682 } 683 684 free(saved_cpu_idle_disable_state); 685 saved_cpu_idle_disable_state = NULL; 686 } 687 688 /* 689 * set_deepest_cpu_idle_state - limit idle state of cpu 690 * 691 * Disables all idle states deeper than the one given in 692 * deepest_state (assuming states with higher number are deeper). 693 * 694 * This is used to reduce the exit from idle latency. Unlike 695 * set_cpu_dma_latency, it can disable idle states per cpu. 
696 * 697 * Return: idle state count on success, negative on error 698 */ 699 int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) 700 { 701 unsigned int nr_states; 702 unsigned int state; 703 int result; 704 705 nr_states = cpuidle_state_count(cpu); 706 707 for (state = deepest_state + 1; state < nr_states; state++) { 708 result = cpuidle_state_disable(cpu, state, 1); 709 if (result < 0) 710 return result; 711 } 712 713 return nr_states; 714 } 715 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 716 717 #define _STR(x) #x 718 #define STR(x) _STR(x) 719 720 /* 721 * find_mount - find a the mount point of a given fs 722 * 723 * Returns 0 if mount is not found, otherwise return 1 and fill mp 724 * with the mount point. 725 */ 726 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 727 { 728 char mount_point[MAX_PATH+1]; 729 char type[100]; 730 int found = 0; 731 FILE *fp; 732 733 fp = fopen("/proc/mounts", "r"); 734 if (!fp) 735 return 0; 736 737 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 738 if (strcmp(type, fs) == 0) { 739 found = 1; 740 break; 741 } 742 } 743 fclose(fp); 744 745 if (!found) 746 return 0; 747 748 memset(mp, 0, sizeof_mp); 749 strncpy(mp, mount_point, sizeof_mp - 1); 750 751 debug_msg("Fs %s found at %s\n", fs, mp); 752 return 1; 753 } 754 755 /* 756 * get_self_cgroup - get the current thread cgroup path 757 * 758 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 759 * 760 * 0::/user.slice/user-0.slice/session-3.scope'\n' 761 * 762 * This function is interested in the content after the second : and before the '\n'. 763 * 764 * Returns 1 if a string was found, 0 otherwise. 
765 */ 766 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 767 { 768 char path[MAX_PATH], *start; 769 int fd, retval; 770 771 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 772 773 fd = open(path, O_RDONLY); 774 if (fd < 0) 775 return 0; 776 777 memset(path, 0, sizeof(path)); 778 retval = read(fd, path, MAX_PATH); 779 780 close(fd); 781 782 if (retval <= 0) 783 return 0; 784 785 path[MAX_PATH-1] = '\0'; 786 start = path; 787 788 start = strstr(start, ":"); 789 if (!start) 790 return 0; 791 792 /* skip ":" */ 793 start++; 794 795 start = strstr(start, ":"); 796 if (!start) 797 return 0; 798 799 /* skip ":" */ 800 start++; 801 802 if (strlen(start) >= sizeof_self_cg) 803 return 0; 804 805 snprintf(self_cg, sizeof_self_cg, "%s", start); 806 807 /* Swap '\n' with '\0' */ 808 start = strstr(self_cg, "\n"); 809 810 /* there must be '\n' */ 811 if (!start) 812 return 0; 813 814 /* ok, it found a string after the second : and before the \n */ 815 *start = '\0'; 816 817 return 1; 818 } 819 820 /* 821 * open_cgroup_procs - Open the cgroup.procs file for the given cgroup 822 * 823 * If cgroup argument is not NULL, the cgroup.procs file for that cgroup 824 * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread 825 * will be used. 826 * 827 * Supports cgroup v2. 828 * 829 * Returns the file descriptor on success, -1 otherwise. 
830 */ 831 static int open_cgroup_procs(const char *cgroup) 832 { 833 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 834 char cgroup_procs[MAX_PATH]; 835 int retval; 836 int cg_fd; 837 size_t cg_path_len; 838 839 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 840 if (!retval) { 841 err_msg("Did not find cgroupv2 mount point\n"); 842 return -1; 843 } 844 845 cg_path_len = strlen(cgroup_path); 846 847 if (!cgroup) { 848 retval = get_self_cgroup(&cgroup_path[cg_path_len], 849 sizeof(cgroup_path) - cg_path_len); 850 if (!retval) { 851 err_msg("Did not find self cgroup\n"); 852 return -1; 853 } 854 } else { 855 snprintf(&cgroup_path[cg_path_len], 856 sizeof(cgroup_path) - cg_path_len, "%s/", cgroup); 857 } 858 859 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 860 861 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 862 863 cg_fd = open(cgroup_procs, O_RDWR); 864 if (cg_fd < 0) 865 return -1; 866 867 return cg_fd; 868 } 869 870 /* 871 * set_pid_cgroup - Set cgroup to pid_t pid 872 * 873 * If cgroup argument is not NULL, the threads will move to the given cgroup. 874 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 875 * 876 * Supports cgroup v2. 877 * 878 * Returns 1 on success, 0 otherwise. 
879 */ 880 int set_pid_cgroup(pid_t pid, const char *cgroup) 881 { 882 char pid_str[24]; 883 int retval; 884 int cg_fd; 885 886 cg_fd = open_cgroup_procs(cgroup); 887 if (cg_fd < 0) 888 return 0; 889 890 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 891 892 retval = write(cg_fd, pid_str, strlen(pid_str)); 893 if (retval < 0) 894 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 895 pid_str, strerror(errno)); 896 else 897 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 898 899 close(cg_fd); 900 901 return (retval >= 0); 902 } 903 904 /** 905 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 906 * 907 * If cgroup argument is not NULL, the threads will move to the given cgroup. 908 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 909 * 910 * Supports cgroup v2. 911 * 912 * Returns 1 on success, 0 otherwise. 913 */ 914 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 915 { 916 struct dirent *proc_entry; 917 DIR *procfs; 918 int retval; 919 int cg_fd; 920 921 if (strlen(comm_prefix) >= MAX_PATH) { 922 err_msg("Command prefix is too long: %d < strlen(%s)\n", 923 MAX_PATH, comm_prefix); 924 return 0; 925 } 926 927 cg_fd = open_cgroup_procs(cgroup); 928 if (cg_fd < 0) 929 return 0; 930 931 procfs = opendir("/proc"); 932 if (!procfs) { 933 err_msg("Could not open procfs\n"); 934 goto out_cg; 935 } 936 937 while ((proc_entry = readdir(procfs))) { 938 939 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 940 if (!retval) 941 continue; 942 943 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 944 if (retval < 0) { 945 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 946 proc_entry->d_name, strerror(errno)); 947 goto out_procfs; 948 } 949 950 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 951 } 952 953 closedir(procfs); 954 close(cg_fd); 955 return 1; 956 957 out_procfs: 958 closedir(procfs); 959 out_cg: 960 close(cg_fd); 961 
return 0; 962 } 963 964 /** 965 * auto_house_keeping - Automatically move rtla out of measurement threads 966 * 967 * Try to move rtla away from the tracer, if possible. 968 * 969 * Returns 1 on success, 0 otherwise. 970 */ 971 int auto_house_keeping(cpu_set_t *monitored_cpus) 972 { 973 cpu_set_t rtla_cpus, house_keeping_cpus; 974 int retval; 975 976 /* first get the CPUs in which rtla can actually run. */ 977 retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); 978 if (retval == -1) { 979 debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); 980 return 0; 981 } 982 983 /* then check if the existing setup is already good. */ 984 CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 985 if (!CPU_COUNT(&house_keeping_cpus)) { 986 debug_msg("rtla and the monitored CPUs do not share CPUs."); 987 debug_msg("Skipping auto house-keeping\n"); 988 return 1; 989 } 990 991 /* remove the intersection */ 992 CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 993 994 /* get only those that rtla can run */ 995 CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); 996 997 /* is there any cpu left? */ 998 if (!CPU_COUNT(&house_keeping_cpus)) { 999 debug_msg("Could not find any CPU for auto house-keeping\n"); 1000 return 0; 1001 } 1002 1003 retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); 1004 if (retval == -1) { 1005 debug_msg("Could not set affinity for auto house-keeping\n"); 1006 return 0; 1007 } 1008 1009 debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); 1010 1011 return 1; 1012 } 1013 1014 /** 1015 * parse_optional_arg - Parse optional argument value 1016 * 1017 * Parse optional argument value, which can be in the form of: 1018 * -sarg, -s/--long=arg, -s/--long arg 1019 * 1020 * Returns arg value if found, NULL otherwise. 
1021 */ 1022 char *parse_optional_arg(int argc, char **argv) 1023 { 1024 if (optarg) { 1025 if (optarg[0] == '=') { 1026 /* skip the = */ 1027 return &optarg[1]; 1028 } else { 1029 return optarg; 1030 } 1031 /* parse argument of form -s [arg] and --long [arg]*/ 1032 } else if (optind < argc && argv[optind][0] != '-') { 1033 /* consume optind */ 1034 return argv[optind++]; 1035 } else { 1036 return NULL; 1037 } 1038 } 1039 1040 /* 1041 * strtoi - convert string to integer with error checking 1042 * 1043 * Returns 0 on success, -1 if conversion fails or result is out of int range. 1044 */ 1045 int strtoi(const char *s, int *res) 1046 { 1047 char *end_ptr; 1048 long lres; 1049 1050 if (!*s) 1051 return -1; 1052 1053 errno = 0; 1054 lres = strtol(s, &end_ptr, 0); 1055 if (errno || *end_ptr || lres > INT_MAX || lres < INT_MIN) 1056 return -1; 1057 1058 *res = (int) lres; 1059 return 0; 1060 } 1061 1062 static inline void fatal_alloc(void) 1063 { 1064 fatal("Error allocating memory\n"); 1065 } 1066 1067 void *calloc_fatal(size_t n, size_t size) 1068 { 1069 void *p = calloc(n, size); 1070 1071 if (!p) 1072 fatal_alloc(); 1073 1074 return p; 1075 } 1076 1077 void *reallocarray_fatal(void *p, size_t n, size_t size) 1078 { 1079 p = reallocarray(p, n, size); 1080 1081 if (!p) 1082 fatal_alloc(); 1083 1084 return p; 1085 } 1086 1087 char *strdup_fatal(const char *s) 1088 { 1089 char *p = strdup(s); 1090 1091 if (!p) 1092 fatal_alloc(); 1093 1094 return p; 1095 } 1096