1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #ifdef HAVE_LIBCPUPOWER_SUPPORT 8 #include <cpuidle.h> 9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 10 #include <dirent.h> 11 #include <stdarg.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <unistd.h> 15 #include <ctype.h> 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <sched.h> 19 #include <stdio.h> 20 #include <limits.h> 21 22 #include "utils.h" 23 24 #define MAX_MSG_LENGTH 1024 25 int config_debug; 26 27 /* 28 * err_msg - print an error message to the stderr 29 */ 30 void err_msg(const char *fmt, ...) 31 { 32 char message[MAX_MSG_LENGTH]; 33 va_list ap; 34 35 va_start(ap, fmt); 36 vsnprintf(message, sizeof(message), fmt, ap); 37 va_end(ap); 38 39 fprintf(stderr, "%s", message); 40 } 41 42 /* 43 * debug_msg - print a debug message to stderr if debug is set 44 */ 45 void debug_msg(const char *fmt, ...) 46 { 47 char message[MAX_MSG_LENGTH]; 48 va_list ap; 49 50 if (!config_debug) 51 return; 52 53 va_start(ap, fmt); 54 vsnprintf(message, sizeof(message), fmt, ap); 55 va_end(ap); 56 57 fprintf(stderr, "%s", message); 58 } 59 60 /* 61 * fatal - print an error message and EOL to stderr and exit with ERROR 62 */ 63 void fatal(const char *fmt, ...) 64 { 65 va_list ap; 66 67 va_start(ap, fmt); 68 vfprintf(stderr, fmt, ap); 69 va_end(ap); 70 fprintf(stderr, "\n"); 71 72 exit(ERROR); 73 } 74 75 /* 76 * get_llong_from_str - get a long long int from a string 77 */ 78 long long get_llong_from_str(char *start) 79 { 80 long long value; 81 char *end; 82 83 errno = 0; 84 value = strtoll(start, &end, 10); 85 if (errno || start == end) 86 return -1; 87 88 return value; 89 } 90 91 /* 92 * get_duration - fill output with a human readable duration since start_time 93 */ 94 void get_duration(time_t start_time, char *output, int output_size) 95 { 96 time_t now = time(NULL); 97 struct tm *tm_info; 98 time_t duration; 99 100 duration = difftime(now, start_time); 101 tm_info = gmtime(&duration); 102 103 snprintf(output, output_size, "%3d %02d:%02d:%02d", 104 tm_info->tm_yday, 105 tm_info->tm_hour, 106 tm_info->tm_min, 107 tm_info->tm_sec); 108 } 109 110 /* 111 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 112 * 113 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 114 * filling cpu_set_t argument. 115 * 116 * Returns 0 on success, 1 otherwise. 117 */ 118 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 119 { 120 const char *p; 121 int end_cpu; 122 int nr_cpus; 123 int cpu; 124 int i; 125 126 CPU_ZERO(set); 127 128 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 129 130 for (p = cpu_list; *p; ) { 131 cpu = atoi(p); 132 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) 133 goto err; 134 135 while (isdigit(*p)) 136 p++; 137 if (*p == '-') { 138 p++; 139 end_cpu = atoi(p); 140 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) 141 goto err; 142 while (isdigit(*p)) 143 p++; 144 } else 145 end_cpu = cpu; 146 147 if (cpu == end_cpu) { 148 debug_msg("cpu_set: adding cpu %d\n", cpu); 149 CPU_SET(cpu, set); 150 } else { 151 for (i = cpu; i <= end_cpu; i++) { 152 debug_msg("cpu_set: adding cpu %d\n", i); 153 CPU_SET(i, set); 154 } 155 } 156 157 if (*p == ',') 158 p++; 159 } 160 161 return 0; 162 err: 163 debug_msg("Error parsing the cpu set %s\n", cpu_list); 164 return 1; 165 } 166 167 /* 168 * parse_stack_format - parse the stack format 169 * 170 * Return: the stack format on success, -1 otherwise. 171 */ 172 int parse_stack_format(char *arg) 173 { 174 if (!strcmp(arg, "truncate")) 175 return STACK_FORMAT_TRUNCATE; 176 if (!strcmp(arg, "skip")) 177 return STACK_FORMAT_SKIP; 178 if (!strcmp(arg, "full")) 179 return STACK_FORMAT_FULL; 180 181 debug_msg("Error parsing the stack format %s\n", arg); 182 return -1; 183 } 184 185 /* 186 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 187 */ 188 long parse_seconds_duration(char *val) 189 { 190 char *end; 191 long t; 192 193 t = strtol(val, &end, 10); 194 195 if (end) { 196 switch (*end) { 197 case 's': 198 case 'S': 199 break; 200 case 'm': 201 case 'M': 202 t *= 60; 203 break; 204 case 'h': 205 case 'H': 206 t *= 60 * 60; 207 break; 208 209 case 'd': 210 case 'D': 211 t *= 24 * 60 * 60; 212 break; 213 } 214 } 215 216 return t; 217 } 218 219 /* 220 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 221 */ 222 long parse_ns_duration(char *val) 223 { 224 char *end; 225 long t; 226 227 t = strtol(val, &end, 10); 228 229 if (end) { 230 if (!strncmp(end, "ns", 2)) { 231 return t; 232 } else if (!strncmp(end, "us", 2)) { 233 t *= 1000; 234 return t; 235 } else if (!strncmp(end, "ms", 2)) { 236 t *= 1000 * 1000; 237 return t; 238 } else if (!strncmp(end, "s", 1)) { 239 t *= 1000 * 1000 * 1000; 240 return t; 241 } 242 return -1; 243 } 244 245 return t; 246 } 247 248 /* 249 * This is a set of helper functions to use SCHED_DEADLINE. 250 */ 251 #ifndef __NR_sched_setattr 252 # ifdef __x86_64__ 253 # define __NR_sched_setattr 314 254 # elif __i386__ 255 # define __NR_sched_setattr 351 256 # elif __arm__ 257 # define __NR_sched_setattr 380 258 # elif __aarch64__ || __riscv 259 # define __NR_sched_setattr 274 260 # elif __powerpc__ 261 # define __NR_sched_setattr 355 262 # elif __s390x__ 263 # define __NR_sched_setattr 345 264 # elif __loongarch__ 265 # define __NR_sched_setattr 274 266 # endif 267 #endif 268 269 #define SCHED_DEADLINE 6 270 271 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, 272 unsigned int flags) { 273 return syscall(__NR_sched_setattr, pid, attr, flags); 274 } 275 276 int __set_sched_attr(int pid, struct sched_attr *attr) 277 { 278 int flags = 0; 279 int retval; 280 281 retval = syscall_sched_setattr(pid, attr, flags); 282 if (retval < 0) { 283 err_msg("Failed to set sched attributes to the pid %d: %s\n", 284 pid, strerror(errno)); 285 return 1; 286 } 287 288 return 0; 289 } 290 291 /* 292 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 293 * 294 * Check if the procfs entry is a directory of a process, and then check if the 295 * process has a comm with the prefix set in char *comm_prefix. As the 296 * current users of this function only check for kernel threads, there is no 297 * need to check for the threads for the process. 298 * 299 * Return: True if the proc_entry contains a comm file with comm_prefix*. 300 * Otherwise returns false. 301 */ 302 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) 303 { 304 char buffer[MAX_PATH]; 305 int comm_fd, retval; 306 char *t_name; 307 308 if (proc_entry->d_type != DT_DIR) 309 return 0; 310 311 if (*proc_entry->d_name == '.') 312 return 0; 313 314 /* check if the string is a pid */ 315 for (t_name = proc_entry->d_name; t_name; t_name++) { 316 if (!isdigit(*t_name)) 317 break; 318 } 319 320 if (*t_name != '\0') 321 return 0; 322 323 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); 324 comm_fd = open(buffer, O_RDONLY); 325 if (comm_fd < 0) 326 return 0; 327 328 memset(buffer, 0, MAX_PATH); 329 retval = read(comm_fd, buffer, MAX_PATH); 330 331 close(comm_fd); 332 333 if (retval <= 0) 334 return 0; 335 336 buffer[MAX_PATH-1] = '\0'; 337 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); 338 if (retval) 339 return 0; 340 341 /* comm already have \n */ 342 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); 343 344 return 1; 345 } 346 347 /* 348 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix 349 * 350 * This function uses procfs to list the currently running threads and then set the 351 * sched_attr *attr to the threads that start with char *comm_prefix. It is 352 * mainly used to set the priority to the kernel threads created by the 353 * tracers. 354 */ 355 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 356 { 357 struct dirent *proc_entry; 358 DIR *procfs; 359 int retval; 360 int pid; 361 362 if (strlen(comm_prefix) >= MAX_PATH) { 363 err_msg("Command prefix is too long: %d < strlen(%s)\n", 364 MAX_PATH, comm_prefix); 365 return 1; 366 } 367 368 procfs = opendir("/proc"); 369 if (!procfs) { 370 err_msg("Could not open procfs\n"); 371 return 1; 372 } 373 374 while ((proc_entry = readdir(procfs))) { 375 376 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 377 if (!retval) 378 continue; 379 380 if (strtoi(proc_entry->d_name, &pid)) { 381 err_msg("'%s' is not a valid pid", proc_entry->d_name); 382 goto out_err; 383 } 384 /* procfs_is_workload_pid confirmed it is a pid */ 385 retval = __set_sched_attr(pid, attr); 386 if (retval) { 387 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 388 goto out_err; 389 } 390 391 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 392 } 393 return 0; 394 395 out_err: 396 closedir(procfs); 397 return 1; 398 } 399 400 #define INVALID_VAL (~0L) 401 static long get_long_ns_after_colon(char *start) 402 { 403 long val = INVALID_VAL; 404 405 /* find the ":" */ 406 start = strstr(start, ":"); 407 if (!start) 408 return -1; 409 410 /* skip ":" */ 411 start++; 412 val = parse_ns_duration(start); 413 414 return val; 415 } 416 417 static long get_long_after_colon(char *start) 418 { 419 long val = INVALID_VAL; 420 421 /* find the ":" */ 422 start = strstr(start, ":"); 423 if (!start) 424 return -1; 425 426 /* skip ":" */ 427 start++; 428 val = get_llong_from_str(start); 429 430 return val; 431 } 432 433 /* 434 * parse priority in the format: 435 * SCHED_OTHER: 436 * o:<prio> 437 * O:<prio> 438 * SCHED_RR: 439 * r:<prio> 440 * R:<prio> 441 * SCHED_FIFO: 442 * f:<prio> 443 * F:<prio> 444 * SCHED_DEADLINE: 445 * d:runtime:period 446 * D:runtime:period 447 */ 448 int parse_prio(char *arg, struct sched_attr *sched_param) 449 { 450 long prio; 451 long runtime; 452 long period; 453 454 memset(sched_param, 0, sizeof(*sched_param)); 455 sched_param->size = sizeof(*sched_param); 456 457 switch (arg[0]) { 458 case 'd': 459 case 'D': 460 /* d:runtime:period */ 461 if (strlen(arg) < 4) 462 return -1; 463 464 runtime = get_long_ns_after_colon(arg); 465 if (runtime == INVALID_VAL) 466 return -1; 467 468 period = get_long_ns_after_colon(&arg[2]); 469 if (period == INVALID_VAL) 470 return -1; 471 472 if (runtime > period) 473 return -1; 474 475 sched_param->sched_policy = SCHED_DEADLINE; 476 sched_param->sched_runtime = runtime; 477 sched_param->sched_deadline = period; 478 sched_param->sched_period = period; 479 break; 480 case 'f': 481 case 'F': 482 /* f:prio */ 483 prio = get_long_after_colon(arg); 484 if (prio == INVALID_VAL) 485 return -1; 486 487 if (prio < sched_get_priority_min(SCHED_FIFO)) 488 return -1; 489 if (prio > sched_get_priority_max(SCHED_FIFO)) 490 return -1; 491 492 sched_param->sched_policy = SCHED_FIFO; 493 sched_param->sched_priority = prio; 494 break; 495 case 'r': 496 case 'R': 497 /* r:prio */ 498 prio = get_long_after_colon(arg); 499 if (prio == INVALID_VAL) 500 return -1; 501 502 if (prio < sched_get_priority_min(SCHED_RR)) 503 return -1; 504 if (prio > sched_get_priority_max(SCHED_RR)) 505 return -1; 506 507 sched_param->sched_policy = SCHED_RR; 508 sched_param->sched_priority = prio; 509 break; 510 case 'o': 511 case 'O': 512 /* o:prio */ 513 prio = get_long_after_colon(arg); 514 if (prio == INVALID_VAL) 515 return -1; 516 517 if (prio < MIN_NICE) 518 return -1; 519 if (prio > MAX_NICE) 520 return -1; 521 522 sched_param->sched_policy = SCHED_OTHER; 523 sched_param->sched_nice = prio; 524 break; 525 default: 526 return -1; 527 } 528 return 0; 529 } 530 531 /* 532 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 533 * 534 * This is used to reduce the exit from idle latency. The value 535 * will be reset once the file descriptor of /dev/cpu_dma_latecy 536 * is closed. 537 * 538 * Return: the /dev/cpu_dma_latecy file descriptor 539 */ 540 int set_cpu_dma_latency(int32_t latency) 541 { 542 int retval; 543 int fd; 544 545 fd = open("/dev/cpu_dma_latency", O_RDWR); 546 if (fd < 0) { 547 err_msg("Error opening /dev/cpu_dma_latency\n"); 548 return -1; 549 } 550 551 retval = write(fd, &latency, 4); 552 if (retval < 1) { 553 err_msg("Error setting /dev/cpu_dma_latency\n"); 554 close(fd); 555 return -1; 556 } 557 558 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 559 560 return fd; 561 } 562 563 #ifdef HAVE_LIBCPUPOWER_SUPPORT 564 static unsigned int **saved_cpu_idle_disable_state; 565 static size_t saved_cpu_idle_disable_state_alloc_ctr; 566 567 /* 568 * save_cpu_idle_state_disable - save disable for all idle states of a cpu 569 * 570 * Saves the current disable of all idle states of a cpu, to be subsequently 571 * restored via restore_cpu_idle_disable_state. 572 * 573 * Return: idle state count on success, negative on error 574 */ 575 int save_cpu_idle_disable_state(unsigned int cpu) 576 { 577 unsigned int nr_states; 578 unsigned int state; 579 int disabled; 580 int nr_cpus; 581 582 nr_states = cpuidle_state_count(cpu); 583 584 if (nr_states == 0) 585 return 0; 586 587 if (saved_cpu_idle_disable_state == NULL) { 588 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 589 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); 590 if (!saved_cpu_idle_disable_state) 591 return -1; 592 } 593 594 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); 595 if (!saved_cpu_idle_disable_state[cpu]) 596 return -1; 597 saved_cpu_idle_disable_state_alloc_ctr++; 598 599 for (state = 0; state < nr_states; state++) { 600 disabled = cpuidle_is_state_disabled(cpu, state); 601 if (disabled < 0) 602 return disabled; 603 saved_cpu_idle_disable_state[cpu][state] = disabled; 604 } 605 606 return nr_states; 607 } 608 609 /* 610 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu 611 * 612 * Restores the current disable state of all idle states of a cpu that was 613 * previously saved by save_cpu_idle_disable_state. 614 * 615 * Return: idle state count on success, negative on error 616 */ 617 int restore_cpu_idle_disable_state(unsigned int cpu) 618 { 619 unsigned int nr_states; 620 unsigned int state; 621 int disabled; 622 int result; 623 624 nr_states = cpuidle_state_count(cpu); 625 626 if (nr_states == 0) 627 return 0; 628 629 if (!saved_cpu_idle_disable_state) 630 return -1; 631 632 for (state = 0; state < nr_states; state++) { 633 if (!saved_cpu_idle_disable_state[cpu]) 634 return -1; 635 disabled = saved_cpu_idle_disable_state[cpu][state]; 636 result = cpuidle_state_disable(cpu, state, disabled); 637 if (result < 0) 638 return result; 639 } 640 641 free(saved_cpu_idle_disable_state[cpu]); 642 saved_cpu_idle_disable_state[cpu] = NULL; 643 saved_cpu_idle_disable_state_alloc_ctr--; 644 if (saved_cpu_idle_disable_state_alloc_ctr == 0) { 645 free(saved_cpu_idle_disable_state); 646 saved_cpu_idle_disable_state = NULL; 647 } 648 649 return nr_states; 650 } 651 652 /* 653 * free_cpu_idle_disable_states - free saved idle state disable for all cpus 654 * 655 * Frees the memory used for storing cpu idle state disable for all cpus 656 * and states. 657 * 658 * Normally, the memory is freed automatically in 659 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an 660 * error. 661 */ 662 void free_cpu_idle_disable_states(void) 663 { 664 int cpu; 665 int nr_cpus; 666 667 if (!saved_cpu_idle_disable_state) 668 return; 669 670 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 671 672 for (cpu = 0; cpu < nr_cpus; cpu++) { 673 free(saved_cpu_idle_disable_state[cpu]); 674 saved_cpu_idle_disable_state[cpu] = NULL; 675 } 676 677 free(saved_cpu_idle_disable_state); 678 saved_cpu_idle_disable_state = NULL; 679 } 680 681 /* 682 * set_deepest_cpu_idle_state - limit idle state of cpu 683 * 684 * Disables all idle states deeper than the one given in 685 * deepest_state (assuming states with higher number are deeper). 686 * 687 * This is used to reduce the exit from idle latency. Unlike 688 * set_cpu_dma_latency, it can disable idle states per cpu. 689 * 690 * Return: idle state count on success, negative on error 691 */ 692 int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) 693 { 694 unsigned int nr_states; 695 unsigned int state; 696 int result; 697 698 nr_states = cpuidle_state_count(cpu); 699 700 for (state = deepest_state + 1; state < nr_states; state++) { 701 result = cpuidle_state_disable(cpu, state, 1); 702 if (result < 0) 703 return result; 704 } 705 706 return nr_states; 707 } 708 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 709 710 #define _STR(x) #x 711 #define STR(x) _STR(x) 712 713 /* 714 * find_mount - find a the mount point of a given fs 715 * 716 * Returns 0 if mount is not found, otherwise return 1 and fill mp 717 * with the mount point. 718 */ 719 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 720 { 721 char mount_point[MAX_PATH+1]; 722 char type[100]; 723 int found = 0; 724 FILE *fp; 725 726 fp = fopen("/proc/mounts", "r"); 727 if (!fp) 728 return 0; 729 730 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 731 if (strcmp(type, fs) == 0) { 732 found = 1; 733 break; 734 } 735 } 736 fclose(fp); 737 738 if (!found) 739 return 0; 740 741 memset(mp, 0, sizeof_mp); 742 strncpy(mp, mount_point, sizeof_mp - 1); 743 744 debug_msg("Fs %s found at %s\n", fs, mp); 745 return 1; 746 } 747 748 /* 749 * get_self_cgroup - get the current thread cgroup path 750 * 751 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 752 * 753 * 0::/user.slice/user-0.slice/session-3.scope'\n' 754 * 755 * This function is interested in the content after the second : and before the '\n'. 756 * 757 * Returns 1 if a string was found, 0 otherwise. 758 */ 759 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 760 { 761 char path[MAX_PATH], *start; 762 int fd, retval; 763 764 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 765 766 fd = open(path, O_RDONLY); 767 if (fd < 0) 768 return 0; 769 770 memset(path, 0, sizeof(path)); 771 retval = read(fd, path, MAX_PATH); 772 773 close(fd); 774 775 if (retval <= 0) 776 return 0; 777 778 path[MAX_PATH-1] = '\0'; 779 start = path; 780 781 start = strstr(start, ":"); 782 if (!start) 783 return 0; 784 785 /* skip ":" */ 786 start++; 787 788 start = strstr(start, ":"); 789 if (!start) 790 return 0; 791 792 /* skip ":" */ 793 start++; 794 795 if (strlen(start) >= sizeof_self_cg) 796 return 0; 797 798 snprintf(self_cg, sizeof_self_cg, "%s", start); 799 800 /* Swap '\n' with '\0' */ 801 start = strstr(self_cg, "\n"); 802 803 /* there must be '\n' */ 804 if (!start) 805 return 0; 806 807 /* ok, it found a string after the second : and before the \n */ 808 *start = '\0'; 809 810 return 1; 811 } 812 813 /* 814 * open_cgroup_procs - Open the cgroup.procs file for the given cgroup 815 * 816 * If cgroup argument is not NULL, the cgroup.procs file for that cgroup 817 * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread 818 * will be used. 819 * 820 * Supports cgroup v2. 821 * 822 * Returns the file descriptor on success, -1 otherwise. 823 */ 824 static int open_cgroup_procs(const char *cgroup) 825 { 826 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 827 char cgroup_procs[MAX_PATH]; 828 int retval; 829 int cg_fd; 830 831 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 832 if (!retval) { 833 err_msg("Did not find cgroupv2 mount point\n"); 834 return -1; 835 } 836 837 if (!cgroup) { 838 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 839 sizeof(cgroup_path) - strlen(cgroup_path)); 840 if (!retval) { 841 err_msg("Did not find self cgroup\n"); 842 return -1; 843 } 844 } else { 845 snprintf(&cgroup_path[strlen(cgroup_path)], 846 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 847 } 848 849 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 850 851 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 852 853 cg_fd = open(cgroup_procs, O_RDWR); 854 if (cg_fd < 0) 855 return -1; 856 857 return cg_fd; 858 } 859 860 /* 861 * set_pid_cgroup - Set cgroup to pid_t pid 862 * 863 * If cgroup argument is not NULL, the threads will move to the given cgroup. 864 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 865 * 866 * Supports cgroup v2. 867 * 868 * Returns 1 on success, 0 otherwise. 869 */ 870 int set_pid_cgroup(pid_t pid, const char *cgroup) 871 { 872 char pid_str[24]; 873 int retval; 874 int cg_fd; 875 876 cg_fd = open_cgroup_procs(cgroup); 877 if (cg_fd < 0) 878 return 0; 879 880 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 881 882 retval = write(cg_fd, pid_str, strlen(pid_str)); 883 if (retval < 0) 884 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 885 pid_str, strerror(errno)); 886 else 887 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 888 889 close(cg_fd); 890 891 return (retval >= 0); 892 } 893 894 /** 895 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 896 * 897 * If cgroup argument is not NULL, the threads will move to the given cgroup. 898 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 899 * 900 * Supports cgroup v2. 901 * 902 * Returns 1 on success, 0 otherwise. 903 */ 904 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 905 { 906 struct dirent *proc_entry; 907 DIR *procfs; 908 int retval; 909 int cg_fd; 910 911 if (strlen(comm_prefix) >= MAX_PATH) { 912 err_msg("Command prefix is too long: %d < strlen(%s)\n", 913 MAX_PATH, comm_prefix); 914 return 0; 915 } 916 917 cg_fd = open_cgroup_procs(cgroup); 918 if (cg_fd < 0) 919 return 0; 920 921 procfs = opendir("/proc"); 922 if (!procfs) { 923 err_msg("Could not open procfs\n"); 924 goto out_cg; 925 } 926 927 while ((proc_entry = readdir(procfs))) { 928 929 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 930 if (!retval) 931 continue; 932 933 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 934 if (retval < 0) { 935 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 936 proc_entry->d_name, strerror(errno)); 937 goto out_procfs; 938 } 939 940 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 941 } 942 943 closedir(procfs); 944 close(cg_fd); 945 return 1; 946 947 out_procfs: 948 closedir(procfs); 949 out_cg: 950 close(cg_fd); 951 return 0; 952 } 953 954 /** 955 * auto_house_keeping - Automatically move rtla out of measurement threads 956 * 957 * Try to move rtla away from the tracer, if possible. 958 * 959 * Returns 1 on success, 0 otherwise. 960 */ 961 int auto_house_keeping(cpu_set_t *monitored_cpus) 962 { 963 cpu_set_t rtla_cpus, house_keeping_cpus; 964 int retval; 965 966 /* first get the CPUs in which rtla can actually run. */ 967 retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); 968 if (retval == -1) { 969 debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); 970 return 0; 971 } 972 973 /* then check if the existing setup is already good. */ 974 CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 975 if (!CPU_COUNT(&house_keeping_cpus)) { 976 debug_msg("rtla and the monitored CPUs do not share CPUs."); 977 debug_msg("Skipping auto house-keeping\n"); 978 return 1; 979 } 980 981 /* remove the intersection */ 982 CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 983 984 /* get only those that rtla can run */ 985 CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); 986 987 /* is there any cpu left? */ 988 if (!CPU_COUNT(&house_keeping_cpus)) { 989 debug_msg("Could not find any CPU for auto house-keeping\n"); 990 return 0; 991 } 992 993 retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); 994 if (retval == -1) { 995 debug_msg("Could not set affinity for auto house-keeping\n"); 996 return 0; 997 } 998 999 debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); 1000 1001 return 1; 1002 } 1003 1004 /** 1005 * parse_optional_arg - Parse optional argument value 1006 * 1007 * Parse optional argument value, which can be in the form of: 1008 * -sarg, -s/--long=arg, -s/--long arg 1009 * 1010 * Returns arg value if found, NULL otherwise. 1011 */ 1012 char *parse_optional_arg(int argc, char **argv) 1013 { 1014 if (optarg) { 1015 if (optarg[0] == '=') { 1016 /* skip the = */ 1017 return &optarg[1]; 1018 } else { 1019 return optarg; 1020 } 1021 /* parse argument of form -s [arg] and --long [arg]*/ 1022 } else if (optind < argc && argv[optind][0] != '-') { 1023 /* consume optind */ 1024 return argv[optind++]; 1025 } else { 1026 return NULL; 1027 } 1028 } 1029 1030 /* 1031 * strtoi - convert string to integer with error checking 1032 * 1033 * Returns 0 on success, -1 if conversion fails or result is out of int range. 1034 */ 1035 int strtoi(const char *s, int *res) 1036 { 1037 char *end_ptr; 1038 long lres; 1039 1040 if (!*s) 1041 return -1; 1042 1043 errno = 0; 1044 lres = strtol(s, &end_ptr, 0); 1045 if (errno || *end_ptr || lres > INT_MAX || lres < INT_MIN) 1046 return -1; 1047 1048 *res = (int) lres; 1049 return 0; 1050 } 1051