1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #ifdef HAVE_LIBCPUPOWER_SUPPORT 8 #include <cpuidle.h> 9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 10 #include <dirent.h> 11 #include <stdarg.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <unistd.h> 15 #include <ctype.h> 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <sched.h> 19 #include <stdio.h> 20 #include <limits.h> 21 22 #include "utils.h" 23 24 #define MAX_MSG_LENGTH 1024 25 int config_debug; 26 27 /* 28 * err_msg - print an error message to the stderr 29 */ 30 void err_msg(const char *fmt, ...) 31 { 32 char message[MAX_MSG_LENGTH]; 33 va_list ap; 34 35 va_start(ap, fmt); 36 vsnprintf(message, sizeof(message), fmt, ap); 37 va_end(ap); 38 39 fprintf(stderr, "%s", message); 40 } 41 42 /* 43 * debug_msg - print a debug message to stderr if debug is set 44 */ 45 void debug_msg(const char *fmt, ...) 46 { 47 char message[MAX_MSG_LENGTH]; 48 va_list ap; 49 50 if (!config_debug) 51 return; 52 53 va_start(ap, fmt); 54 vsnprintf(message, sizeof(message), fmt, ap); 55 va_end(ap); 56 57 fprintf(stderr, "%s", message); 58 } 59 60 /* 61 * fatal - print an error message and EOL to stderr and exit with ERROR 62 */ 63 void fatal(const char *fmt, ...) 64 { 65 va_list ap; 66 67 va_start(ap, fmt); 68 vfprintf(stderr, fmt, ap); 69 va_end(ap); 70 fprintf(stderr, "\n"); 71 72 exit(ERROR); 73 } 74 75 /* 76 * get_llong_from_str - get a long long int from a string 77 */ 78 long long get_llong_from_str(char *start) 79 { 80 long long value; 81 char *end; 82 83 errno = 0; 84 value = strtoll(start, &end, 10); 85 if (errno || start == end) 86 return -1; 87 88 return value; 89 } 90 91 /* 92 * get_duration - fill output with a human readable duration since start_time 93 */ 94 void get_duration(time_t start_time, char *output, int output_size) 95 { 96 time_t now = time(NULL); 97 struct tm *tm_info; 98 time_t duration; 99 100 duration = difftime(now, start_time); 101 tm_info = gmtime(&duration); 102 103 snprintf(output, output_size, "%3d %02d:%02d:%02d", 104 tm_info->tm_yday, 105 tm_info->tm_hour, 106 tm_info->tm_min, 107 tm_info->tm_sec); 108 } 109 110 /* 111 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 112 * 113 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 114 * filling cpu_set_t argument. 115 * 116 * Returns 0 on success, 1 otherwise. 117 */ 118 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 119 { 120 const char *p; 121 int end_cpu; 122 int nr_cpus; 123 int cpu; 124 int i; 125 126 CPU_ZERO(set); 127 128 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 129 130 for (p = cpu_list; *p; ) { 131 cpu = atoi(p); 132 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) 133 goto err; 134 135 while (isdigit(*p)) 136 p++; 137 if (*p == '-') { 138 p++; 139 end_cpu = atoi(p); 140 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) 141 goto err; 142 while (isdigit(*p)) 143 p++; 144 } else 145 end_cpu = cpu; 146 147 if (cpu == end_cpu) { 148 debug_msg("cpu_set: adding cpu %d\n", cpu); 149 CPU_SET(cpu, set); 150 } else { 151 for (i = cpu; i <= end_cpu; i++) { 152 debug_msg("cpu_set: adding cpu %d\n", i); 153 CPU_SET(i, set); 154 } 155 } 156 157 if (*p == ',') 158 p++; 159 } 160 161 return 0; 162 err: 163 debug_msg("Error parsing the cpu set %s\n", cpu_list); 164 return 1; 165 } 166 167 /* 168 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 169 */ 170 long parse_seconds_duration(char *val) 171 { 172 char *end; 173 long t; 174 175 t = strtol(val, &end, 10); 176 177 if (end) { 178 switch (*end) { 179 case 's': 180 case 'S': 181 break; 182 case 'm': 183 case 'M': 184 t *= 60; 185 break; 186 case 'h': 187 case 'H': 188 t *= 60 * 60; 189 break; 190 191 case 'd': 192 case 'D': 193 t *= 24 * 60 * 60; 194 break; 195 } 196 } 197 198 return t; 199 } 200 201 /* 202 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 203 */ 204 long parse_ns_duration(char *val) 205 { 206 char *end; 207 long t; 208 209 t = strtol(val, &end, 10); 210 211 if (end) { 212 if (!strncmp(end, "ns", 2)) { 213 return t; 214 } else if (!strncmp(end, "us", 2)) { 215 t *= 1000; 216 return t; 217 } else if (!strncmp(end, "ms", 2)) { 218 t *= 1000 * 1000; 219 return t; 220 } else if (!strncmp(end, "s", 1)) { 221 t *= 1000 * 1000 * 1000; 222 return t; 223 } 224 return -1; 225 } 226 227 return t; 228 } 229 230 /* 231 * This is a set of helper functions to use SCHED_DEADLINE. 232 */ 233 #ifndef __NR_sched_setattr 234 # ifdef __x86_64__ 235 # define __NR_sched_setattr 314 236 # elif __i386__ 237 # define __NR_sched_setattr 351 238 # elif __arm__ 239 # define __NR_sched_setattr 380 240 # elif __aarch64__ || __riscv 241 # define __NR_sched_setattr 274 242 # elif __powerpc__ 243 # define __NR_sched_setattr 355 244 # elif __s390x__ 245 # define __NR_sched_setattr 345 246 # elif __loongarch__ 247 # define __NR_sched_setattr 274 248 # endif 249 #endif 250 251 #define SCHED_DEADLINE 6 252 253 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, 254 unsigned int flags) { 255 return syscall(__NR_sched_setattr, pid, attr, flags); 256 } 257 258 int __set_sched_attr(int pid, struct sched_attr *attr) 259 { 260 int flags = 0; 261 int retval; 262 263 retval = syscall_sched_setattr(pid, attr, flags); 264 if (retval < 0) { 265 err_msg("Failed to set sched attributes to the pid %d: %s\n", 266 pid, strerror(errno)); 267 return 1; 268 } 269 270 return 0; 271 } 272 273 /* 274 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 275 * 276 * Check if the procfs entry is a directory of a process, and then check if the 277 * process has a comm with the prefix set in char *comm_prefix. As the 278 * current users of this function only check for kernel threads, there is no 279 * need to check for the threads for the process. 280 * 281 * Return: True if the proc_entry contains a comm file with comm_prefix*. 282 * Otherwise returns false. 283 */ 284 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) 285 { 286 char buffer[MAX_PATH]; 287 int comm_fd, retval; 288 char *t_name; 289 290 if (proc_entry->d_type != DT_DIR) 291 return 0; 292 293 if (*proc_entry->d_name == '.') 294 return 0; 295 296 /* check if the string is a pid */ 297 for (t_name = proc_entry->d_name; t_name; t_name++) { 298 if (!isdigit(*t_name)) 299 break; 300 } 301 302 if (*t_name != '\0') 303 return 0; 304 305 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); 306 comm_fd = open(buffer, O_RDONLY); 307 if (comm_fd < 0) 308 return 0; 309 310 memset(buffer, 0, MAX_PATH); 311 retval = read(comm_fd, buffer, MAX_PATH); 312 313 close(comm_fd); 314 315 if (retval <= 0) 316 return 0; 317 318 buffer[MAX_PATH-1] = '\0'; 319 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); 320 if (retval) 321 return 0; 322 323 /* comm already have \n */ 324 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); 325 326 return 1; 327 } 328 329 /* 330 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix 331 * 332 * This function uses procfs to list the currently running threads and then set the 333 * sched_attr *attr to the threads that start with char *comm_prefix. It is 334 * mainly used to set the priority to the kernel threads created by the 335 * tracers. 336 */ 337 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 338 { 339 struct dirent *proc_entry; 340 DIR *procfs; 341 int retval; 342 int pid; 343 344 if (strlen(comm_prefix) >= MAX_PATH) { 345 err_msg("Command prefix is too long: %d < strlen(%s)\n", 346 MAX_PATH, comm_prefix); 347 return 1; 348 } 349 350 procfs = opendir("/proc"); 351 if (!procfs) { 352 err_msg("Could not open procfs\n"); 353 return 1; 354 } 355 356 while ((proc_entry = readdir(procfs))) { 357 358 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 359 if (!retval) 360 continue; 361 362 if (strtoi(proc_entry->d_name, &pid)) { 363 err_msg("'%s' is not a valid pid", proc_entry->d_name); 364 goto out_err; 365 } 366 /* procfs_is_workload_pid confirmed it is a pid */ 367 retval = __set_sched_attr(pid, attr); 368 if (retval) { 369 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 370 goto out_err; 371 } 372 373 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 374 } 375 return 0; 376 377 out_err: 378 closedir(procfs); 379 return 1; 380 } 381 382 #define INVALID_VAL (~0L) 383 static long get_long_ns_after_colon(char *start) 384 { 385 long val = INVALID_VAL; 386 387 /* find the ":" */ 388 start = strstr(start, ":"); 389 if (!start) 390 return -1; 391 392 /* skip ":" */ 393 start++; 394 val = parse_ns_duration(start); 395 396 return val; 397 } 398 399 static long get_long_after_colon(char *start) 400 { 401 long val = INVALID_VAL; 402 403 /* find the ":" */ 404 start = strstr(start, ":"); 405 if (!start) 406 return -1; 407 408 /* skip ":" */ 409 start++; 410 val = get_llong_from_str(start); 411 412 return val; 413 } 414 415 /* 416 * parse priority in the format: 417 * SCHED_OTHER: 418 * o:<prio> 419 * O:<prio> 420 * SCHED_RR: 421 * r:<prio> 422 * R:<prio> 423 * SCHED_FIFO: 424 * f:<prio> 425 * F:<prio> 426 * SCHED_DEADLINE: 427 * d:runtime:period 428 * D:runtime:period 429 */ 430 int parse_prio(char *arg, struct sched_attr *sched_param) 431 { 432 long prio; 433 long runtime; 434 long period; 435 436 memset(sched_param, 0, sizeof(*sched_param)); 437 sched_param->size = sizeof(*sched_param); 438 439 switch (arg[0]) { 440 case 'd': 441 case 'D': 442 /* d:runtime:period */ 443 if (strlen(arg) < 4) 444 return -1; 445 446 runtime = get_long_ns_after_colon(arg); 447 if (runtime == INVALID_VAL) 448 return -1; 449 450 period = get_long_ns_after_colon(&arg[2]); 451 if (period == INVALID_VAL) 452 return -1; 453 454 if (runtime > period) 455 return -1; 456 457 sched_param->sched_policy = SCHED_DEADLINE; 458 sched_param->sched_runtime = runtime; 459 sched_param->sched_deadline = period; 460 sched_param->sched_period = period; 461 break; 462 case 'f': 463 case 'F': 464 /* f:prio */ 465 prio = get_long_after_colon(arg); 466 if (prio == INVALID_VAL) 467 return -1; 468 469 if (prio < sched_get_priority_min(SCHED_FIFO)) 470 return -1; 471 if (prio > sched_get_priority_max(SCHED_FIFO)) 472 return -1; 473 474 sched_param->sched_policy = SCHED_FIFO; 475 sched_param->sched_priority = prio; 476 break; 477 case 'r': 478 case 'R': 479 /* r:prio */ 480 prio = get_long_after_colon(arg); 481 if (prio == INVALID_VAL) 482 return -1; 483 484 if (prio < sched_get_priority_min(SCHED_RR)) 485 return -1; 486 if (prio > sched_get_priority_max(SCHED_RR)) 487 return -1; 488 489 sched_param->sched_policy = SCHED_RR; 490 sched_param->sched_priority = prio; 491 break; 492 case 'o': 493 case 'O': 494 /* o:prio */ 495 prio = get_long_after_colon(arg); 496 if (prio == INVALID_VAL) 497 return -1; 498 499 if (prio < MIN_NICE) 500 return -1; 501 if (prio > MAX_NICE) 502 return -1; 503 504 sched_param->sched_policy = SCHED_OTHER; 505 sched_param->sched_nice = prio; 506 break; 507 default: 508 return -1; 509 } 510 return 0; 511 } 512 513 /* 514 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 515 * 516 * This is used to reduce the exit from idle latency. The value 517 * will be reset once the file descriptor of /dev/cpu_dma_latecy 518 * is closed. 519 * 520 * Return: the /dev/cpu_dma_latecy file descriptor 521 */ 522 int set_cpu_dma_latency(int32_t latency) 523 { 524 int retval; 525 int fd; 526 527 fd = open("/dev/cpu_dma_latency", O_RDWR); 528 if (fd < 0) { 529 err_msg("Error opening /dev/cpu_dma_latency\n"); 530 return -1; 531 } 532 533 retval = write(fd, &latency, 4); 534 if (retval < 1) { 535 err_msg("Error setting /dev/cpu_dma_latency\n"); 536 close(fd); 537 return -1; 538 } 539 540 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 541 542 return fd; 543 } 544 545 #ifdef HAVE_LIBCPUPOWER_SUPPORT 546 static unsigned int **saved_cpu_idle_disable_state; 547 static size_t saved_cpu_idle_disable_state_alloc_ctr; 548 549 /* 550 * save_cpu_idle_state_disable - save disable for all idle states of a cpu 551 * 552 * Saves the current disable of all idle states of a cpu, to be subsequently 553 * restored via restore_cpu_idle_disable_state. 554 * 555 * Return: idle state count on success, negative on error 556 */ 557 int save_cpu_idle_disable_state(unsigned int cpu) 558 { 559 unsigned int nr_states; 560 unsigned int state; 561 int disabled; 562 int nr_cpus; 563 564 nr_states = cpuidle_state_count(cpu); 565 566 if (nr_states == 0) 567 return 0; 568 569 if (saved_cpu_idle_disable_state == NULL) { 570 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 571 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); 572 if (!saved_cpu_idle_disable_state) 573 return -1; 574 } 575 576 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); 577 if (!saved_cpu_idle_disable_state[cpu]) 578 return -1; 579 saved_cpu_idle_disable_state_alloc_ctr++; 580 581 for (state = 0; state < nr_states; state++) { 582 disabled = cpuidle_is_state_disabled(cpu, state); 583 if (disabled < 0) 584 return disabled; 585 saved_cpu_idle_disable_state[cpu][state] = disabled; 586 } 587 588 return nr_states; 589 } 590 591 /* 592 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu 593 * 594 * Restores the current disable state of all idle states of a cpu that was 595 * previously saved by save_cpu_idle_disable_state. 596 * 597 * Return: idle state count on success, negative on error 598 */ 599 int restore_cpu_idle_disable_state(unsigned int cpu) 600 { 601 unsigned int nr_states; 602 unsigned int state; 603 int disabled; 604 int result; 605 606 nr_states = cpuidle_state_count(cpu); 607 608 if (nr_states == 0) 609 return 0; 610 611 if (!saved_cpu_idle_disable_state) 612 return -1; 613 614 for (state = 0; state < nr_states; state++) { 615 if (!saved_cpu_idle_disable_state[cpu]) 616 return -1; 617 disabled = saved_cpu_idle_disable_state[cpu][state]; 618 result = cpuidle_state_disable(cpu, state, disabled); 619 if (result < 0) 620 return result; 621 } 622 623 free(saved_cpu_idle_disable_state[cpu]); 624 saved_cpu_idle_disable_state[cpu] = NULL; 625 saved_cpu_idle_disable_state_alloc_ctr--; 626 if (saved_cpu_idle_disable_state_alloc_ctr == 0) { 627 free(saved_cpu_idle_disable_state); 628 saved_cpu_idle_disable_state = NULL; 629 } 630 631 return nr_states; 632 } 633 634 /* 635 * free_cpu_idle_disable_states - free saved idle state disable for all cpus 636 * 637 * Frees the memory used for storing cpu idle state disable for all cpus 638 * and states. 639 * 640 * Normally, the memory is freed automatically in 641 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an 642 * error. 643 */ 644 void free_cpu_idle_disable_states(void) 645 { 646 int cpu; 647 int nr_cpus; 648 649 if (!saved_cpu_idle_disable_state) 650 return; 651 652 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 653 654 for (cpu = 0; cpu < nr_cpus; cpu++) { 655 free(saved_cpu_idle_disable_state[cpu]); 656 saved_cpu_idle_disable_state[cpu] = NULL; 657 } 658 659 free(saved_cpu_idle_disable_state); 660 saved_cpu_idle_disable_state = NULL; 661 } 662 663 /* 664 * set_deepest_cpu_idle_state - limit idle state of cpu 665 * 666 * Disables all idle states deeper than the one given in 667 * deepest_state (assuming states with higher number are deeper). 668 * 669 * This is used to reduce the exit from idle latency. Unlike 670 * set_cpu_dma_latency, it can disable idle states per cpu. 671 * 672 * Return: idle state count on success, negative on error 673 */ 674 int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) 675 { 676 unsigned int nr_states; 677 unsigned int state; 678 int result; 679 680 nr_states = cpuidle_state_count(cpu); 681 682 for (state = deepest_state + 1; state < nr_states; state++) { 683 result = cpuidle_state_disable(cpu, state, 1); 684 if (result < 0) 685 return result; 686 } 687 688 return nr_states; 689 } 690 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 691 692 #define _STR(x) #x 693 #define STR(x) _STR(x) 694 695 /* 696 * find_mount - find a the mount point of a given fs 697 * 698 * Returns 0 if mount is not found, otherwise return 1 and fill mp 699 * with the mount point. 700 */ 701 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 702 { 703 char mount_point[MAX_PATH+1]; 704 char type[100]; 705 int found = 0; 706 FILE *fp; 707 708 fp = fopen("/proc/mounts", "r"); 709 if (!fp) 710 return 0; 711 712 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 713 if (strcmp(type, fs) == 0) { 714 found = 1; 715 break; 716 } 717 } 718 fclose(fp); 719 720 if (!found) 721 return 0; 722 723 memset(mp, 0, sizeof_mp); 724 strncpy(mp, mount_point, sizeof_mp - 1); 725 726 debug_msg("Fs %s found at %s\n", fs, mp); 727 return 1; 728 } 729 730 /* 731 * get_self_cgroup - get the current thread cgroup path 732 * 733 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 734 * 735 * 0::/user.slice/user-0.slice/session-3.scope'\n' 736 * 737 * This function is interested in the content after the second : and before the '\n'. 738 * 739 * Returns 1 if a string was found, 0 otherwise. 740 */ 741 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 742 { 743 char path[MAX_PATH], *start; 744 int fd, retval; 745 746 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 747 748 fd = open(path, O_RDONLY); 749 if (fd < 0) 750 return 0; 751 752 memset(path, 0, sizeof(path)); 753 retval = read(fd, path, MAX_PATH); 754 755 close(fd); 756 757 if (retval <= 0) 758 return 0; 759 760 path[MAX_PATH-1] = '\0'; 761 start = path; 762 763 start = strstr(start, ":"); 764 if (!start) 765 return 0; 766 767 /* skip ":" */ 768 start++; 769 770 start = strstr(start, ":"); 771 if (!start) 772 return 0; 773 774 /* skip ":" */ 775 start++; 776 777 if (strlen(start) >= sizeof_self_cg) 778 return 0; 779 780 snprintf(self_cg, sizeof_self_cg, "%s", start); 781 782 /* Swap '\n' with '\0' */ 783 start = strstr(self_cg, "\n"); 784 785 /* there must be '\n' */ 786 if (!start) 787 return 0; 788 789 /* ok, it found a string after the second : and before the \n */ 790 *start = '\0'; 791 792 return 1; 793 } 794 795 /* 796 * open_cgroup_procs - Open the cgroup.procs file for the given cgroup 797 * 798 * If cgroup argument is not NULL, the cgroup.procs file for that cgroup 799 * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread 800 * will be used. 801 * 802 * Supports cgroup v2. 803 * 804 * Returns the file descriptor on success, -1 otherwise. 805 */ 806 static int open_cgroup_procs(const char *cgroup) 807 { 808 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 809 char cgroup_procs[MAX_PATH]; 810 int retval; 811 int cg_fd; 812 813 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 814 if (!retval) { 815 err_msg("Did not find cgroupv2 mount point\n"); 816 return -1; 817 } 818 819 if (!cgroup) { 820 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 821 sizeof(cgroup_path) - strlen(cgroup_path)); 822 if (!retval) { 823 err_msg("Did not find self cgroup\n"); 824 return -1; 825 } 826 } else { 827 snprintf(&cgroup_path[strlen(cgroup_path)], 828 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 829 } 830 831 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 832 833 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 834 835 cg_fd = open(cgroup_procs, O_RDWR); 836 if (cg_fd < 0) 837 return -1; 838 839 return cg_fd; 840 } 841 842 /* 843 * set_pid_cgroup - Set cgroup to pid_t pid 844 * 845 * If cgroup argument is not NULL, the threads will move to the given cgroup. 846 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 847 * 848 * Supports cgroup v2. 849 * 850 * Returns 1 on success, 0 otherwise. 851 */ 852 int set_pid_cgroup(pid_t pid, const char *cgroup) 853 { 854 char pid_str[24]; 855 int retval; 856 int cg_fd; 857 858 cg_fd = open_cgroup_procs(cgroup); 859 if (cg_fd < 0) 860 return 0; 861 862 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 863 864 retval = write(cg_fd, pid_str, strlen(pid_str)); 865 if (retval < 0) 866 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 867 pid_str, strerror(errno)); 868 else 869 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 870 871 close(cg_fd); 872 873 return (retval >= 0); 874 } 875 876 /** 877 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 878 * 879 * If cgroup argument is not NULL, the threads will move to the given cgroup. 880 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 881 * 882 * Supports cgroup v2. 883 * 884 * Returns 1 on success, 0 otherwise. 885 */ 886 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 887 { 888 struct dirent *proc_entry; 889 DIR *procfs; 890 int retval; 891 int cg_fd; 892 893 if (strlen(comm_prefix) >= MAX_PATH) { 894 err_msg("Command prefix is too long: %d < strlen(%s)\n", 895 MAX_PATH, comm_prefix); 896 return 0; 897 } 898 899 cg_fd = open_cgroup_procs(cgroup); 900 if (cg_fd < 0) 901 return 0; 902 903 procfs = opendir("/proc"); 904 if (!procfs) { 905 err_msg("Could not open procfs\n"); 906 goto out_cg; 907 } 908 909 while ((proc_entry = readdir(procfs))) { 910 911 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 912 if (!retval) 913 continue; 914 915 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 916 if (retval < 0) { 917 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 918 proc_entry->d_name, strerror(errno)); 919 goto out_procfs; 920 } 921 922 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 923 } 924 925 closedir(procfs); 926 close(cg_fd); 927 return 1; 928 929 out_procfs: 930 closedir(procfs); 931 out_cg: 932 close(cg_fd); 933 return 0; 934 } 935 936 /** 937 * auto_house_keeping - Automatically move rtla out of measurement threads 938 * 939 * Try to move rtla away from the tracer, if possible. 940 * 941 * Returns 1 on success, 0 otherwise. 942 */ 943 int auto_house_keeping(cpu_set_t *monitored_cpus) 944 { 945 cpu_set_t rtla_cpus, house_keeping_cpus; 946 int retval; 947 948 /* first get the CPUs in which rtla can actually run. */ 949 retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); 950 if (retval == -1) { 951 debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); 952 return 0; 953 } 954 955 /* then check if the existing setup is already good. */ 956 CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 957 if (!CPU_COUNT(&house_keeping_cpus)) { 958 debug_msg("rtla and the monitored CPUs do not share CPUs."); 959 debug_msg("Skipping auto house-keeping\n"); 960 return 1; 961 } 962 963 /* remove the intersection */ 964 CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 965 966 /* get only those that rtla can run */ 967 CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); 968 969 /* is there any cpu left? */ 970 if (!CPU_COUNT(&house_keeping_cpus)) { 971 debug_msg("Could not find any CPU for auto house-keeping\n"); 972 return 0; 973 } 974 975 retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); 976 if (retval == -1) { 977 debug_msg("Could not set affinity for auto house-keeping\n"); 978 return 0; 979 } 980 981 debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); 982 983 return 1; 984 } 985 986 /** 987 * parse_optional_arg - Parse optional argument value 988 * 989 * Parse optional argument value, which can be in the form of: 990 * -sarg, -s/--long=arg, -s/--long arg 991 * 992 * Returns arg value if found, NULL otherwise. 993 */ 994 char *parse_optional_arg(int argc, char **argv) 995 { 996 if (optarg) { 997 if (optarg[0] == '=') { 998 /* skip the = */ 999 return &optarg[1]; 1000 } else { 1001 return optarg; 1002 } 1003 /* parse argument of form -s [arg] and --long [arg]*/ 1004 } else if (optind < argc && argv[optind][0] != '-') { 1005 /* consume optind */ 1006 return argv[optind++]; 1007 } else { 1008 return NULL; 1009 } 1010 } 1011 1012 /* 1013 * strtoi - convert string to integer with error checking 1014 * 1015 * Returns 0 on success, -1 if conversion fails or result is out of int range. 1016 */ 1017 int strtoi(const char *s, int *res) 1018 { 1019 char *end_ptr; 1020 long lres; 1021 1022 if (!*s) 1023 return -1; 1024 1025 errno = 0; 1026 lres = strtol(s, &end_ptr, 0); 1027 if (errno || *end_ptr || lres > INT_MAX || lres < INT_MIN) 1028 return -1; 1029 1030 *res = (int) lres; 1031 return 0; 1032 } 1033