1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #ifdef HAVE_LIBCPUPOWER_SUPPORT 8 #include <cpuidle.h> 9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 10 #include <dirent.h> 11 #include <stdarg.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <unistd.h> 15 #include <ctype.h> 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <sched.h> 19 #include <stdio.h> 20 #include <limits.h> 21 22 #include "utils.h" 23 24 #define MAX_MSG_LENGTH 1024 25 int config_debug; 26 27 /* 28 * err_msg - print an error message to the stderr 29 */ 30 void err_msg(const char *fmt, ...) 31 { 32 char message[MAX_MSG_LENGTH]; 33 va_list ap; 34 35 va_start(ap, fmt); 36 vsnprintf(message, sizeof(message), fmt, ap); 37 va_end(ap); 38 39 fprintf(stderr, "%s", message); 40 } 41 42 /* 43 * debug_msg - print a debug message to stderr if debug is set 44 */ 45 void debug_msg(const char *fmt, ...) 46 { 47 char message[MAX_MSG_LENGTH]; 48 va_list ap; 49 50 if (!config_debug) 51 return; 52 53 va_start(ap, fmt); 54 vsnprintf(message, sizeof(message), fmt, ap); 55 va_end(ap); 56 57 fprintf(stderr, "%s", message); 58 } 59 60 /* 61 * fatal - print an error message and EOL to stderr and exit with ERROR 62 */ 63 void fatal(const char *fmt, ...) 64 { 65 va_list ap; 66 67 va_start(ap, fmt); 68 vfprintf(stderr, fmt, ap); 69 va_end(ap); 70 fprintf(stderr, "\n"); 71 72 exit(ERROR); 73 } 74 75 /* 76 * get_llong_from_str - get a long long int from a string 77 */ 78 long long get_llong_from_str(char *start) 79 { 80 long long value; 81 char *end; 82 83 errno = 0; 84 value = strtoll(start, &end, 10); 85 if (errno || start == end) 86 return -1; 87 88 return value; 89 } 90 91 /* 92 * get_duration - fill output with a human readable duration since start_time 93 */ 94 void get_duration(time_t start_time, char *output, int output_size) 95 { 96 time_t now = time(NULL); 97 struct tm *tm_info; 98 time_t duration; 99 100 duration = difftime(now, start_time); 101 tm_info = gmtime(&duration); 102 103 snprintf(output, output_size, "%3d %02d:%02d:%02d", 104 tm_info->tm_yday, 105 tm_info->tm_hour, 106 tm_info->tm_min, 107 tm_info->tm_sec); 108 } 109 110 /* 111 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 112 * 113 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 114 * filling cpu_set_t argument. 115 * 116 * Returns 1 on success, 0 otherwise. 117 */ 118 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 119 { 120 const char *p; 121 int end_cpu; 122 int nr_cpus; 123 int cpu; 124 int i; 125 126 CPU_ZERO(set); 127 128 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 129 130 for (p = cpu_list; *p; ) { 131 if (strtoi(p, &cpu)) 132 goto err; 133 if (cpu < 0 || cpu >= nr_cpus) 134 goto err; 135 136 while (isdigit(*p)) 137 p++; 138 if (*p == '-') { 139 p++; 140 if (strtoi(p, &end_cpu)) 141 goto err; 142 if (end_cpu < cpu || end_cpu >= nr_cpus) 143 goto err; 144 while (isdigit(*p)) 145 p++; 146 } else 147 end_cpu = cpu; 148 149 if (cpu == end_cpu) { 150 debug_msg("cpu_set: adding cpu %d\n", cpu); 151 CPU_SET(cpu, set); 152 } else { 153 for (i = cpu; i <= end_cpu; i++) { 154 debug_msg("cpu_set: adding cpu %d\n", i); 155 CPU_SET(i, set); 156 } 157 } 158 159 if (*p == ',') 160 p++; 161 } 162 163 return 0; 164 err: 165 debug_msg("Error parsing the cpu set %s\n", cpu_list); 166 return 1; 167 } 168 169 /* 170 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 171 */ 172 long parse_seconds_duration(char *val) 173 { 174 char *end; 175 long t; 176 177 t = strtol(val, &end, 10); 178 179 if (end) { 180 switch (*end) { 181 case 's': 182 case 'S': 183 break; 184 case 'm': 185 case 'M': 186 t *= 60; 187 break; 188 case 'h': 189 case 'H': 190 t *= 60 * 60; 191 break; 192 193 case 'd': 194 case 'D': 195 t *= 24 * 60 * 60; 196 break; 197 } 198 } 199 200 return t; 201 } 202 203 /* 204 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 205 */ 206 long parse_ns_duration(char *val) 207 { 208 char *end; 209 long t; 210 211 t = strtol(val, &end, 10); 212 213 if (end) { 214 if (!strncmp(end, "ns", 2)) { 215 return t; 216 } else if (!strncmp(end, "us", 2)) { 217 t *= 1000; 218 return t; 219 } else if (!strncmp(end, "ms", 2)) { 220 t *= 1000 * 1000; 221 return t; 222 } else if (!strncmp(end, "s", 1)) { 223 t *= 1000 * 1000 * 1000; 224 return t; 225 } 226 return -1; 227 } 228 229 return t; 230 } 231 232 /* 233 * This is a set of helper functions to use SCHED_DEADLINE. 234 */ 235 #ifndef __NR_sched_setattr 236 # ifdef __x86_64__ 237 # define __NR_sched_setattr 314 238 # elif __i386__ 239 # define __NR_sched_setattr 351 240 # elif __arm__ 241 # define __NR_sched_setattr 380 242 # elif __aarch64__ || __riscv 243 # define __NR_sched_setattr 274 244 # elif __powerpc__ 245 # define __NR_sched_setattr 355 246 # elif __s390x__ 247 # define __NR_sched_setattr 345 248 # elif __loongarch__ 249 # define __NR_sched_setattr 274 250 # endif 251 #endif 252 253 #define SCHED_DEADLINE 6 254 255 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, 256 unsigned int flags) { 257 return syscall(__NR_sched_setattr, pid, attr, flags); 258 } 259 260 int __set_sched_attr(int pid, struct sched_attr *attr) 261 { 262 int flags = 0; 263 int retval; 264 265 retval = syscall_sched_setattr(pid, attr, flags); 266 if (retval < 0) { 267 err_msg("Failed to set sched attributes to the pid %d: %s\n", 268 pid, strerror(errno)); 269 return 1; 270 } 271 272 return 0; 273 } 274 275 /* 276 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 277 * 278 * Check if the procfs entry is a directory of a process, and then check if the 279 * process has a comm with the prefix set in char *comm_prefix. As the 280 * current users of this function only check for kernel threads, there is no 281 * need to check for the threads for the process. 282 * 283 * Return: True if the proc_entry contains a comm file with comm_prefix*. 284 * Otherwise returns false. 285 */ 286 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) 287 { 288 char buffer[MAX_PATH]; 289 int comm_fd, retval; 290 char *t_name; 291 292 if (proc_entry->d_type != DT_DIR) 293 return 0; 294 295 if (*proc_entry->d_name == '.') 296 return 0; 297 298 /* check if the string is a pid */ 299 for (t_name = proc_entry->d_name; t_name; t_name++) { 300 if (!isdigit(*t_name)) 301 break; 302 } 303 304 if (*t_name != '\0') 305 return 0; 306 307 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); 308 comm_fd = open(buffer, O_RDONLY); 309 if (comm_fd < 0) 310 return 0; 311 312 memset(buffer, 0, MAX_PATH); 313 retval = read(comm_fd, buffer, MAX_PATH); 314 315 close(comm_fd); 316 317 if (retval <= 0) 318 return 0; 319 320 buffer[MAX_PATH-1] = '\0'; 321 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); 322 if (retval) 323 return 0; 324 325 /* comm already have \n */ 326 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); 327 328 return 1; 329 } 330 331 /* 332 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix 333 * 334 * This function uses procfs to list the currently running threads and then set the 335 * sched_attr *attr to the threads that start with char *comm_prefix. It is 336 * mainly used to set the priority to the kernel threads created by the 337 * tracers. 338 */ 339 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 340 { 341 struct dirent *proc_entry; 342 DIR *procfs; 343 int retval; 344 int pid; 345 346 if (strlen(comm_prefix) >= MAX_PATH) { 347 err_msg("Command prefix is too long: %d < strlen(%s)\n", 348 MAX_PATH, comm_prefix); 349 return 1; 350 } 351 352 procfs = opendir("/proc"); 353 if (!procfs) { 354 err_msg("Could not open procfs\n"); 355 return 1; 356 } 357 358 while ((proc_entry = readdir(procfs))) { 359 360 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 361 if (!retval) 362 continue; 363 364 if (strtoi(proc_entry->d_name, &pid)) { 365 err_msg("'%s' is not a valid pid", proc_entry->d_name); 366 goto out_err; 367 } 368 /* procfs_is_workload_pid confirmed it is a pid */ 369 retval = __set_sched_attr(pid, attr); 370 if (retval) { 371 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 372 goto out_err; 373 } 374 375 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 376 } 377 return 0; 378 379 out_err: 380 closedir(procfs); 381 return 1; 382 } 383 384 #define INVALID_VAL (~0L) 385 static long get_long_ns_after_colon(char *start) 386 { 387 long val = INVALID_VAL; 388 389 /* find the ":" */ 390 start = strstr(start, ":"); 391 if (!start) 392 return -1; 393 394 /* skip ":" */ 395 start++; 396 val = parse_ns_duration(start); 397 398 return val; 399 } 400 401 static long get_long_after_colon(char *start) 402 { 403 long val = INVALID_VAL; 404 405 /* find the ":" */ 406 start = strstr(start, ":"); 407 if (!start) 408 return -1; 409 410 /* skip ":" */ 411 start++; 412 val = get_llong_from_str(start); 413 414 return val; 415 } 416 417 /* 418 * parse priority in the format: 419 * SCHED_OTHER: 420 * o:<prio> 421 * O:<prio> 422 * SCHED_RR: 423 * r:<prio> 424 * R:<prio> 425 * SCHED_FIFO: 426 * f:<prio> 427 * F:<prio> 428 * SCHED_DEADLINE: 429 * d:runtime:period 430 * D:runtime:period 431 */ 432 int parse_prio(char *arg, struct sched_attr *sched_param) 433 { 434 long prio; 435 long runtime; 436 long period; 437 438 memset(sched_param, 0, sizeof(*sched_param)); 439 sched_param->size = sizeof(*sched_param); 440 441 switch (arg[0]) { 442 case 'd': 443 case 'D': 444 /* d:runtime:period */ 445 if (strlen(arg) < 4) 446 return -1; 447 448 runtime = get_long_ns_after_colon(arg); 449 if (runtime == INVALID_VAL) 450 return -1; 451 452 period = get_long_ns_after_colon(&arg[2]); 453 if (period == INVALID_VAL) 454 return -1; 455 456 if (runtime > period) 457 return -1; 458 459 sched_param->sched_policy = SCHED_DEADLINE; 460 sched_param->sched_runtime = runtime; 461 sched_param->sched_deadline = period; 462 sched_param->sched_period = period; 463 break; 464 case 'f': 465 case 'F': 466 /* f:prio */ 467 prio = get_long_after_colon(arg); 468 if (prio == INVALID_VAL) 469 return -1; 470 471 if (prio < sched_get_priority_min(SCHED_FIFO)) 472 return -1; 473 if (prio > sched_get_priority_max(SCHED_FIFO)) 474 return -1; 475 476 sched_param->sched_policy = SCHED_FIFO; 477 sched_param->sched_priority = prio; 478 break; 479 case 'r': 480 case 'R': 481 /* r:prio */ 482 prio = get_long_after_colon(arg); 483 if (prio == INVALID_VAL) 484 return -1; 485 486 if (prio < sched_get_priority_min(SCHED_RR)) 487 return -1; 488 if (prio > sched_get_priority_max(SCHED_RR)) 489 return -1; 490 491 sched_param->sched_policy = SCHED_RR; 492 sched_param->sched_priority = prio; 493 break; 494 case 'o': 495 case 'O': 496 /* o:prio */ 497 prio = get_long_after_colon(arg); 498 if (prio == INVALID_VAL) 499 return -1; 500 501 if (prio < MIN_NICE) 502 return -1; 503 if (prio > MAX_NICE) 504 return -1; 505 506 sched_param->sched_policy = SCHED_OTHER; 507 sched_param->sched_nice = prio; 508 break; 509 default: 510 return -1; 511 } 512 return 0; 513 } 514 515 /* 516 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 517 * 518 * This is used to reduce the exit from idle latency. The value 519 * will be reset once the file descriptor of /dev/cpu_dma_latecy 520 * is closed. 521 * 522 * Return: the /dev/cpu_dma_latecy file descriptor 523 */ 524 int set_cpu_dma_latency(int32_t latency) 525 { 526 int retval; 527 int fd; 528 529 fd = open("/dev/cpu_dma_latency", O_RDWR); 530 if (fd < 0) { 531 err_msg("Error opening /dev/cpu_dma_latency\n"); 532 return -1; 533 } 534 535 retval = write(fd, &latency, 4); 536 if (retval < 1) { 537 err_msg("Error setting /dev/cpu_dma_latency\n"); 538 close(fd); 539 return -1; 540 } 541 542 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 543 544 return fd; 545 } 546 547 #ifdef HAVE_LIBCPUPOWER_SUPPORT 548 static unsigned int **saved_cpu_idle_disable_state; 549 static size_t saved_cpu_idle_disable_state_alloc_ctr; 550 551 /* 552 * save_cpu_idle_state_disable - save disable for all idle states of a cpu 553 * 554 * Saves the current disable of all idle states of a cpu, to be subsequently 555 * restored via restore_cpu_idle_disable_state. 556 * 557 * Return: idle state count on success, negative on error 558 */ 559 int save_cpu_idle_disable_state(unsigned int cpu) 560 { 561 unsigned int nr_states; 562 unsigned int state; 563 int disabled; 564 int nr_cpus; 565 566 nr_states = cpuidle_state_count(cpu); 567 568 if (nr_states == 0) 569 return 0; 570 571 if (saved_cpu_idle_disable_state == NULL) { 572 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 573 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); 574 if (!saved_cpu_idle_disable_state) 575 return -1; 576 } 577 578 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); 579 if (!saved_cpu_idle_disable_state[cpu]) 580 return -1; 581 saved_cpu_idle_disable_state_alloc_ctr++; 582 583 for (state = 0; state < nr_states; state++) { 584 disabled = cpuidle_is_state_disabled(cpu, state); 585 if (disabled < 0) 586 return disabled; 587 saved_cpu_idle_disable_state[cpu][state] = disabled; 588 } 589 590 return nr_states; 591 } 592 593 /* 594 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu 595 * 596 * Restores the current disable state of all idle states of a cpu that was 597 * previously saved by save_cpu_idle_disable_state. 598 * 599 * Return: idle state count on success, negative on error 600 */ 601 int restore_cpu_idle_disable_state(unsigned int cpu) 602 { 603 unsigned int nr_states; 604 unsigned int state; 605 int disabled; 606 int result; 607 608 nr_states = cpuidle_state_count(cpu); 609 610 if (nr_states == 0) 611 return 0; 612 613 if (!saved_cpu_idle_disable_state) 614 return -1; 615 616 for (state = 0; state < nr_states; state++) { 617 if (!saved_cpu_idle_disable_state[cpu]) 618 return -1; 619 disabled = saved_cpu_idle_disable_state[cpu][state]; 620 result = cpuidle_state_disable(cpu, state, disabled); 621 if (result < 0) 622 return result; 623 } 624 625 free(saved_cpu_idle_disable_state[cpu]); 626 saved_cpu_idle_disable_state[cpu] = NULL; 627 saved_cpu_idle_disable_state_alloc_ctr--; 628 if (saved_cpu_idle_disable_state_alloc_ctr == 0) { 629 free(saved_cpu_idle_disable_state); 630 saved_cpu_idle_disable_state = NULL; 631 } 632 633 return nr_states; 634 } 635 636 /* 637 * free_cpu_idle_disable_states - free saved idle state disable for all cpus 638 * 639 * Frees the memory used for storing cpu idle state disable for all cpus 640 * and states. 641 * 642 * Normally, the memory is freed automatically in 643 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an 644 * error. 645 */ 646 void free_cpu_idle_disable_states(void) 647 { 648 int cpu; 649 int nr_cpus; 650 651 if (!saved_cpu_idle_disable_state) 652 return; 653 654 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 655 656 for (cpu = 0; cpu < nr_cpus; cpu++) { 657 free(saved_cpu_idle_disable_state[cpu]); 658 saved_cpu_idle_disable_state[cpu] = NULL; 659 } 660 661 free(saved_cpu_idle_disable_state); 662 saved_cpu_idle_disable_state = NULL; 663 } 664 665 /* 666 * set_deepest_cpu_idle_state - limit idle state of cpu 667 * 668 * Disables all idle states deeper than the one given in 669 * deepest_state (assuming states with higher number are deeper). 670 * 671 * This is used to reduce the exit from idle latency. Unlike 672 * set_cpu_dma_latency, it can disable idle states per cpu. 673 * 674 * Return: idle state count on success, negative on error 675 */ 676 int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) 677 { 678 unsigned int nr_states; 679 unsigned int state; 680 int result; 681 682 nr_states = cpuidle_state_count(cpu); 683 684 for (state = deepest_state + 1; state < nr_states; state++) { 685 result = cpuidle_state_disable(cpu, state, 1); 686 if (result < 0) 687 return result; 688 } 689 690 return nr_states; 691 } 692 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 693 694 #define _STR(x) #x 695 #define STR(x) _STR(x) 696 697 /* 698 * find_mount - find a the mount point of a given fs 699 * 700 * Returns 0 if mount is not found, otherwise return 1 and fill mp 701 * with the mount point. 702 */ 703 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 704 { 705 char mount_point[MAX_PATH+1]; 706 char type[100]; 707 int found = 0; 708 FILE *fp; 709 710 fp = fopen("/proc/mounts", "r"); 711 if (!fp) 712 return 0; 713 714 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 715 if (strcmp(type, fs) == 0) { 716 found = 1; 717 break; 718 } 719 } 720 fclose(fp); 721 722 if (!found) 723 return 0; 724 725 memset(mp, 0, sizeof_mp); 726 strncpy(mp, mount_point, sizeof_mp - 1); 727 728 debug_msg("Fs %s found at %s\n", fs, mp); 729 return 1; 730 } 731 732 /* 733 * get_self_cgroup - get the current thread cgroup path 734 * 735 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 736 * 737 * 0::/user.slice/user-0.slice/session-3.scope'\n' 738 * 739 * This function is interested in the content after the second : and before the '\n'. 740 * 741 * Returns 1 if a string was found, 0 otherwise. 742 */ 743 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 744 { 745 char path[MAX_PATH], *start; 746 int fd, retval; 747 748 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 749 750 fd = open(path, O_RDONLY); 751 if (fd < 0) 752 return 0; 753 754 memset(path, 0, sizeof(path)); 755 retval = read(fd, path, MAX_PATH); 756 757 close(fd); 758 759 if (retval <= 0) 760 return 0; 761 762 path[MAX_PATH-1] = '\0'; 763 start = path; 764 765 start = strstr(start, ":"); 766 if (!start) 767 return 0; 768 769 /* skip ":" */ 770 start++; 771 772 start = strstr(start, ":"); 773 if (!start) 774 return 0; 775 776 /* skip ":" */ 777 start++; 778 779 if (strlen(start) >= sizeof_self_cg) 780 return 0; 781 782 snprintf(self_cg, sizeof_self_cg, "%s", start); 783 784 /* Swap '\n' with '\0' */ 785 start = strstr(self_cg, "\n"); 786 787 /* there must be '\n' */ 788 if (!start) 789 return 0; 790 791 /* ok, it found a string after the second : and before the \n */ 792 *start = '\0'; 793 794 return 1; 795 } 796 797 /* 798 * open_cgroup_procs - Open the cgroup.procs file for the given cgroup 799 * 800 * If cgroup argument is not NULL, the cgroup.procs file for that cgroup 801 * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread 802 * will be used. 803 * 804 * Supports cgroup v2. 805 * 806 * Returns the file descriptor on success, -1 otherwise. 807 */ 808 static int open_cgroup_procs(const char *cgroup) 809 { 810 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 811 char cgroup_procs[MAX_PATH]; 812 int retval; 813 int cg_fd; 814 815 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 816 if (!retval) { 817 err_msg("Did not find cgroupv2 mount point\n"); 818 return -1; 819 } 820 821 if (!cgroup) { 822 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 823 sizeof(cgroup_path) - strlen(cgroup_path)); 824 if (!retval) { 825 err_msg("Did not find self cgroup\n"); 826 return -1; 827 } 828 } else { 829 snprintf(&cgroup_path[strlen(cgroup_path)], 830 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 831 } 832 833 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 834 835 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 836 837 cg_fd = open(cgroup_procs, O_RDWR); 838 if (cg_fd < 0) 839 return -1; 840 841 return cg_fd; 842 } 843 844 /* 845 * set_pid_cgroup - Set cgroup to pid_t pid 846 * 847 * If cgroup argument is not NULL, the threads will move to the given cgroup. 848 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 849 * 850 * Supports cgroup v2. 851 * 852 * Returns 1 on success, 0 otherwise. 853 */ 854 int set_pid_cgroup(pid_t pid, const char *cgroup) 855 { 856 char pid_str[24]; 857 int retval; 858 int cg_fd; 859 860 cg_fd = open_cgroup_procs(cgroup); 861 if (cg_fd < 0) 862 return 0; 863 864 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 865 866 retval = write(cg_fd, pid_str, strlen(pid_str)); 867 if (retval < 0) 868 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 869 pid_str, strerror(errno)); 870 else 871 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 872 873 close(cg_fd); 874 875 return (retval >= 0); 876 } 877 878 /** 879 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 880 * 881 * If cgroup argument is not NULL, the threads will move to the given cgroup. 882 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 883 * 884 * Supports cgroup v2. 885 * 886 * Returns 1 on success, 0 otherwise. 887 */ 888 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 889 { 890 struct dirent *proc_entry; 891 DIR *procfs; 892 int retval; 893 int cg_fd; 894 895 if (strlen(comm_prefix) >= MAX_PATH) { 896 err_msg("Command prefix is too long: %d < strlen(%s)\n", 897 MAX_PATH, comm_prefix); 898 return 0; 899 } 900 901 cg_fd = open_cgroup_procs(cgroup); 902 if (cg_fd < 0) 903 return 0; 904 905 procfs = opendir("/proc"); 906 if (!procfs) { 907 err_msg("Could not open procfs\n"); 908 goto out_cg; 909 } 910 911 while ((proc_entry = readdir(procfs))) { 912 913 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 914 if (!retval) 915 continue; 916 917 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 918 if (retval < 0) { 919 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 920 proc_entry->d_name, strerror(errno)); 921 goto out_procfs; 922 } 923 924 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 925 } 926 927 closedir(procfs); 928 close(cg_fd); 929 return 1; 930 931 out_procfs: 932 closedir(procfs); 933 out_cg: 934 close(cg_fd); 935 return 0; 936 } 937 938 /** 939 * auto_house_keeping - Automatically move rtla out of measurement threads 940 * 941 * Try to move rtla away from the tracer, if possible. 942 * 943 * Returns 1 on success, 0 otherwise. 944 */ 945 int auto_house_keeping(cpu_set_t *monitored_cpus) 946 { 947 cpu_set_t rtla_cpus, house_keeping_cpus; 948 int retval; 949 950 /* first get the CPUs in which rtla can actually run. */ 951 retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); 952 if (retval == -1) { 953 debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); 954 return 0; 955 } 956 957 /* then check if the existing setup is already good. */ 958 CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 959 if (!CPU_COUNT(&house_keeping_cpus)) { 960 debug_msg("rtla and the monitored CPUs do not share CPUs."); 961 debug_msg("Skipping auto house-keeping\n"); 962 return 1; 963 } 964 965 /* remove the intersection */ 966 CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 967 968 /* get only those that rtla can run */ 969 CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); 970 971 /* is there any cpu left? */ 972 if (!CPU_COUNT(&house_keeping_cpus)) { 973 debug_msg("Could not find any CPU for auto house-keeping\n"); 974 return 0; 975 } 976 977 retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); 978 if (retval == -1) { 979 debug_msg("Could not set affinity for auto house-keeping\n"); 980 return 0; 981 } 982 983 debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); 984 985 return 1; 986 } 987 988 /** 989 * parse_optional_arg - Parse optional argument value 990 * 991 * Parse optional argument value, which can be in the form of: 992 * -sarg, -s/--long=arg, -s/--long arg 993 * 994 * Returns arg value if found, NULL otherwise. 995 */ 996 char *parse_optional_arg(int argc, char **argv) 997 { 998 if (optarg) { 999 if (optarg[0] == '=') { 1000 /* skip the = */ 1001 return &optarg[1]; 1002 } else { 1003 return optarg; 1004 } 1005 /* parse argument of form -s [arg] and --long [arg]*/ 1006 } else if (optind < argc && argv[optind][0] != '-') { 1007 /* consume optind */ 1008 return argv[optind++]; 1009 } else { 1010 return NULL; 1011 } 1012 } 1013 1014 /* 1015 * strtoi - convert string to integer with error checking 1016 * 1017 * Returns 0 on success, -1 if conversion fails or result is out of int range. 1018 */ 1019 int strtoi(const char *s, int *res) 1020 { 1021 char *end_ptr; 1022 long lres; 1023 1024 if (!*s) 1025 return -1; 1026 1027 errno = 0; 1028 lres = strtol(s, &end_ptr, 0); 1029 if (errno || *end_ptr || lres > INT_MAX || lres < INT_MIN) 1030 return -1; 1031 1032 *res = (int) lres; 1033 return 0; 1034 } 1035