1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #ifdef HAVE_LIBCPUPOWER_SUPPORT 8 #include <cpuidle.h> 9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 10 #include <dirent.h> 11 #include <stdarg.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <unistd.h> 15 #include <ctype.h> 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <sched.h> 19 #include <stdio.h> 20 21 #include "utils.h" 22 23 #define MAX_MSG_LENGTH 1024 24 int config_debug; 25 26 /* 27 * err_msg - print an error message to the stderr 28 */ 29 void err_msg(const char *fmt, ...) 30 { 31 char message[MAX_MSG_LENGTH]; 32 va_list ap; 33 34 va_start(ap, fmt); 35 vsnprintf(message, sizeof(message), fmt, ap); 36 va_end(ap); 37 38 fprintf(stderr, "%s", message); 39 } 40 41 /* 42 * debug_msg - print a debug message to stderr if debug is set 43 */ 44 void debug_msg(const char *fmt, ...) 45 { 46 char message[MAX_MSG_LENGTH]; 47 va_list ap; 48 49 if (!config_debug) 50 return; 51 52 va_start(ap, fmt); 53 vsnprintf(message, sizeof(message), fmt, ap); 54 va_end(ap); 55 56 fprintf(stderr, "%s", message); 57 } 58 59 /* 60 * fatal - print an error message and EOL to stderr and exit with ERROR 61 */ 62 void fatal(const char *fmt, ...) 
63 { 64 va_list ap; 65 66 va_start(ap, fmt); 67 vfprintf(stderr, fmt, ap); 68 va_end(ap); 69 fprintf(stderr, "\n"); 70 71 exit(ERROR); 72 } 73 74 /* 75 * get_llong_from_str - get a long long int from a string 76 */ 77 long long get_llong_from_str(char *start) 78 { 79 long long value; 80 char *end; 81 82 errno = 0; 83 value = strtoll(start, &end, 10); 84 if (errno || start == end) 85 return -1; 86 87 return value; 88 } 89 90 /* 91 * get_duration - fill output with a human readable duration since start_time 92 */ 93 void get_duration(time_t start_time, char *output, int output_size) 94 { 95 time_t now = time(NULL); 96 struct tm *tm_info; 97 time_t duration; 98 99 duration = difftime(now, start_time); 100 tm_info = gmtime(&duration); 101 102 snprintf(output, output_size, "%3d %02d:%02d:%02d", 103 tm_info->tm_yday, 104 tm_info->tm_hour, 105 tm_info->tm_min, 106 tm_info->tm_sec); 107 } 108 109 /* 110 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 111 * 112 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 113 * filling cpu_set_t argument. 114 * 115 * Returns 1 on success, 0 otherwise. 
116 */ 117 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 118 { 119 const char *p; 120 int end_cpu; 121 int nr_cpus; 122 int cpu; 123 int i; 124 125 CPU_ZERO(set); 126 127 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 128 129 for (p = cpu_list; *p; ) { 130 cpu = atoi(p); 131 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) 132 goto err; 133 134 while (isdigit(*p)) 135 p++; 136 if (*p == '-') { 137 p++; 138 end_cpu = atoi(p); 139 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) 140 goto err; 141 while (isdigit(*p)) 142 p++; 143 } else 144 end_cpu = cpu; 145 146 if (cpu == end_cpu) { 147 debug_msg("cpu_set: adding cpu %d\n", cpu); 148 CPU_SET(cpu, set); 149 } else { 150 for (i = cpu; i <= end_cpu; i++) { 151 debug_msg("cpu_set: adding cpu %d\n", i); 152 CPU_SET(i, set); 153 } 154 } 155 156 if (*p == ',') 157 p++; 158 } 159 160 return 0; 161 err: 162 debug_msg("Error parsing the cpu set %s\n", cpu_list); 163 return 1; 164 } 165 166 /* 167 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 168 */ 169 long parse_seconds_duration(char *val) 170 { 171 char *end; 172 long t; 173 174 t = strtol(val, &end, 10); 175 176 if (end) { 177 switch (*end) { 178 case 's': 179 case 'S': 180 break; 181 case 'm': 182 case 'M': 183 t *= 60; 184 break; 185 case 'h': 186 case 'H': 187 t *= 60 * 60; 188 break; 189 190 case 'd': 191 case 'D': 192 t *= 24 * 60 * 60; 193 break; 194 } 195 } 196 197 return t; 198 } 199 200 /* 201 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 202 */ 203 long parse_ns_duration(char *val) 204 { 205 char *end; 206 long t; 207 208 t = strtol(val, &end, 10); 209 210 if (end) { 211 if (!strncmp(end, "ns", 2)) { 212 return t; 213 } else if (!strncmp(end, "us", 2)) { 214 t *= 1000; 215 return t; 216 } else if (!strncmp(end, "ms", 2)) { 217 t *= 1000 * 1000; 218 return t; 219 } else if (!strncmp(end, "s", 1)) { 220 t *= 1000 * 1000 * 1000; 221 return t; 222 } 223 return -1; 224 } 225 226 
return t; 227 } 228 229 /* 230 * This is a set of helper functions to use SCHED_DEADLINE. 231 */ 232 #ifndef __NR_sched_setattr 233 # ifdef __x86_64__ 234 # define __NR_sched_setattr 314 235 # elif __i386__ 236 # define __NR_sched_setattr 351 237 # elif __arm__ 238 # define __NR_sched_setattr 380 239 # elif __aarch64__ || __riscv 240 # define __NR_sched_setattr 274 241 # elif __powerpc__ 242 # define __NR_sched_setattr 355 243 # elif __s390x__ 244 # define __NR_sched_setattr 345 245 # elif __loongarch__ 246 # define __NR_sched_setattr 274 247 # endif 248 #endif 249 250 #define SCHED_DEADLINE 6 251 252 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, 253 unsigned int flags) { 254 return syscall(__NR_sched_setattr, pid, attr, flags); 255 } 256 257 int __set_sched_attr(int pid, struct sched_attr *attr) 258 { 259 int flags = 0; 260 int retval; 261 262 retval = syscall_sched_setattr(pid, attr, flags); 263 if (retval < 0) { 264 err_msg("Failed to set sched attributes to the pid %d: %s\n", 265 pid, strerror(errno)); 266 return 1; 267 } 268 269 return 0; 270 } 271 272 /* 273 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 274 * 275 * Check if the procfs entry is a directory of a process, and then check if the 276 * process has a comm with the prefix set in char *comm_prefix. As the 277 * current users of this function only check for kernel threads, there is no 278 * need to check for the threads for the process. 279 * 280 * Return: True if the proc_entry contains a comm file with comm_prefix*. 281 * Otherwise returns false. 
282 */ 283 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) 284 { 285 char buffer[MAX_PATH]; 286 int comm_fd, retval; 287 char *t_name; 288 289 if (proc_entry->d_type != DT_DIR) 290 return 0; 291 292 if (*proc_entry->d_name == '.') 293 return 0; 294 295 /* check if the string is a pid */ 296 for (t_name = proc_entry->d_name; t_name; t_name++) { 297 if (!isdigit(*t_name)) 298 break; 299 } 300 301 if (*t_name != '\0') 302 return 0; 303 304 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); 305 comm_fd = open(buffer, O_RDONLY); 306 if (comm_fd < 0) 307 return 0; 308 309 memset(buffer, 0, MAX_PATH); 310 retval = read(comm_fd, buffer, MAX_PATH); 311 312 close(comm_fd); 313 314 if (retval <= 0) 315 return 0; 316 317 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); 318 if (retval) 319 return 0; 320 321 /* comm already have \n */ 322 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); 323 324 return 1; 325 } 326 327 /* 328 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix 329 * 330 * This function uses procfs to list the currently running threads and then set the 331 * sched_attr *attr to the threads that start with char *comm_prefix. It is 332 * mainly used to set the priority to the kernel threads created by the 333 * tracers. 
334 */ 335 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 336 { 337 struct dirent *proc_entry; 338 DIR *procfs; 339 int retval; 340 341 if (strlen(comm_prefix) >= MAX_PATH) { 342 err_msg("Command prefix is too long: %d < strlen(%s)\n", 343 MAX_PATH, comm_prefix); 344 return 1; 345 } 346 347 procfs = opendir("/proc"); 348 if (!procfs) { 349 err_msg("Could not open procfs\n"); 350 return 1; 351 } 352 353 while ((proc_entry = readdir(procfs))) { 354 355 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 356 if (!retval) 357 continue; 358 359 /* procfs_is_workload_pid confirmed it is a pid */ 360 retval = __set_sched_attr(atoi(proc_entry->d_name), attr); 361 if (retval) { 362 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 363 goto out_err; 364 } 365 366 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 367 } 368 return 0; 369 370 out_err: 371 closedir(procfs); 372 return 1; 373 } 374 375 #define INVALID_VAL (~0L) 376 static long get_long_ns_after_colon(char *start) 377 { 378 long val = INVALID_VAL; 379 380 /* find the ":" */ 381 start = strstr(start, ":"); 382 if (!start) 383 return -1; 384 385 /* skip ":" */ 386 start++; 387 val = parse_ns_duration(start); 388 389 return val; 390 } 391 392 static long get_long_after_colon(char *start) 393 { 394 long val = INVALID_VAL; 395 396 /* find the ":" */ 397 start = strstr(start, ":"); 398 if (!start) 399 return -1; 400 401 /* skip ":" */ 402 start++; 403 val = get_llong_from_str(start); 404 405 return val; 406 } 407 408 /* 409 * parse priority in the format: 410 * SCHED_OTHER: 411 * o:<prio> 412 * O:<prio> 413 * SCHED_RR: 414 * r:<prio> 415 * R:<prio> 416 * SCHED_FIFO: 417 * f:<prio> 418 * F:<prio> 419 * SCHED_DEADLINE: 420 * d:runtime:period 421 * D:runtime:period 422 */ 423 int parse_prio(char *arg, struct sched_attr *sched_param) 424 { 425 long prio; 426 long runtime; 427 long period; 428 429 memset(sched_param, 0, sizeof(*sched_param)); 430 
sched_param->size = sizeof(*sched_param); 431 432 switch (arg[0]) { 433 case 'd': 434 case 'D': 435 /* d:runtime:period */ 436 if (strlen(arg) < 4) 437 return -1; 438 439 runtime = get_long_ns_after_colon(arg); 440 if (runtime == INVALID_VAL) 441 return -1; 442 443 period = get_long_ns_after_colon(&arg[2]); 444 if (period == INVALID_VAL) 445 return -1; 446 447 if (runtime > period) 448 return -1; 449 450 sched_param->sched_policy = SCHED_DEADLINE; 451 sched_param->sched_runtime = runtime; 452 sched_param->sched_deadline = period; 453 sched_param->sched_period = period; 454 break; 455 case 'f': 456 case 'F': 457 /* f:prio */ 458 prio = get_long_after_colon(arg); 459 if (prio == INVALID_VAL) 460 return -1; 461 462 if (prio < sched_get_priority_min(SCHED_FIFO)) 463 return -1; 464 if (prio > sched_get_priority_max(SCHED_FIFO)) 465 return -1; 466 467 sched_param->sched_policy = SCHED_FIFO; 468 sched_param->sched_priority = prio; 469 break; 470 case 'r': 471 case 'R': 472 /* r:prio */ 473 prio = get_long_after_colon(arg); 474 if (prio == INVALID_VAL) 475 return -1; 476 477 if (prio < sched_get_priority_min(SCHED_RR)) 478 return -1; 479 if (prio > sched_get_priority_max(SCHED_RR)) 480 return -1; 481 482 sched_param->sched_policy = SCHED_RR; 483 sched_param->sched_priority = prio; 484 break; 485 case 'o': 486 case 'O': 487 /* o:prio */ 488 prio = get_long_after_colon(arg); 489 if (prio == INVALID_VAL) 490 return -1; 491 492 if (prio < MIN_NICE) 493 return -1; 494 if (prio > MAX_NICE) 495 return -1; 496 497 sched_param->sched_policy = SCHED_OTHER; 498 sched_param->sched_nice = prio; 499 break; 500 default: 501 return -1; 502 } 503 return 0; 504 } 505 506 /* 507 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 508 * 509 * This is used to reduce the exit from idle latency. The value 510 * will be reset once the file descriptor of /dev/cpu_dma_latecy 511 * is closed. 
512 * 513 * Return: the /dev/cpu_dma_latecy file descriptor 514 */ 515 int set_cpu_dma_latency(int32_t latency) 516 { 517 int retval; 518 int fd; 519 520 fd = open("/dev/cpu_dma_latency", O_RDWR); 521 if (fd < 0) { 522 err_msg("Error opening /dev/cpu_dma_latency\n"); 523 return -1; 524 } 525 526 retval = write(fd, &latency, 4); 527 if (retval < 1) { 528 err_msg("Error setting /dev/cpu_dma_latency\n"); 529 close(fd); 530 return -1; 531 } 532 533 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 534 535 return fd; 536 } 537 538 #ifdef HAVE_LIBCPUPOWER_SUPPORT 539 static unsigned int **saved_cpu_idle_disable_state; 540 static size_t saved_cpu_idle_disable_state_alloc_ctr; 541 542 /* 543 * save_cpu_idle_state_disable - save disable for all idle states of a cpu 544 * 545 * Saves the current disable of all idle states of a cpu, to be subsequently 546 * restored via restore_cpu_idle_disable_state. 547 * 548 * Return: idle state count on success, negative on error 549 */ 550 int save_cpu_idle_disable_state(unsigned int cpu) 551 { 552 unsigned int nr_states; 553 unsigned int state; 554 int disabled; 555 int nr_cpus; 556 557 nr_states = cpuidle_state_count(cpu); 558 559 if (nr_states == 0) 560 return 0; 561 562 if (saved_cpu_idle_disable_state == NULL) { 563 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 564 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); 565 if (!saved_cpu_idle_disable_state) 566 return -1; 567 } 568 569 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); 570 if (!saved_cpu_idle_disable_state[cpu]) 571 return -1; 572 saved_cpu_idle_disable_state_alloc_ctr++; 573 574 for (state = 0; state < nr_states; state++) { 575 disabled = cpuidle_is_state_disabled(cpu, state); 576 if (disabled < 0) 577 return disabled; 578 saved_cpu_idle_disable_state[cpu][state] = disabled; 579 } 580 581 return nr_states; 582 } 583 584 /* 585 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu 586 * 587 * 
Restores the current disable state of all idle states of a cpu that was
 * previously saved by save_cpu_idle_disable_state.
 *
 * Return: idle state count on success, negative on error
 */
int restore_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	int result;

	nr_states = cpuidle_state_count(cpu);

	if (nr_states == 0)
		return 0;

	/* nothing was ever saved */
	if (!saved_cpu_idle_disable_state)
		return -1;

	/* nothing was saved for this cpu */
	if (!saved_cpu_idle_disable_state[cpu])
		return -1;

	for (state = 0; state < nr_states; state++) {
		disabled = saved_cpu_idle_disable_state[cpu][state];
		result = cpuidle_state_disable(cpu, state, disabled);
		if (result < 0)
			return result;
	}

	/* drop this cpu's saved state ... */
	free(saved_cpu_idle_disable_state[cpu]);
	saved_cpu_idle_disable_state[cpu] = NULL;
	saved_cpu_idle_disable_state_alloc_ctr--;

	/* ... and the top-level table once the last cpu is restored */
	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
		free(saved_cpu_idle_disable_state);
		saved_cpu_idle_disable_state = NULL;
	}

	return nr_states;
}

/*
 * free_cpu_idle_disable_states - free saved idle state disable for all cpus
 *
 * Frees the memory used for storing cpu idle state disable for all cpus
 * and states.
 *
 * Normally, the memory is freed automatically in
 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
 * error.
636 */ 637 void free_cpu_idle_disable_states(void) 638 { 639 int cpu; 640 int nr_cpus; 641 642 if (!saved_cpu_idle_disable_state) 643 return; 644 645 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 646 647 for (cpu = 0; cpu < nr_cpus; cpu++) { 648 free(saved_cpu_idle_disable_state[cpu]); 649 saved_cpu_idle_disable_state[cpu] = NULL; 650 } 651 652 free(saved_cpu_idle_disable_state); 653 saved_cpu_idle_disable_state = NULL; 654 } 655 656 /* 657 * set_deepest_cpu_idle_state - limit idle state of cpu 658 * 659 * Disables all idle states deeper than the one given in 660 * deepest_state (assuming states with higher number are deeper). 661 * 662 * This is used to reduce the exit from idle latency. Unlike 663 * set_cpu_dma_latency, it can disable idle states per cpu. 664 * 665 * Return: idle state count on success, negative on error 666 */ 667 int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) 668 { 669 unsigned int nr_states; 670 unsigned int state; 671 int result; 672 673 nr_states = cpuidle_state_count(cpu); 674 675 for (state = deepest_state + 1; state < nr_states; state++) { 676 result = cpuidle_state_disable(cpu, state, 1); 677 if (result < 0) 678 return result; 679 } 680 681 return nr_states; 682 } 683 #endif /* HAVE_LIBCPUPOWER_SUPPORT */ 684 685 #define _STR(x) #x 686 #define STR(x) _STR(x) 687 688 /* 689 * find_mount - find a the mount point of a given fs 690 * 691 * Returns 0 if mount is not found, otherwise return 1 and fill mp 692 * with the mount point. 
693 */ 694 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 695 { 696 char mount_point[MAX_PATH+1]; 697 char type[100]; 698 int found = 0; 699 FILE *fp; 700 701 fp = fopen("/proc/mounts", "r"); 702 if (!fp) 703 return 0; 704 705 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 706 if (strcmp(type, fs) == 0) { 707 found = 1; 708 break; 709 } 710 } 711 fclose(fp); 712 713 if (!found) 714 return 0; 715 716 memset(mp, 0, sizeof_mp); 717 strncpy(mp, mount_point, sizeof_mp - 1); 718 719 debug_msg("Fs %s found at %s\n", fs, mp); 720 return 1; 721 } 722 723 /* 724 * get_self_cgroup - get the current thread cgroup path 725 * 726 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 727 * 728 * 0::/user.slice/user-0.slice/session-3.scope'\n' 729 * 730 * This function is interested in the content after the second : and before the '\n'. 731 * 732 * Returns 1 if a string was found, 0 otherwise. 733 */ 734 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 735 { 736 char path[MAX_PATH], *start; 737 int fd, retval; 738 739 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 740 741 fd = open(path, O_RDONLY); 742 if (fd < 0) 743 return 0; 744 745 retval = read(fd, path, MAX_PATH); 746 747 close(fd); 748 749 if (retval <= 0) 750 return 0; 751 752 start = path; 753 754 start = strstr(start, ":"); 755 if (!start) 756 return 0; 757 758 /* skip ":" */ 759 start++; 760 761 start = strstr(start, ":"); 762 if (!start) 763 return 0; 764 765 /* skip ":" */ 766 start++; 767 768 if (strlen(start) >= sizeof_self_cg) 769 return 0; 770 771 snprintf(self_cg, sizeof_self_cg, "%s", start); 772 773 /* Swap '\n' with '\0' */ 774 start = strstr(self_cg, "\n"); 775 776 /* there must be '\n' */ 777 if (!start) 778 return 0; 779 780 /* ok, it found a string after the second : and before the \n */ 781 *start = '\0'; 782 783 return 1; 784 } 785 786 /* 787 * set_comm_cgroup - Set cgroup to pid_t 
pid 788 * 789 * If cgroup argument is not NULL, the threads will move to the given cgroup. 790 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 791 * 792 * Supports cgroup v2. 793 * 794 * Returns 1 on success, 0 otherwise. 795 */ 796 int set_pid_cgroup(pid_t pid, const char *cgroup) 797 { 798 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 799 char cgroup_procs[MAX_PATH]; 800 char pid_str[24]; 801 int retval; 802 int cg_fd; 803 804 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 805 if (!retval) { 806 err_msg("Did not find cgroupv2 mount point\n"); 807 return 0; 808 } 809 810 if (!cgroup) { 811 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 812 sizeof(cgroup_path) - strlen(cgroup_path)); 813 if (!retval) { 814 err_msg("Did not find self cgroup\n"); 815 return 0; 816 } 817 } else { 818 snprintf(&cgroup_path[strlen(cgroup_path)], 819 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 820 } 821 822 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 823 824 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 825 826 cg_fd = open(cgroup_procs, O_RDWR); 827 if (cg_fd < 0) 828 return 0; 829 830 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 831 832 retval = write(cg_fd, pid_str, strlen(pid_str)); 833 if (retval < 0) 834 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 835 pid_str, strerror(errno)); 836 else 837 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 838 839 close(cg_fd); 840 841 return (retval >= 0); 842 } 843 844 /** 845 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 846 * 847 * If cgroup argument is not NULL, the threads will move to the given cgroup. 848 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 849 * 850 * Supports cgroup v2. 851 * 852 * Returns 1 on success, 0 otherwise. 
853 */ 854 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 855 { 856 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 857 char cgroup_procs[MAX_PATH]; 858 struct dirent *proc_entry; 859 DIR *procfs; 860 int retval; 861 int cg_fd; 862 863 if (strlen(comm_prefix) >= MAX_PATH) { 864 err_msg("Command prefix is too long: %d < strlen(%s)\n", 865 MAX_PATH, comm_prefix); 866 return 0; 867 } 868 869 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 870 if (!retval) { 871 err_msg("Did not find cgroupv2 mount point\n"); 872 return 0; 873 } 874 875 if (!cgroup) { 876 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 877 sizeof(cgroup_path) - strlen(cgroup_path)); 878 if (!retval) { 879 err_msg("Did not find self cgroup\n"); 880 return 0; 881 } 882 } else { 883 snprintf(&cgroup_path[strlen(cgroup_path)], 884 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 885 } 886 887 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 888 889 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 890 891 cg_fd = open(cgroup_procs, O_RDWR); 892 if (cg_fd < 0) 893 return 0; 894 895 procfs = opendir("/proc"); 896 if (!procfs) { 897 err_msg("Could not open procfs\n"); 898 goto out_cg; 899 } 900 901 while ((proc_entry = readdir(procfs))) { 902 903 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 904 if (!retval) 905 continue; 906 907 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 908 if (retval < 0) { 909 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 910 proc_entry->d_name, strerror(errno)); 911 goto out_procfs; 912 } 913 914 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 915 } 916 917 closedir(procfs); 918 close(cg_fd); 919 return 1; 920 921 out_procfs: 922 closedir(procfs); 923 out_cg: 924 close(cg_fd); 925 return 0; 926 } 927 928 /** 929 * auto_house_keeping - Automatically move rtla out of measurement threads 930 * 931 * Try to move rtla away from the 
tracer, if possible.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t rtla_cpus, house_keeping_cpus;
	int retval;

	/* first get the CPUs in which rtla can actually run. */
	retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus);
	if (retval == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* remove the intersection */
	CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);

	/* get only those that rtla can run */
	CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);

	/* is there any cpu left? */
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus);
	if (retval == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}

/**
 * parse_optional_arg - Parse optional argument value
 *
 * Parse optional argument value, which can be in the form of:
 * -sarg, -s/--long=arg, -s/--long arg
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	if (optarg) {
		/* -sarg or --long=arg: getopt already stored the value */
		return optarg[0] == '=' ? &optarg[1] : optarg;
	}

	/* -s arg / --long arg: take the next word unless it looks like an option */
	if (optind < argc && argv[optind][0] != '-')
		return argv[optind++];

	return NULL;
}