1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #include <dirent.h> 8 #include <stdarg.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <unistd.h> 12 #include <ctype.h> 13 #include <errno.h> 14 #include <fcntl.h> 15 #include <sched.h> 16 #include <stdio.h> 17 18 #include "utils.h" 19 20 #define MAX_MSG_LENGTH 1024 21 int config_debug; 22 23 /* 24 * err_msg - print an error message to the stderr 25 */ 26 void err_msg(const char *fmt, ...) 27 { 28 char message[MAX_MSG_LENGTH]; 29 va_list ap; 30 31 va_start(ap, fmt); 32 vsnprintf(message, sizeof(message), fmt, ap); 33 va_end(ap); 34 35 fprintf(stderr, "%s", message); 36 } 37 38 /* 39 * debug_msg - print a debug message to stderr if debug is set 40 */ 41 void debug_msg(const char *fmt, ...) 42 { 43 char message[MAX_MSG_LENGTH]; 44 va_list ap; 45 46 if (!config_debug) 47 return; 48 49 va_start(ap, fmt); 50 vsnprintf(message, sizeof(message), fmt, ap); 51 va_end(ap); 52 53 fprintf(stderr, "%s", message); 54 } 55 56 /* 57 * get_llong_from_str - get a long long int from a string 58 */ 59 long long get_llong_from_str(char *start) 60 { 61 long long value; 62 char *end; 63 64 errno = 0; 65 value = strtoll(start, &end, 10); 66 if (errno || start == end) 67 return -1; 68 69 return value; 70 } 71 72 /* 73 * get_duration - fill output with a human readable duration since start_time 74 */ 75 void get_duration(time_t start_time, char *output, int output_size) 76 { 77 time_t now = time(NULL); 78 struct tm *tm_info; 79 time_t duration; 80 81 duration = difftime(now, start_time); 82 tm_info = gmtime(&duration); 83 84 snprintf(output, output_size, "%3d %02d:%02d:%02d", 85 tm_info->tm_yday, 86 tm_info->tm_hour, 87 tm_info->tm_min, 88 tm_info->tm_sec); 89 } 90 91 /* 92 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 93 * 94 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 95 * filling 
cpu_set_t argument. 96 * 97 * Returns 1 on success, 0 otherwise. 98 */ 99 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 100 { 101 const char *p; 102 int end_cpu; 103 int nr_cpus; 104 int cpu; 105 int i; 106 107 CPU_ZERO(set); 108 109 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 110 111 for (p = cpu_list; *p; ) { 112 cpu = atoi(p); 113 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) 114 goto err; 115 116 while (isdigit(*p)) 117 p++; 118 if (*p == '-') { 119 p++; 120 end_cpu = atoi(p); 121 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) 122 goto err; 123 while (isdigit(*p)) 124 p++; 125 } else 126 end_cpu = cpu; 127 128 if (cpu == end_cpu) { 129 debug_msg("cpu_set: adding cpu %d\n", cpu); 130 CPU_SET(cpu, set); 131 } else { 132 for (i = cpu; i <= end_cpu; i++) { 133 debug_msg("cpu_set: adding cpu %d\n", i); 134 CPU_SET(i, set); 135 } 136 } 137 138 if (*p == ',') 139 p++; 140 } 141 142 return 0; 143 err: 144 debug_msg("Error parsing the cpu set %s\n", cpu_list); 145 return 1; 146 } 147 148 /* 149 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 150 */ 151 long parse_seconds_duration(char *val) 152 { 153 char *end; 154 long t; 155 156 t = strtol(val, &end, 10); 157 158 if (end) { 159 switch (*end) { 160 case 's': 161 case 'S': 162 break; 163 case 'm': 164 case 'M': 165 t *= 60; 166 break; 167 case 'h': 168 case 'H': 169 t *= 60 * 60; 170 break; 171 172 case 'd': 173 case 'D': 174 t *= 24 * 60 * 60; 175 break; 176 } 177 } 178 179 return t; 180 } 181 182 /* 183 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 184 */ 185 long parse_ns_duration(char *val) 186 { 187 char *end; 188 long t; 189 190 t = strtol(val, &end, 10); 191 192 if (end) { 193 if (!strncmp(end, "ns", 2)) { 194 return t; 195 } else if (!strncmp(end, "us", 2)) { 196 t *= 1000; 197 return t; 198 } else if (!strncmp(end, "ms", 2)) { 199 t *= 1000 * 1000; 200 return t; 201 } else if (!strncmp(end, "s", 1)) { 202 t *= 1000 
* 1000 * 1000; 203 return t; 204 } 205 return -1; 206 } 207 208 return t; 209 } 210 211 /* 212 * This is a set of helper functions to use SCHED_DEADLINE. 213 */ 214 #ifdef __x86_64__ 215 # define __NR_sched_setattr 314 216 # define __NR_sched_getattr 315 217 #elif __i386__ 218 # define __NR_sched_setattr 351 219 # define __NR_sched_getattr 352 220 #elif __arm__ 221 # define __NR_sched_setattr 380 222 # define __NR_sched_getattr 381 223 #elif __aarch64__ || __riscv 224 # define __NR_sched_setattr 274 225 # define __NR_sched_getattr 275 226 #elif __powerpc__ 227 # define __NR_sched_setattr 355 228 # define __NR_sched_getattr 356 229 #elif __s390x__ 230 # define __NR_sched_setattr 345 231 # define __NR_sched_getattr 346 232 #endif 233 234 #define SCHED_DEADLINE 6 235 236 static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, 237 unsigned int flags) { 238 return syscall(__NR_sched_setattr, pid, attr, flags); 239 } 240 241 static inline int sched_getattr(pid_t pid, struct sched_attr *attr, 242 unsigned int size, unsigned int flags) 243 { 244 return syscall(__NR_sched_getattr, pid, attr, size, flags); 245 } 246 247 int __set_sched_attr(int pid, struct sched_attr *attr) 248 { 249 int flags = 0; 250 int retval; 251 252 retval = sched_setattr(pid, attr, flags); 253 if (retval < 0) { 254 err_msg("Failed to set sched attributes to the pid %d: %s\n", 255 pid, strerror(errno)); 256 return 1; 257 } 258 259 return 0; 260 } 261 262 /* 263 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 264 * 265 * Check if the procfs entry is a directory of a process, and then check if the 266 * process has a comm with the prefix set in char *comm_prefix. As the 267 * current users of this function only check for kernel threads, there is no 268 * need to check for the threads for the process. 269 * 270 * Return: True if the proc_entry contains a comm file with comm_prefix*. 271 * Otherwise returns false. 
272 */ 273 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) 274 { 275 char buffer[MAX_PATH]; 276 int comm_fd, retval; 277 char *t_name; 278 279 if (proc_entry->d_type != DT_DIR) 280 return 0; 281 282 if (*proc_entry->d_name == '.') 283 return 0; 284 285 /* check if the string is a pid */ 286 for (t_name = proc_entry->d_name; t_name; t_name++) { 287 if (!isdigit(*t_name)) 288 break; 289 } 290 291 if (*t_name != '\0') 292 return 0; 293 294 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); 295 comm_fd = open(buffer, O_RDONLY); 296 if (comm_fd < 0) 297 return 0; 298 299 memset(buffer, 0, MAX_PATH); 300 retval = read(comm_fd, buffer, MAX_PATH); 301 302 close(comm_fd); 303 304 if (retval <= 0) 305 return 0; 306 307 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); 308 if (retval) 309 return 0; 310 311 /* comm already have \n */ 312 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); 313 314 return 1; 315 } 316 317 /* 318 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix 319 * 320 * This function uses procfs to list the currently running threads and then set the 321 * sched_attr *attr to the threads that start with char *comm_prefix. It is 322 * mainly used to set the priority to the kernel threads created by the 323 * tracers. 
324 */ 325 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 326 { 327 struct dirent *proc_entry; 328 DIR *procfs; 329 int retval; 330 331 if (strlen(comm_prefix) >= MAX_PATH) { 332 err_msg("Command prefix is too long: %d < strlen(%s)\n", 333 MAX_PATH, comm_prefix); 334 return 1; 335 } 336 337 procfs = opendir("/proc"); 338 if (!procfs) { 339 err_msg("Could not open procfs\n"); 340 return 1; 341 } 342 343 while ((proc_entry = readdir(procfs))) { 344 345 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 346 if (!retval) 347 continue; 348 349 /* procfs_is_workload_pid confirmed it is a pid */ 350 retval = __set_sched_attr(atoi(proc_entry->d_name), attr); 351 if (retval) { 352 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 353 goto out_err; 354 } 355 356 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 357 } 358 return 0; 359 360 out_err: 361 closedir(procfs); 362 return 1; 363 } 364 365 #define INVALID_VAL (~0L) 366 static long get_long_ns_after_colon(char *start) 367 { 368 long val = INVALID_VAL; 369 370 /* find the ":" */ 371 start = strstr(start, ":"); 372 if (!start) 373 return -1; 374 375 /* skip ":" */ 376 start++; 377 val = parse_ns_duration(start); 378 379 return val; 380 } 381 382 static long get_long_after_colon(char *start) 383 { 384 long val = INVALID_VAL; 385 386 /* find the ":" */ 387 start = strstr(start, ":"); 388 if (!start) 389 return -1; 390 391 /* skip ":" */ 392 start++; 393 val = get_llong_from_str(start); 394 395 return val; 396 } 397 398 /* 399 * parse priority in the format: 400 * SCHED_OTHER: 401 * o:<prio> 402 * O:<prio> 403 * SCHED_RR: 404 * r:<prio> 405 * R:<prio> 406 * SCHED_FIFO: 407 * f:<prio> 408 * F:<prio> 409 * SCHED_DEADLINE: 410 * d:runtime:period 411 * D:runtime:period 412 */ 413 int parse_prio(char *arg, struct sched_attr *sched_param) 414 { 415 long prio; 416 long runtime; 417 long period; 418 419 memset(sched_param, 0, sizeof(*sched_param)); 420 
sched_param->size = sizeof(*sched_param); 421 422 switch (arg[0]) { 423 case 'd': 424 case 'D': 425 /* d:runtime:period */ 426 if (strlen(arg) < 4) 427 return -1; 428 429 runtime = get_long_ns_after_colon(arg); 430 if (runtime == INVALID_VAL) 431 return -1; 432 433 period = get_long_ns_after_colon(&arg[2]); 434 if (period == INVALID_VAL) 435 return -1; 436 437 if (runtime > period) 438 return -1; 439 440 sched_param->sched_policy = SCHED_DEADLINE; 441 sched_param->sched_runtime = runtime; 442 sched_param->sched_deadline = period; 443 sched_param->sched_period = period; 444 break; 445 case 'f': 446 case 'F': 447 /* f:prio */ 448 prio = get_long_after_colon(arg); 449 if (prio == INVALID_VAL) 450 return -1; 451 452 if (prio < sched_get_priority_min(SCHED_FIFO)) 453 return -1; 454 if (prio > sched_get_priority_max(SCHED_FIFO)) 455 return -1; 456 457 sched_param->sched_policy = SCHED_FIFO; 458 sched_param->sched_priority = prio; 459 break; 460 case 'r': 461 case 'R': 462 /* r:prio */ 463 prio = get_long_after_colon(arg); 464 if (prio == INVALID_VAL) 465 return -1; 466 467 if (prio < sched_get_priority_min(SCHED_RR)) 468 return -1; 469 if (prio > sched_get_priority_max(SCHED_RR)) 470 return -1; 471 472 sched_param->sched_policy = SCHED_RR; 473 sched_param->sched_priority = prio; 474 break; 475 case 'o': 476 case 'O': 477 /* o:prio */ 478 prio = get_long_after_colon(arg); 479 if (prio == INVALID_VAL) 480 return -1; 481 482 if (prio < sched_get_priority_min(SCHED_OTHER)) 483 return -1; 484 if (prio > sched_get_priority_max(SCHED_OTHER)) 485 return -1; 486 487 sched_param->sched_policy = SCHED_OTHER; 488 sched_param->sched_priority = prio; 489 break; 490 default: 491 return -1; 492 } 493 return 0; 494 } 495 496 /* 497 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 498 * 499 * This is used to reduce the exit from idle latency. The value 500 * will be reset once the file descriptor of /dev/cpu_dma_latecy 501 * is closed. 
502 * 503 * Return: the /dev/cpu_dma_latecy file descriptor 504 */ 505 int set_cpu_dma_latency(int32_t latency) 506 { 507 int retval; 508 int fd; 509 510 fd = open("/dev/cpu_dma_latency", O_RDWR); 511 if (fd < 0) { 512 err_msg("Error opening /dev/cpu_dma_latency\n"); 513 return -1; 514 } 515 516 retval = write(fd, &latency, 4); 517 if (retval < 1) { 518 err_msg("Error setting /dev/cpu_dma_latency\n"); 519 close(fd); 520 return -1; 521 } 522 523 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 524 525 return fd; 526 } 527 528 #define _STR(x) #x 529 #define STR(x) _STR(x) 530 531 /* 532 * find_mount - find a the mount point of a given fs 533 * 534 * Returns 0 if mount is not found, otherwise return 1 and fill mp 535 * with the mount point. 536 */ 537 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 538 { 539 char mount_point[MAX_PATH]; 540 char type[100]; 541 int found = 0; 542 FILE *fp; 543 544 fp = fopen("/proc/mounts", "r"); 545 if (!fp) 546 return 0; 547 548 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 549 if (strcmp(type, fs) == 0) { 550 found = 1; 551 break; 552 } 553 } 554 fclose(fp); 555 556 if (!found) 557 return 0; 558 559 memset(mp, 0, sizeof_mp); 560 strncpy(mp, mount_point, sizeof_mp - 1); 561 562 debug_msg("Fs %s found at %s\n", fs, mp); 563 return 1; 564 } 565 566 /* 567 * get_self_cgroup - get the current thread cgroup path 568 * 569 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 570 * 571 * 0::/user.slice/user-0.slice/session-3.scope'\n' 572 * 573 * This function is interested in the content after the second : and before the '\n'. 574 * 575 * Returns 1 if a string was found, 0 otherwise. 
576 */ 577 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 578 { 579 char path[MAX_PATH], *start; 580 int fd, retval; 581 582 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 583 584 fd = open(path, O_RDONLY); 585 if (fd < 0) 586 return 0; 587 588 retval = read(fd, path, MAX_PATH); 589 590 close(fd); 591 592 if (retval <= 0) 593 return 0; 594 595 start = path; 596 597 start = strstr(start, ":"); 598 if (!start) 599 return 0; 600 601 /* skip ":" */ 602 start++; 603 604 start = strstr(start, ":"); 605 if (!start) 606 return 0; 607 608 /* skip ":" */ 609 start++; 610 611 if (strlen(start) >= sizeof_self_cg) 612 return 0; 613 614 snprintf(self_cg, sizeof_self_cg, "%s", start); 615 616 /* Swap '\n' with '\0' */ 617 start = strstr(self_cg, "\n"); 618 619 /* there must be '\n' */ 620 if (!start) 621 return 0; 622 623 /* ok, it found a string after the second : and before the \n */ 624 *start = '\0'; 625 626 return 1; 627 } 628 629 /* 630 * set_comm_cgroup - Set cgroup to pid_t pid 631 * 632 * If cgroup argument is not NULL, the threads will move to the given cgroup. 633 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 634 * 635 * Supports cgroup v2. 636 * 637 * Returns 1 on success, 0 otherwise. 
638 */ 639 int set_pid_cgroup(pid_t pid, const char *cgroup) 640 { 641 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 642 char cgroup_procs[MAX_PATH]; 643 char pid_str[24]; 644 int retval; 645 int cg_fd; 646 647 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 648 if (!retval) { 649 err_msg("Did not find cgroupv2 mount point\n"); 650 return 0; 651 } 652 653 if (!cgroup) { 654 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 655 sizeof(cgroup_path) - strlen(cgroup_path)); 656 if (!retval) { 657 err_msg("Did not find self cgroup\n"); 658 return 0; 659 } 660 } else { 661 snprintf(&cgroup_path[strlen(cgroup_path)], 662 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 663 } 664 665 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 666 667 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 668 669 cg_fd = open(cgroup_procs, O_RDWR); 670 if (cg_fd < 0) 671 return 0; 672 673 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 674 675 retval = write(cg_fd, pid_str, strlen(pid_str)); 676 if (retval < 0) 677 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 678 pid_str, strerror(errno)); 679 else 680 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 681 682 close(cg_fd); 683 684 return (retval >= 0); 685 } 686 687 /** 688 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 689 * 690 * If cgroup argument is not NULL, the threads will move to the given cgroup. 691 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 692 * 693 * Supports cgroup v2. 694 * 695 * Returns 1 on success, 0 otherwise. 
696 */ 697 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 698 { 699 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 700 char cgroup_procs[MAX_PATH]; 701 struct dirent *proc_entry; 702 DIR *procfs; 703 int retval; 704 int cg_fd; 705 706 if (strlen(comm_prefix) >= MAX_PATH) { 707 err_msg("Command prefix is too long: %d < strlen(%s)\n", 708 MAX_PATH, comm_prefix); 709 return 0; 710 } 711 712 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 713 if (!retval) { 714 err_msg("Did not find cgroupv2 mount point\n"); 715 return 0; 716 } 717 718 if (!cgroup) { 719 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 720 sizeof(cgroup_path) - strlen(cgroup_path)); 721 if (!retval) { 722 err_msg("Did not find self cgroup\n"); 723 return 0; 724 } 725 } else { 726 snprintf(&cgroup_path[strlen(cgroup_path)], 727 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 728 } 729 730 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 731 732 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 733 734 cg_fd = open(cgroup_procs, O_RDWR); 735 if (cg_fd < 0) 736 return 0; 737 738 procfs = opendir("/proc"); 739 if (!procfs) { 740 err_msg("Could not open procfs\n"); 741 goto out_cg; 742 } 743 744 while ((proc_entry = readdir(procfs))) { 745 746 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 747 if (!retval) 748 continue; 749 750 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 751 if (retval < 0) { 752 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 753 proc_entry->d_name, strerror(errno)); 754 goto out_procfs; 755 } 756 757 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 758 } 759 760 closedir(procfs); 761 close(cg_fd); 762 return 1; 763 764 out_procfs: 765 closedir(procfs); 766 out_cg: 767 close(cg_fd); 768 return 0; 769 } 770 771 /** 772 * auto_house_keeping - Automatically move rtla out of measurement threads 773 * 774 * Try to move rtla away from the 
tracer, if possible.
 *
 * rtla is restricted to the CPUs of its current affinity that are not in
 * monitored_cpus. Nothing is changed when the two sets have no CPU in
 * common.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t allowed, candidates;
	pid_t self = getpid();

	/* the CPUs where rtla is allowed to run now */
	if (sched_getaffinity(self, sizeof(allowed), &allowed) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* no overlap with the monitored CPUs: nothing to do */
	CPU_AND(&candidates, &allowed, monitored_cpus);
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/*
	 * The CPUs that are in only one of the two sets, limited to those
	 * where rtla can run: the allowed CPUs that are not monitored.
	 */
	CPU_XOR(&candidates, &allowed, monitored_cpus);
	CPU_AND(&candidates, &candidates, &allowed);

	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(self, sizeof(candidates), &candidates) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}