1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #include <dirent.h> 8 #include <stdarg.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <unistd.h> 12 #include <ctype.h> 13 #include <errno.h> 14 #include <fcntl.h> 15 #include <sched.h> 16 #include <stdio.h> 17 18 #include "utils.h" 19 20 #define MAX_MSG_LENGTH 1024 21 int config_debug; 22 23 /* 24 * err_msg - print an error message to the stderr 25 */ 26 void err_msg(const char *fmt, ...) 27 { 28 char message[MAX_MSG_LENGTH]; 29 va_list ap; 30 31 va_start(ap, fmt); 32 vsnprintf(message, sizeof(message), fmt, ap); 33 va_end(ap); 34 35 fprintf(stderr, "%s", message); 36 } 37 38 /* 39 * debug_msg - print a debug message to stderr if debug is set 40 */ 41 void debug_msg(const char *fmt, ...) 42 { 43 char message[MAX_MSG_LENGTH]; 44 va_list ap; 45 46 if (!config_debug) 47 return; 48 49 va_start(ap, fmt); 50 vsnprintf(message, sizeof(message), fmt, ap); 51 va_end(ap); 52 53 fprintf(stderr, "%s", message); 54 } 55 56 /* 57 * get_llong_from_str - get a long long int from a string 58 */ 59 long long get_llong_from_str(char *start) 60 { 61 long long value; 62 char *end; 63 64 errno = 0; 65 value = strtoll(start, &end, 10); 66 if (errno || start == end) 67 return -1; 68 69 return value; 70 } 71 72 /* 73 * get_duration - fill output with a human readable duration since start_time 74 */ 75 void get_duration(time_t start_time, char *output, int output_size) 76 { 77 time_t now = time(NULL); 78 struct tm *tm_info; 79 time_t duration; 80 81 duration = difftime(now, start_time); 82 tm_info = gmtime(&duration); 83 84 snprintf(output, output_size, "%3d %02d:%02d:%02d", 85 tm_info->tm_yday, 86 tm_info->tm_hour, 87 tm_info->tm_min, 88 tm_info->tm_sec); 89 } 90 91 /* 92 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 93 * 94 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 95 * filling cpu_set_t argument. 96 * 97 * Returns 1 on success, 0 otherwise. 98 */ 99 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 100 { 101 const char *p; 102 int end_cpu; 103 int nr_cpus; 104 int cpu; 105 int i; 106 107 CPU_ZERO(set); 108 109 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 110 111 for (p = cpu_list; *p; ) { 112 cpu = atoi(p); 113 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) 114 goto err; 115 116 while (isdigit(*p)) 117 p++; 118 if (*p == '-') { 119 p++; 120 end_cpu = atoi(p); 121 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) 122 goto err; 123 while (isdigit(*p)) 124 p++; 125 } else 126 end_cpu = cpu; 127 128 if (cpu == end_cpu) { 129 debug_msg("cpu_set: adding cpu %d\n", cpu); 130 CPU_SET(cpu, set); 131 } else { 132 for (i = cpu; i <= end_cpu; i++) { 133 debug_msg("cpu_set: adding cpu %d\n", i); 134 CPU_SET(i, set); 135 } 136 } 137 138 if (*p == ',') 139 p++; 140 } 141 142 return 0; 143 err: 144 debug_msg("Error parsing the cpu set %s\n", cpu_list); 145 return 1; 146 } 147 148 /* 149 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 150 */ 151 long parse_seconds_duration(char *val) 152 { 153 char *end; 154 long t; 155 156 t = strtol(val, &end, 10); 157 158 if (end) { 159 switch (*end) { 160 case 's': 161 case 'S': 162 break; 163 case 'm': 164 case 'M': 165 t *= 60; 166 break; 167 case 'h': 168 case 'H': 169 t *= 60 * 60; 170 break; 171 172 case 'd': 173 case 'D': 174 t *= 24 * 60 * 60; 175 break; 176 } 177 } 178 179 return t; 180 } 181 182 /* 183 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 184 */ 185 long parse_ns_duration(char *val) 186 { 187 char *end; 188 long t; 189 190 t = strtol(val, &end, 10); 191 192 if (end) { 193 if (!strncmp(end, "ns", 2)) { 194 return t; 195 } else if (!strncmp(end, "us", 2)) { 196 t *= 1000; 197 return t; 198 } else if (!strncmp(end, "ms", 2)) { 199 t *= 1000 * 1000; 200 return t; 201 } else if (!strncmp(end, "s", 1)) { 202 t *= 1000 * 1000 * 1000; 203 return t; 204 } 205 return -1; 206 } 207 208 return t; 209 } 210 211 /* 212 * This is a set of helper functions to use SCHED_DEADLINE. 213 */ 214 #ifdef __x86_64__ 215 # define __NR_sched_setattr 314 216 # define __NR_sched_getattr 315 217 #elif __i386__ 218 # define __NR_sched_setattr 351 219 # define __NR_sched_getattr 352 220 #elif __arm__ 221 # define __NR_sched_setattr 380 222 # define __NR_sched_getattr 381 223 #elif __aarch64__ || __riscv 224 # define __NR_sched_setattr 274 225 # define __NR_sched_getattr 275 226 #elif __powerpc__ 227 # define __NR_sched_setattr 355 228 # define __NR_sched_getattr 356 229 #elif __s390x__ 230 # define __NR_sched_setattr 345 231 # define __NR_sched_getattr 346 232 #endif 233 234 #define SCHED_DEADLINE 6 235 236 static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, 237 unsigned int flags) { 238 return syscall(__NR_sched_setattr, pid, attr, flags); 239 } 240 241 int __set_sched_attr(int pid, struct sched_attr *attr) 242 { 243 int flags = 0; 244 int retval; 245 246 retval = sched_setattr(pid, attr, flags); 247 if (retval < 0) { 248 err_msg("Failed to set sched attributes to the pid %d: %s\n", 249 pid, strerror(errno)); 250 return 1; 251 } 252 253 return 0; 254 } 255 256 /* 257 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 258 * 259 * Check if the procfs entry is a directory of a process, and then check if the 260 * process has a comm with the prefix set in char *comm_prefix. As the 261 * current users of this function only check for kernel threads, there is no 262 * need to check for the threads for the process. 263 * 264 * Return: True if the proc_entry contains a comm file with comm_prefix*. 265 * Otherwise returns false. 266 */ 267 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) 268 { 269 char buffer[MAX_PATH]; 270 int comm_fd, retval; 271 char *t_name; 272 273 if (proc_entry->d_type != DT_DIR) 274 return 0; 275 276 if (*proc_entry->d_name == '.') 277 return 0; 278 279 /* check if the string is a pid */ 280 for (t_name = proc_entry->d_name; t_name; t_name++) { 281 if (!isdigit(*t_name)) 282 break; 283 } 284 285 if (*t_name != '\0') 286 return 0; 287 288 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); 289 comm_fd = open(buffer, O_RDONLY); 290 if (comm_fd < 0) 291 return 0; 292 293 memset(buffer, 0, MAX_PATH); 294 retval = read(comm_fd, buffer, MAX_PATH); 295 296 close(comm_fd); 297 298 if (retval <= 0) 299 return 0; 300 301 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); 302 if (retval) 303 return 0; 304 305 /* comm already have \n */ 306 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); 307 308 return 1; 309 } 310 311 /* 312 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix 313 * 314 * This function uses procfs to list the currently running threads and then set the 315 * sched_attr *attr to the threads that start with char *comm_prefix. It is 316 * mainly used to set the priority to the kernel threads created by the 317 * tracers. 318 */ 319 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 320 { 321 struct dirent *proc_entry; 322 DIR *procfs; 323 int retval; 324 325 if (strlen(comm_prefix) >= MAX_PATH) { 326 err_msg("Command prefix is too long: %d < strlen(%s)\n", 327 MAX_PATH, comm_prefix); 328 return 1; 329 } 330 331 procfs = opendir("/proc"); 332 if (!procfs) { 333 err_msg("Could not open procfs\n"); 334 return 1; 335 } 336 337 while ((proc_entry = readdir(procfs))) { 338 339 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 340 if (!retval) 341 continue; 342 343 /* procfs_is_workload_pid confirmed it is a pid */ 344 retval = __set_sched_attr(atoi(proc_entry->d_name), attr); 345 if (retval) { 346 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 347 goto out_err; 348 } 349 350 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 351 } 352 return 0; 353 354 out_err: 355 closedir(procfs); 356 return 1; 357 } 358 359 #define INVALID_VAL (~0L) 360 static long get_long_ns_after_colon(char *start) 361 { 362 long val = INVALID_VAL; 363 364 /* find the ":" */ 365 start = strstr(start, ":"); 366 if (!start) 367 return -1; 368 369 /* skip ":" */ 370 start++; 371 val = parse_ns_duration(start); 372 373 return val; 374 } 375 376 static long get_long_after_colon(char *start) 377 { 378 long val = INVALID_VAL; 379 380 /* find the ":" */ 381 start = strstr(start, ":"); 382 if (!start) 383 return -1; 384 385 /* skip ":" */ 386 start++; 387 val = get_llong_from_str(start); 388 389 return val; 390 } 391 392 /* 393 * parse priority in the format: 394 * SCHED_OTHER: 395 * o:<prio> 396 * O:<prio> 397 * SCHED_RR: 398 * r:<prio> 399 * R:<prio> 400 * SCHED_FIFO: 401 * f:<prio> 402 * F:<prio> 403 * SCHED_DEADLINE: 404 * d:runtime:period 405 * D:runtime:period 406 */ 407 int parse_prio(char *arg, struct sched_attr *sched_param) 408 { 409 long prio; 410 long runtime; 411 long period; 412 413 memset(sched_param, 0, sizeof(*sched_param)); 414 sched_param->size = sizeof(*sched_param); 415 416 switch (arg[0]) { 417 case 'd': 418 case 'D': 419 /* d:runtime:period */ 420 if (strlen(arg) < 4) 421 return -1; 422 423 runtime = get_long_ns_after_colon(arg); 424 if (runtime == INVALID_VAL) 425 return -1; 426 427 period = get_long_ns_after_colon(&arg[2]); 428 if (period == INVALID_VAL) 429 return -1; 430 431 if (runtime > period) 432 return -1; 433 434 sched_param->sched_policy = SCHED_DEADLINE; 435 sched_param->sched_runtime = runtime; 436 sched_param->sched_deadline = period; 437 sched_param->sched_period = period; 438 break; 439 case 'f': 440 case 'F': 441 /* f:prio */ 442 prio = get_long_after_colon(arg); 443 if (prio == INVALID_VAL) 444 return -1; 445 446 if (prio < sched_get_priority_min(SCHED_FIFO)) 447 return -1; 448 if (prio > sched_get_priority_max(SCHED_FIFO)) 449 return -1; 450 451 sched_param->sched_policy = SCHED_FIFO; 452 sched_param->sched_priority = prio; 453 break; 454 case 'r': 455 case 'R': 456 /* r:prio */ 457 prio = get_long_after_colon(arg); 458 if (prio == INVALID_VAL) 459 return -1; 460 461 if (prio < sched_get_priority_min(SCHED_RR)) 462 return -1; 463 if (prio > sched_get_priority_max(SCHED_RR)) 464 return -1; 465 466 sched_param->sched_policy = SCHED_RR; 467 sched_param->sched_priority = prio; 468 break; 469 case 'o': 470 case 'O': 471 /* o:prio */ 472 prio = get_long_after_colon(arg); 473 if (prio == INVALID_VAL) 474 return -1; 475 476 if (prio < MIN_NICE) 477 return -1; 478 if (prio > MAX_NICE) 479 return -1; 480 481 sched_param->sched_policy = SCHED_OTHER; 482 sched_param->sched_nice = prio; 483 break; 484 default: 485 return -1; 486 } 487 return 0; 488 } 489 490 /* 491 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 492 * 493 * This is used to reduce the exit from idle latency. The value 494 * will be reset once the file descriptor of /dev/cpu_dma_latecy 495 * is closed. 496 * 497 * Return: the /dev/cpu_dma_latecy file descriptor 498 */ 499 int set_cpu_dma_latency(int32_t latency) 500 { 501 int retval; 502 int fd; 503 504 fd = open("/dev/cpu_dma_latency", O_RDWR); 505 if (fd < 0) { 506 err_msg("Error opening /dev/cpu_dma_latency\n"); 507 return -1; 508 } 509 510 retval = write(fd, &latency, 4); 511 if (retval < 1) { 512 err_msg("Error setting /dev/cpu_dma_latency\n"); 513 close(fd); 514 return -1; 515 } 516 517 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 518 519 return fd; 520 } 521 522 #define _STR(x) #x 523 #define STR(x) _STR(x) 524 525 /* 526 * find_mount - find a the mount point of a given fs 527 * 528 * Returns 0 if mount is not found, otherwise return 1 and fill mp 529 * with the mount point. 530 */ 531 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 532 { 533 char mount_point[MAX_PATH+1]; 534 char type[100]; 535 int found = 0; 536 FILE *fp; 537 538 fp = fopen("/proc/mounts", "r"); 539 if (!fp) 540 return 0; 541 542 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 543 if (strcmp(type, fs) == 0) { 544 found = 1; 545 break; 546 } 547 } 548 fclose(fp); 549 550 if (!found) 551 return 0; 552 553 memset(mp, 0, sizeof_mp); 554 strncpy(mp, mount_point, sizeof_mp - 1); 555 556 debug_msg("Fs %s found at %s\n", fs, mp); 557 return 1; 558 } 559 560 /* 561 * get_self_cgroup - get the current thread cgroup path 562 * 563 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 564 * 565 * 0::/user.slice/user-0.slice/session-3.scope'\n' 566 * 567 * This function is interested in the content after the second : and before the '\n'. 568 * 569 * Returns 1 if a string was found, 0 otherwise. 570 */ 571 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 572 { 573 char path[MAX_PATH], *start; 574 int fd, retval; 575 576 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 577 578 fd = open(path, O_RDONLY); 579 if (fd < 0) 580 return 0; 581 582 retval = read(fd, path, MAX_PATH); 583 584 close(fd); 585 586 if (retval <= 0) 587 return 0; 588 589 start = path; 590 591 start = strstr(start, ":"); 592 if (!start) 593 return 0; 594 595 /* skip ":" */ 596 start++; 597 598 start = strstr(start, ":"); 599 if (!start) 600 return 0; 601 602 /* skip ":" */ 603 start++; 604 605 if (strlen(start) >= sizeof_self_cg) 606 return 0; 607 608 snprintf(self_cg, sizeof_self_cg, "%s", start); 609 610 /* Swap '\n' with '\0' */ 611 start = strstr(self_cg, "\n"); 612 613 /* there must be '\n' */ 614 if (!start) 615 return 0; 616 617 /* ok, it found a string after the second : and before the \n */ 618 *start = '\0'; 619 620 return 1; 621 } 622 623 /* 624 * set_comm_cgroup - Set cgroup to pid_t pid 625 * 626 * If cgroup argument is not NULL, the threads will move to the given cgroup. 627 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 628 * 629 * Supports cgroup v2. 630 * 631 * Returns 1 on success, 0 otherwise. 632 */ 633 int set_pid_cgroup(pid_t pid, const char *cgroup) 634 { 635 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 636 char cgroup_procs[MAX_PATH]; 637 char pid_str[24]; 638 int retval; 639 int cg_fd; 640 641 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 642 if (!retval) { 643 err_msg("Did not find cgroupv2 mount point\n"); 644 return 0; 645 } 646 647 if (!cgroup) { 648 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 649 sizeof(cgroup_path) - strlen(cgroup_path)); 650 if (!retval) { 651 err_msg("Did not find self cgroup\n"); 652 return 0; 653 } 654 } else { 655 snprintf(&cgroup_path[strlen(cgroup_path)], 656 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 657 } 658 659 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 660 661 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 662 663 cg_fd = open(cgroup_procs, O_RDWR); 664 if (cg_fd < 0) 665 return 0; 666 667 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 668 669 retval = write(cg_fd, pid_str, strlen(pid_str)); 670 if (retval < 0) 671 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 672 pid_str, strerror(errno)); 673 else 674 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 675 676 close(cg_fd); 677 678 return (retval >= 0); 679 } 680 681 /** 682 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 683 * 684 * If cgroup argument is not NULL, the threads will move to the given cgroup. 685 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 686 * 687 * Supports cgroup v2. 688 * 689 * Returns 1 on success, 0 otherwise. 690 */ 691 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 692 { 693 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 694 char cgroup_procs[MAX_PATH]; 695 struct dirent *proc_entry; 696 DIR *procfs; 697 int retval; 698 int cg_fd; 699 700 if (strlen(comm_prefix) >= MAX_PATH) { 701 err_msg("Command prefix is too long: %d < strlen(%s)\n", 702 MAX_PATH, comm_prefix); 703 return 0; 704 } 705 706 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 707 if (!retval) { 708 err_msg("Did not find cgroupv2 mount point\n"); 709 return 0; 710 } 711 712 if (!cgroup) { 713 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 714 sizeof(cgroup_path) - strlen(cgroup_path)); 715 if (!retval) { 716 err_msg("Did not find self cgroup\n"); 717 return 0; 718 } 719 } else { 720 snprintf(&cgroup_path[strlen(cgroup_path)], 721 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 722 } 723 724 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 725 726 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 727 728 cg_fd = open(cgroup_procs, O_RDWR); 729 if (cg_fd < 0) 730 return 0; 731 732 procfs = opendir("/proc"); 733 if (!procfs) { 734 err_msg("Could not open procfs\n"); 735 goto out_cg; 736 } 737 738 while ((proc_entry = readdir(procfs))) { 739 740 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 741 if (!retval) 742 continue; 743 744 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 745 if (retval < 0) { 746 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 747 proc_entry->d_name, strerror(errno)); 748 goto out_procfs; 749 } 750 751 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 752 } 753 754 closedir(procfs); 755 close(cg_fd); 756 return 1; 757 758 out_procfs: 759 closedir(procfs); 760 out_cg: 761 close(cg_fd); 762 return 0; 763 } 764 765 /** 766 * auto_house_keeping - Automatically move rtla out of measurement threads 767 * 768 * Try to move rtla away from the tracer, if possible. 769 * 770 * Returns 1 on success, 0 otherwise. 771 */ 772 int auto_house_keeping(cpu_set_t *monitored_cpus) 773 { 774 cpu_set_t rtla_cpus, house_keeping_cpus; 775 int retval; 776 777 /* first get the CPUs in which rtla can actually run. */ 778 retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); 779 if (retval == -1) { 780 debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); 781 return 0; 782 } 783 784 /* then check if the existing setup is already good. */ 785 CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 786 if (!CPU_COUNT(&house_keeping_cpus)) { 787 debug_msg("rtla and the monitored CPUs do not share CPUs."); 788 debug_msg("Skipping auto house-keeping\n"); 789 return 1; 790 } 791 792 /* remove the intersection */ 793 CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 794 795 /* get only those that rtla can run */ 796 CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); 797 798 /* is there any cpu left? */ 799 if (!CPU_COUNT(&house_keeping_cpus)) { 800 debug_msg("Could not find any CPU for auto house-keeping\n"); 801 return 0; 802 } 803 804 retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); 805 if (retval == -1) { 806 debug_msg("Could not set affinity for auto house-keeping\n"); 807 return 0; 808 } 809 810 debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); 811 812 return 1; 813 } 814